In [2]:
import itertools
from collections import defaultdict
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
plt.style.use('ggplot')
import seaborn as sns


In [81]:

cc = pd.read_csv('cc_info.csv',index_col='credit_card')
transactions = pd.read_csv('transactions.csv')
transactions['date'] = pd.to_datetime(transactions.date)

Your boss wants to identify those users that in your dataset never went above the monthly credit card limit (calendar month). The goal of this is to automatically increase their limit. Can you send him the list of Ids?

In [4]:
transactions.date.dt.year.value_counts()

2015    294588
Name: date, dtype: int64

In [7]:
def monthly_spent_byuser(df):

    return df.groupby(df.date.dt.month)['transaction_dollar_amount'].agg('sum')

In [8]:
card_month_spents = transactions.groupby("credit_card").apply(monthly_spent_byuser).unstack(fill_value=0)

In [9]:
card_month_spents = card_month_spents.join(cc.credit_card_limit)
card_month_spents.head()

Unnamed: 0_level_0,7,8,9,10,credit_card_limit
credit_card,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1003715054175576,162.56,8441.51,11281.56,8954.21,20000
1013870087888817,281.43,11981.15,12461.96,12090.34,15000
1023820165155391,943.26,22741.2,18108.12,19259.98,28000
1073931538936472,220.07,2991.05,2557.12,1638.03,10000
1077622576192810,0.0,634.61,551.28,604.84,10000


In [15]:
card_month_spents.columns = card_month_spents.columns.astype(str)

In [16]:
card_month_spents.columns

Index([u'7', u'8', u'9', u'10', u'credit_card_limit'], dtype='object')

In [17]:
def never_above_limit(row):
    if row['7']<=row['credit_card_limit'] and row['8']<=row['credit_card_limit'] and row['9']<=row['credit_card_limit']\
    and row['10']<=row['credit_card_limit']:
        return "never above limit"
    else:
        return "above limit at least once"

In [18]:
card_month_spents['never_above_limit'] = card_month_spents.apply(never_above_limit, axis = 1)

In [19]:
card_month_spents.head()

Unnamed: 0_level_0,7,8,9,10,credit_card_limit,never_above_limit
credit_card,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
1003715054175576,162.56,8441.51,11281.56,8954.21,20000,never above limit
1013870087888817,281.43,11981.15,12461.96,12090.34,15000,never above limit
1023820165155391,943.26,22741.2,18108.12,19259.98,28000,never above limit
1073931538936472,220.07,2991.05,2557.12,1638.03,10000,never above limit
1077622576192810,0.0,634.61,551.28,604.84,10000,never above limit


In [21]:
never_above_limit = card_month_spents[card_month_spents['never_above_limit'] == 'never above limit']

In [23]:
never_above_limit.to_csv('never_above_limit.csv')

In [24]:
transactions.head()

Unnamed: 0,credit_card,date,transaction_dollar_amount,Long,Lat
0,1003715054175576,2015-09-11 00:32:40,43.78,-80.174132,40.26737
1,1003715054175576,2015-10-24 22:23:08,103.15,-80.19424,40.180114
2,1003715054175576,2015-10-26 18:19:36,48.55,-80.211033,40.313004
3,1003715054175576,2015-10-22 19:41:10,136.18,-80.174138,40.290895
4,1003715054175576,2015-10-26 20:08:22,71.82,-80.23872,40.166719


she wants you to implement an algorithm that as soon as a user goes above her monthly limit, it triggers an alert so that the user can be notiﬁed about that.We assume here that at the beginning of the new month, user total money spent gets reset to zero (i.e. she pays the card fully at the end of each month). Build a function that for each day, returns a list of users who went above their credit card monthly limit on that day.

In [115]:
def above_limit(row,check_month):
    if row[check_month] > row['credit_card_limit']:
        return 1
    else:
        return 0

In [118]:
import datetime
def check_limit(check_date, df):
    check_date = pd.to_datetime(check_date)
    check_month = str(check_date.month)
    df1 = df[df['date']<=check_date]
    card_spents = df1.groupby("credit_card").apply(monthly_spent_byuser).unstack(fill_value=0)
    card_spents = card_spents.join(cc.credit_card_limit)
    card_spents.columns = card_spents.columns.astype(str)
    
    if check_month in card_spents.columns.tolist():
        card_spents['above_limit'] = card_spents.apply(lambda row: above_limit(row, check_month), axis = 1)
    else:
        card_spents['above_limit'] = 0
    return card_spents

In [119]:
check_date = '2015-09-25'
check_limit(check_date, transactions)

Unnamed: 0_level_0,7,8,9,credit_card_limit,above_limit
credit_card,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1003715054175576,162.56,8441.51,9199.38,20000,0
1013870087888817,281.43,11981.15,10618.03,15000,0
1023820165155391,943.26,22741.20,14708.32,28000,0
1073931538936472,220.07,2991.05,1565.70,10000,0
1077622576192810,0.00,634.61,231.66,10000,0
1087468642191606,241.60,2735.77,1690.45,3000,0
1089479586475600,468.32,16103.93,14915.20,18000,0
1101164752077181,346.31,7326.02,4489.96,9000,0
1106824181265726,90.87,6734.92,6593.80,6000,1
1107936587129946,829.32,15534.48,13823.80,24000,0


Finally, your boss is very concerned about frauds cause they are a huge cost for credit card companies. She wants you to implement an unsupervised algorithm that returns all transactions that seem unusual and are worth being investigated further.

* People who use the money more than 2 times as their pervious month
* one transaction is larger than 90% percentile of all the transcations
* People who use the money far from their zipcode