## Import Dependencies

In [1]:
import atoti
import pandas as pd
import time

## Load Data from S3 

In [2]:
# Load credit card info data
cc_df = pd.read_csv('s3://data.atoti.io/notebooks/retail-banking/sd254_cards.csv')
cc_df

Unnamed: 0,User,CARD INDEX,Card Brand,Card Type,Card Number,Expires,CVV,Has Chip,Cards Issued,Credit Limit,Acct Open Date,Year PIN last Changed,Card on Dark Web
0,0,0,Visa,Debit,4344676511950444,12/2022,623,YES,2,$24295,09/2002,2008,No
1,0,1,Visa,Debit,4956965974959986,12/2020,393,YES,2,$21968,04/2014,2014,No
2,0,2,Visa,Debit,4582313478255491,02/2024,719,YES,2,$46414,07/2003,2004,No
3,0,3,Visa,Credit,4879494103069057,08/2024,693,NO,1,$12400,01/2003,2012,No
4,0,4,Mastercard,Debit (Prepaid),5722874738736011,03/2009,75,YES,1,$28,09/2008,2009,No
...,...,...,...,...,...,...,...,...,...,...,...,...,...
6141,1997,1,Amex,Credit,300609782832003,01/2024,663,YES,1,$6900,11/2000,2013,No
6142,1997,2,Visa,Credit,4718517475996018,01/2021,492,YES,2,$5700,04/2012,2012,No
6143,1998,0,Mastercard,Credit,5929512204765914,08/2020,237,NO,2,$9200,02/2012,2012,No
6144,1999,0,Mastercard,Debit,5589768928167462,01/2020,630,YES,1,$28074,01/2020,2020,No


## Analyze User Credit Card Data

In [3]:
# Find the distinct credit card brands
unique_values = cc_df['Card Brand'].unique()
print(sorted(unique_values))

['Amex', 'Discover', 'Mastercard', 'Visa']


In [4]:
# Find the distinct credit card combinations, and their frequency counts overall
cc_combinations_df = cc_df.groupby(['Card Brand', 'Card Type'], as_index = False).size()
cc_combinations_df

Unnamed: 0,Card Brand,Card Type,size
0,Amex,Credit,402
1,Discover,Credit,209
2,Mastercard,Credit,635
3,Mastercard,Debit,2191
4,Mastercard,Debit (Prepaid),383
5,Visa,Credit,811
6,Visa,Debit,1320
7,Visa,Debit (Prepaid),195


In [5]:
# Find the distinct credit card combinations for each user, and their frequency counts
cc_combinations_df = cc_df.groupby(['User', 'Card Brand', 'Card Type'], as_index = False).size()
cc_combinations_df

Unnamed: 0,User,Card Brand,Card Type,size
0,0,Mastercard,Debit (Prepaid),1
1,0,Visa,Credit,1
2,0,Visa,Debit,3
3,1,Mastercard,Debit,1
4,1,Mastercard,Debit (Prepaid),2
...,...,...,...,...
4646,1997,Mastercard,Debit,1
4647,1997,Visa,Credit,1
4648,1998,Mastercard,Credit,1
4649,1999,Mastercard,Debit,1


In [6]:
# Find the max frequency count all of distinct credit card combinations for users
max_cc_combinations = cc_combinations_df.groupby(['Card Brand', 'Card Type'], as_index = False)['size'].max()
max_cc_combinations

Unnamed: 0,Card Brand,Card Type,size
0,Amex,Credit,4
1,Discover,Credit,2
2,Mastercard,Credit,4
3,Mastercard,Debit,6
4,Mastercard,Debit (Prepaid),3
5,Visa,Credit,4
6,Visa,Debit,5
7,Visa,Debit (Prepaid),2


In [7]:
match_list = []

for index, row in max_cc_combinations.iterrows():
    match_list.append(row['Card Brand'] + " " + row['Card Type'])

match_list

cc_dict = {'Amex Credit': ['AMEX 1', 'AMEX 2', 'AMEX 3', 'AMEX 4'],
           'Discover Credit': ['DISC 1', 'DISC 2'],
           'Mastercard Credit': ['MCC 1', 'MCC 2', 'MCC 3', 'MCC 4'],
           'Mastercard Debit': ['MCD 1', 'MCD 2', 'MCD 3', 'MCD 4', 'MCD 5', 'MCD 6'],
           'Mastercard Debit (Prepaid)': ['MCP 1', 'MCP 2', 'MCP 3'],
           'Visa Credit': ['VC 1', 'VC 2', 'VC 3', 'VC 4'],
           'Visa Debit': ['VD 1', 'VD 2', 'VC 3', 'VC 4', 'VC 5'],
           'Visa Debit (Prepaid)': ['VDP 1', 'VDP 2']
           }


In [9]:
new_df = pd.DataFrame()

for user in cc_df['User'].unique():
    df = cc_df.loc[cc_df['User'] == user]
    
    # print(df)

    for index, row in df.iterrows():
        cc_combination = row['Card Brand'] + " " + row['Card Type']
        print(f"User {user} has a {cc_combination}")
        if cc_combination in match_list:
            cc_dict[cc_combination][0]
            print(f"  Assigning to... {cc_dict[cc_combination][0]}")
    # card_count = df['Card Brand'].value_counts()
    # print(f"User {user} has \n{card_count}")
    # print()
    
    

User 0 has a Visa Debit
  Assigning to...
User 0 has a Visa Debit
  Assigning to...
User 0 has a Visa Debit
  Assigning to...
User 0 has a Visa Credit
  Assigning to...
User 0 has a Mastercard Debit (Prepaid)
  Assigning to...
User 1 has a Visa Credit
  Assigning to...
User 1 has a Visa Debit
  Assigning to...
User 1 has a Mastercard Debit
  Assigning to...
User 1 has a Mastercard Debit (Prepaid)
  Assigning to...
User 1 has a Mastercard Debit (Prepaid)
  Assigning to...
User 2 has a Mastercard Debit
  Assigning to...
User 2 has a Mastercard Debit
  Assigning to...
User 2 has a Mastercard Debit
  Assigning to...
User 2 has a Visa Debit
  Assigning to...
User 2 has a Mastercard Debit
  Assigning to...
User 3 has a Visa Credit
  Assigning to...
User 3 has a Mastercard Debit (Prepaid)
  Assigning to...
User 3 has a Visa Debit
  Assigning to...
User 3 has a Visa Debit
  Assigning to...
User 4 has a Mastercard Debit
  Assigning to...
User 5 has a Visa Credit
  Assigning to...
User 5 has a V

In [None]:
match_list = []

for index, row in cc_combinations_df.iterrows():
    match_list.append(row['Card Brand'] + " " + row['Card Type'])

match_list

In [None]:
user_cc_df['Mastercard']['Debit']

In [None]:
for user in cc_df['User'].unique():
    df = cc_df.loc[cc_df['User'] == user]
    card_count = df['CARD INDEX'].count()
    user_cc_df = df.groupby(by=["Card Brand", "Card Type"], as_index = False).size()
    print(f"User {user} has {card_count} credit cards")
    print(user_cc_df)
    print()

    
    

In [None]:
card_count['Mastercard']