# Setup

## Import

In [2]:
import pandas as pd
import numpy as np
import datetime as dt

## Constants

In [3]:
# Relative paths of the two input CSV files: LoadCards reads CARDS_FILENAME
# and LoadUsers reads USERS_FILENAME.
CARDS_FILENAME = 'data/sd254_cards.csv'
USERS_FILENAME = 'data/sd254_users.csv'

In [4]:
def TurnStringIntoDateTime(x, date_format='%d/%m/%Y'):
    """Parse a date string into a ``datetime.date``.

    Parameters
    ----------
    x : str
        The text to parse.
    date_format : str, optional
        ``strptime`` format describing ``x``. Defaults to day/month/year
        (``'%d/%m/%Y'``); pass e.g. ``'%m/%Y'`` for month/year strings, in
        which case the day of the result is 1.

    Returns
    -------
    datetime.date

    Raises
    ------
    ValueError
        If ``x`` does not match ``date_format``.
    """
    return dt.datetime.strptime(x, date_format).date()

# Cards Dataset

Keep:
- Index: User? Card Index?
- Card Brand
- Card Type
- Card Number
- Expiration date
    - Expiration Month
    - Expiration Year
- CVV
- Has Chip
- Cards Issued (TODO: look into it)
- Credit Limit
- Account Open Date
    - Account Open Month
    - Account Open Year
- Year PIN Last Changed
- Card on Dark Web

In [5]:
def _yes_no_to_bool(column, yes, no):
    """Convert a column of ``yes``/``no`` marker strings to booleans.

    Returns a bool-dtype Series when every value is one of the two markers.
    Otherwise the recognised markers are converted and every other value
    (including missing ones) is left untouched in an object-dtype Series.
    """
    converted = column.map({yes: True, no: False})
    if converted.notna().all():
        return converted.astype(bool)
    # Keep the original value wherever the mapping had no entry for it.
    return column.astype(object).where(converted.isna(), converted)


def LoadCards(filename=None):
    """Load the cards CSV and return a cleaned DataFrame.

    Cleaning steps:
    - column names are lower-cased and spaces become underscores;
    - ``expires`` and ``acct_open_date`` (month/year strings such as
      ``'12/2022'``) are each split into ``*_month`` and ``*_year`` columns;
    - ``has_chip`` ('YES'/'NO') and ``card_on_dark_web`` ('Yes'/'No') are
      converted to booleans;
    - the result is restricted to a fixed set of columns in a fixed order.

    ``credit_limit`` is returned exactly as read (no numeric conversion).

    Parameters
    ----------
    filename : str, path-like or file-like, optional
        Anything ``pandas.read_csv`` accepts. Defaults to ``CARDS_FILENAME``.

    Returns
    -------
    pandas.DataFrame

    Raises
    ------
    ValueError
        If a date value does not match the ``'%m/%Y'`` format.
    KeyError
        If an expected column is missing from the file.
    """
    if filename is None:
        filename = CARDS_FILENAME

    df = pd.read_csv(filename)
    df.columns = [name.lower().replace(' ', '_') for name in df.columns]

    # Parse each date column once, with an explicit format, instead of letting
    # pandas guess the layout on every access.
    acct_open = pd.to_datetime(df['acct_open_date'], format='%m/%Y')
    expires = pd.to_datetime(df['expires'], format='%m/%Y')

    df = df.assign(
        acct_open_month=acct_open.dt.month,
        acct_open_year=acct_open.dt.year,
        expires_month=expires.dt.month,
        expires_year=expires.dt.year,
        has_chip=_yes_no_to_bool(df['has_chip'], 'YES', 'NO'),
        card_on_dark_web=_yes_no_to_bool(df['card_on_dark_web'], 'Yes', 'No'),
    )

    # Selecting the kept columns also discards the raw 'expires' and
    # 'acct_open_date' strings.
    kept_columns = [
        'user', 'card_index', 'card_brand', 'card_type', 'card_number',
        'expires_month', 'expires_year', 'cvv', 'has_chip', 'cards_issued',
        'credit_limit', 'acct_open_month', 'acct_open_year',
        'year_pin_last_changed', 'card_on_dark_web',
    ]
    return df[kept_columns]

### Test

In [6]:
# Smoke test: load the cards data and display the first rows of the cleaned frame.
LoadCards().head()

Unnamed: 0,user,card_index,card_brand,card_type,card_number,expires_month,expires_year,cvv,has_chip,cards_issued,credit_limit,acct_open_month,acct_open_year,year_pin_last_changed,card_on_dark_web
0,0,0,Visa,Debit,4344676511950444,12,2022,623,True,2,$24295,9,2002,2008,False
1,0,1,Visa,Debit,4956965974959986,12,2020,393,True,2,$21968,4,2014,2014,False
2,0,2,Visa,Debit,4582313478255491,2,2024,719,True,2,$46414,7,2003,2004,False
3,0,3,Visa,Credit,4879494103069057,8,2024,693,False,1,$12400,1,2003,2012,False
4,0,4,Mastercard,Debit (Prepaid),5722874738736011,3,2009,75,True,1,$28,9,2008,2009,False


# User Dataset

Keep:
- Age
- Current Age
- Retirement age
- Birth Year
- Birth Month
- Gender
- City
- State
- Zipcode
- Per Capita Income Zipcode
- Yearly Income Person
- Total Debt
- FICO Score
- Number of Credit Cards

Drop:
- Person
- Address
- Apartment
- Latitude
- Longitude

In [7]:
def LoadUsers(filename=None):
    """Load the users CSV and return it without the identifying columns.

    Column names are lower-cased and the ``' - '`` separator is replaced by
    ``'_'`` (e.g. ``'Yearly Income - Person'`` -> ``'yearly income_person'``).
    The ``person``, ``address``, ``apartment``, ``latitude`` and ``longitude``
    columns are dropped; every other column is returned as read.

    Parameters
    ----------
    filename : str, path-like or file-like, optional
        Anything ``pandas.read_csv`` accepts. Defaults to ``USERS_FILENAME``.

    Returns
    -------
    pandas.DataFrame

    Raises
    ------
    KeyError
        If one of the columns to drop is missing from the file.
    """
    if filename is None:
        filename = USERS_FILENAME

    df = pd.read_csv(filename)

    # NOTE(review): unlike LoadCards, plain spaces are NOT turned into
    # underscores, so names such as 'current age' keep their space. Left as is
    # because downstream code may already rely on these names.
    df.columns = [name.lower().replace(' - ', '_') for name in df.columns]

    # One drop call instead of five, each of which copied the whole frame.
    return df.drop(columns=['person', 'address', 'apartment', 'latitude', 'longitude'])

In [8]:
# Smoke test: load the users data and display the first rows of the cleaned frame.
LoadUsers().head()

Unnamed: 0,current age,retirement age,birth year,birth month,gender,city,state,zipcode,per capita income_zipcode,yearly income_person,total debt,fico score,num credit cards
0,53,66,1966,11,Female,La Verne,CA,91750,$29278,$59696,$127613,787,5
1,53,68,1966,12,Female,Little Neck,NY,11363,$37891,$77254,$191349,701,5
2,81,67,1938,11,Female,West Covina,CA,91792,$22681,$33483,$196,698,5
3,63,63,1957,1,Female,New York,NY,10069,$163145,$249925,$202328,722,4
4,43,70,1976,9,Male,San Francisco,CA,94117,$53797,$109687,$183855,675,1
