# Creating a unique payments table

In [1]:
import pandas as pd
import numpy as np
import psycopg2
import pymongo
import json
import datetime
import pickle
import functions as fn

## Extracting payment information

In [2]:
# Read the pickled transactions (presumably a 5% sample, judging by the file name) into memory.
# NOTE: pickle.load can execute arbitrary code, so only open pickle files from a trusted source.
with open('initial_5pct_transactions.pkl', 'rb') as pickle_file:
    initial_5pct = pickle.load(pickle_file)

In [3]:
# Top-level payment fields that are copied into the flat record unchanged.
keys = ['note', 'action', 'status', 'date_created', 'id',
        'merchant_split_purchase', 'audience', 'date_completed']
# Payment fields that hold a nested dictionary which has to be flattened.
subdictionary_keys = ['target', 'actor']
# Only including the keys of the payment target subdictionary that contain values
target_keys = ['redeemable_target', 'type']
# Nested key whose value is itself a dictionary; only its id is kept.
user_key = ['user']
# Name of the identifier field, both directly inside a subdictionary and inside 'user'.
actor_key = ['id']


def flatten_payment(payment_details):
    """Flatten the 'payment' dictionary of one transaction into a single-level dict.

    Parameters
    ----------
    payment_details : dict
        The value stored under ``transaction['payment']``.

    Returns
    -------
    dict
        * every top-level field listed in ``keys`` under its own name;
        * for each subdictionary listed in ``subdictionary_keys``, the fields
          listed in ``target_keys`` and ``actor_key`` as ``'<key>_<subkey>'``
          (e.g. ``'target_type'``, ``'actor_id'``);
        * the id of a nested user as ``'<key>_user_id'``
          (e.g. ``'target_user_id'``).

        Fields that are not listed are dropped. Insertion order follows the
        order of the input dictionary.

    Raises
    ------
    KeyError
        If a nested user dictionary has no ``'id'`` field.
    """
    id_field = actor_key[0]
    flat = {}
    for key, val in payment_details.items():
        if key in keys:
            flat[key] = val
        elif key in subdictionary_keys:
            for subkey, subval in val.items():
                if subkey in user_key:
                    # A payment without a payee carries no user dictionary
                    # (e.g. None); leave the id out so that it shows up as a
                    # missing value in the resulting table.
                    if isinstance(subval, dict):
                        flat[f'{key}_{subkey}_{id_field}'] = subval[id_field]
                elif subkey in target_keys or subkey in actor_key:
                    flat[f'{key}_{subkey}'] = subval
    return flat


# One flat record per transaction, in the original transaction order.
payments = [flatten_payment(transaction['payment']) for transaction in initial_5pct]

In [4]:
# Turn the list of flat payment records into a table; keys absent from a record become missing values.
payments_df = pd.DataFrame(data=payments)

In [5]:
# Preview the first five payments to sanity-check the flattened columns.
payments_df.head(n=5)

Unnamed: 0,action,actor_id,audience,date_completed,date_created,id,merchant_split_purchase,note,status,target_redeemable_target,target_type,target_user_id
0,pay,2206066431492096327,public,2018-07-26T18:48:10,2018-07-26T18:48:10,2532209455660008361,,for utilities,settled,,user,1572642482028544167
1,pay,2200417693859840681,public,2018-07-26T18:48:08,2018-07-26T18:48:08,2532209439595823434,,👕!,settled,,user,2242966299082752545
2,pay,2373608382922752189,public,2018-07-26T18:48:08,2018-07-26T18:48:08,2532209440686343010,,Thank you!,settled,,user,1984520039432192983
3,pay,1670504276557824171,public,2018-07-26T18:48:08,2018-07-26T18:48:08,2532209443756573591,,📱💸,settled,,user,1780528822878208201
4,pay,1957419122950144676,public,2018-07-26T18:48:09,2018-07-26T18:48:08,2532209445073584640,,Mt Dew & candy,settled,,user,2496761604079616999


In [6]:
# Column overview: non-null counts and dtypes of the raw payments table.
payments_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 353829 entries, 0 to 353828
Data columns (total 12 columns):
action                      353829 non-null object
actor_id                    353829 non-null object
audience                    353829 non-null object
date_completed              351407 non-null object
date_created                353829 non-null object
id                          353829 non-null object
merchant_split_purchase     14 non-null object
note                        353829 non-null object
status                      353829 non-null object
target_redeemable_target    714 non-null object
target_type                 353829 non-null object
target_user_id              351407 non-null object
dtypes: object(12)
memory usage: 32.4+ MB


The 2,422 missing values in the `date_completed` and `target_user_id` columns (353,829 − 351,407) come from transactions that don't have a payee; as such, they are never completed (they are deemed pending or cancelled).

In [7]:
# Call the `id` column `payment_id` so it is easier to recognise in the db.
payments_df = payments_df.rename(columns={'id': 'payment_id'})

In [8]:
# Parse both timestamp columns from ISO-style strings (e.g. 2018-07-26T18:48:10)
# into pandas datetimes; missing values stay missing.
for date_col in ('date_completed', 'date_created'):
    payments_df[date_col] = pd.to_datetime(payments_df[date_col], format='%Y-%m-%dT%H:%M:%S')

In [9]:
# Show the payments that carry a redeemable target (a dictionary).
# Row mask and column label go into a single .loc call: this avoids chained
# indexing, which would first materialise every column of the filtered rows.
payments_df.loc[payments_df['target_redeemable_target'].notnull(), 'target_redeemable_target']

230       {'display_name': 'a user on iMessage', 'type':...
568       {'display_name': 'a user on iMessage', 'type':...
680       {'display_name': 'a user on iMessage', 'type':...
1546      {'display_name': 'a user on iMessage', 'type':...
1685      {'display_name': 'a user on iMessage', 'type':...
1691      {'display_name': 'a user on iMessage', 'type':...
1924      {'display_name': 'a user on iMessage', 'type':...
3255      {'display_name': 'a user on iMessage', 'type':...
3724      {'display_name': 'a user on iMessage', 'type':...
3937      {'display_name': 'a user on iMessage', 'type':...
4166      {'type': 'imessage', 'display_name': 'a user o...
4245      {'type': 'imessage', 'display_name': 'a user o...
4357      {'display_name': 'a user on iMessage', 'type':...
4557      {'type': 'imessage', 'display_name': 'a user o...
4903      {'type': 'imessage', 'display_name': 'a user o...
5321      {'type': 'imessage', 'display_name': 'a user o...
5740      {'type': 'imessage', 'display_

In [10]:
# Show the payments that have merchant split purchase details attached.
payments_df[payments_df['merchant_split_purchase'].notna()]

Unnamed: 0,action,actor_id,audience,date_completed,date_created,payment_id,merchant_split_purchase,note,status,target_redeemable_target,target_type,target_user_id
73057,charge,1062663232684032448,public,2018-07-27 14:59:55,2018-07-27 14:59:55,2532819349572420300,"{'merchant_name': 'Venmo Card', 'authorization...",🐠,settled,,user,1654487949246464697
149357,charge,2169161446850560960,public,2018-07-28 03:39:05,2018-07-28 03:39:05,2533201452092883634,"{'merchant_name': 'Grubhub', 'authorization_id...",:festival_grilled_cheese:🍟🌭🍔🥗,settled,,user,1648691387564032804
205899,charge,1998191457206272388,public,2018-07-28 14:22:24,2018-07-28 14:22:24,2533525241842369486,"{'merchant_name': 'Menufy', 'authorization_id'...",💯,settled,,user,1873593885523968226
215368,charge,1639972704616448085,public,2018-07-28 16:06:06,2018-07-28 16:06:06,2533577438529585462,"{'merchant_name': 'Seamless', 'authorization_i...",🍣,settled,,user,1732106371727360371
230045,charge,2406965112209408610,public,2018-07-28 18:40:40,2018-07-28 18:40:40,2533655236325671433,"{'merchant_name': 'Grubhub', 'authorization_id...",🐜🐜🐜,settled,,user,2453886279352320627
237898,charge,1638026044243968063,public,2018-07-28 20:05:18,2018-07-28 20:05:18,2533697829432459466,"{'merchant_name': 'Grubhub', 'authorization_id...",🍣,settled,,user,1634343554383872446
256143,charge,2163212782927872080,public,2018-07-28 22:41:27,2018-07-28 22:41:27,2533776426771940027,"{'merchant_name': 'Delivery.com', 'authorizati...",😘😘,settled,,user,2280649847209984586
258305,charge,1668441207472128952,public,2018-07-28 22:57:04,2018-07-28 22:57:04,2533784282602668089,"{'merchant_name': 'Grubhub', 'authorization_id...",Bibim-BOP,settled,,user,1432157742432256773
260214,charge,1809965043941376709,public,2018-07-28 23:27:44,2018-07-28 23:27:44,2533799716148216791,"{'merchant_name': 'Seamless', 'authorization_i...",mamma Mia I’m hungry!,settled,,user,1976472545787904626
272936,charge,1773974551789568897,public,2018-07-29 01:36:09,2018-07-29 01:36:09,2533864352042189493,"{'merchant_name': 'Venmo Card', 'authorization...",Do wop,settled,,user,2161829300142080397


I am unsure how to deal with the dictionary-valued columns (`target_redeemable_target` and `merchant_split_purchase`), so for now I will just keep them as they are.

In [11]:
# Re-check the table after the rename and the date conversion: `payment_id` should
# be listed and both date columns should now be datetime64[ns].
payments_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 353829 entries, 0 to 353828
Data columns (total 12 columns):
action                      353829 non-null object
actor_id                    353829 non-null object
audience                    353829 non-null object
date_completed              351407 non-null datetime64[ns]
date_created                353829 non-null datetime64[ns]
payment_id                  353829 non-null object
merchant_split_purchase     14 non-null object
note                        353829 non-null object
status                      353829 non-null object
target_redeemable_target    714 non-null object
target_type                 353829 non-null object
target_user_id              351407 non-null object
dtypes: datetime64[ns](2), object(10)
memory usage: 32.4+ MB
