# Generate a unique user table

In [None]:
import pandas as pd
import numpy as np
import psycopg2
import pymongo
import json
import datetime
import pickle
import functions as fn
import psycopg2
import io
from sqlalchemy import create_engine

In [None]:
# Read the pickled transactions back into memory
with open('initial_5pct_transactions.pkl', 'rb') as pkl_file:
    initial_5pct = pickle.load(pkl_file)

## Actor (payer) extraction and manipulation

In [None]:
# Build a dataframe holding one row per distinct actor (payer)

# Actor fields that contain null values; they are left out of the table
null_columns = (['email', 'friend_status', 'friends_count', 'identity',
                 'phone', 'trust_request'])

payers = []
payer_ids = set()
for transaction in initial_5pct:
    actor = transaction['payment']['actor']
    actor_id = actor['id']
    # Only the first occurrence of each actor id is recorded
    if actor_id not in payer_ids:
        payer_ids.add(actor_id)
        payers.append({key: val for key, val in actor.items()
                       if key not in null_columns})

payers_df = pd.DataFrame(payers)

In [None]:
# Print a summary of payers_df (columns, non-null counts, dtypes) to check the extraction
payers_df.info()

In [None]:
# Display the payer rows whose 'about' field is null
missing_about = payers_df['about'].isna()
payers_df.loc[missing_about]

Colleen Methvin's case is unusual: although this user has no username or date_joined, they made one successful transaction in the past, so we will not drop them from the table.

In [None]:
# Flag payers whose 'about' text is something other than a default bio (1 = personalised, 0 = default)

# The default values were identified while exploring the data in the eda_venmo notebook
about_default = [' ', 'No Short Bio', 'No short bio', '\n', ' \n', '  ', 'No Short Bio\n']
# NOTE: a missing (null) 'about' is not one of the defaults, so it is flagged as 1
is_default_about = payers_df['about'].isin(about_default)
about_personalised = (~is_default_about).astype(int).tolist()
payers_df['about_personalised'] = about_personalised

In [None]:
# Parse the date_joined strings into a proper datetime column
date_joined_format = '%Y-%m-%dT%H:%M:%S'
payers_df['date_joined'] = pd.to_datetime(payers_df['date_joined'],
                                          format=date_joined_format)

For the vast majority of users, display name is a combination of first and last name. However, for groups the display name is composed of first_name only. As a result, we will eliminate the first and last name columns to reduce the amount of data we load into our database. Moreover, we are not interested in the names for our analysis.

In [None]:
# Remove the name columns in place; they are not needed for the analysis
name_columns = ['first_name', 'last_name']
payers_df.drop(columns=name_columns, inplace=True)

In [None]:
# Create a column to determine if they have included a photo other than the default photo
# (1 = custom picture, 0 = one of the default images)

# Values for default come after having explored the data in the eda_venmo notebook
pic_default = (['https://s3.amazonaws.com/venmo/no-image.gif',
                'https://s3.amazonaws.com/venmo/placeholder-image/groups-placeholder.svg'])
# Compare the picture URL against the defaults. The previous version iterated
# over the 'about' column, so bios were being compared with picture URLs and
# the flag did not reflect the picture at all.
# NOTE(review): assumes the picture column is named 'profile_picture_url' —
# confirm against the payers_df.info() output.
pic_personalised = [0 if pic_url in pic_default else 1
                    for pic_url in payers_df['profile_picture_url']]
payers_df['pic_personalised'] = pic_personalised

The pic_personalised column should be treated with some caution, as it may not add much value: if users sign up through Facebook, their profile picture is automatically set to their Facebook profile picture. Consequently, a non-default picture does not necessarily indicate an engaged user, nor does it necessarily lead to more transactions.

In [None]:
# Summarise payers_df again to review its columns, non-null counts and dtypes
payers_df.info()

## User (payee) extraction and manipulation

In [None]:
# Collect one record per distinct target user (payee)

# User fields that contain null values; they are left out of the table
null_columns = (['email', 'friend_status', 'friends_count', 'identity',
                 'phone', 'trust_request'])

payees = []
payee_ids = set()
# Number of transactions whose target 'user' entry could not be indexed
# (presumably the target is not a user and the entry is None — verify)
counter = 0
for transaction in initial_5pct:
    user = transaction['payment']['target']['user']
    try:
        user_id = user['id']
    except TypeError:
        counter += 1
    else:
        # Only the first occurrence of each user id is recorded
        if user_id not in payee_ids:
            payee_ids.add(user_id)
            payees.append({key: val for key, val in user.items()
                           if key not in null_columns})

In [None]:
# Turn the list of payee dicts into a dataframe (one row per distinct payee)
payees_df = pd.DataFrame(payees)

In [None]:
# Print a summary of payees_df (columns, non-null counts, dtypes) to check the extraction
payees_df.info()

In [None]:
# Flag payees whose 'about' text is something other than a default bio (1 = personalised, 0 = default)

# The default values were identified while exploring the data in the eda_venmo notebook
about_default = [' ', 'No Short Bio', 'No short bio', '\n', ' \n', '  ', 'No Short Bio\n']
# NOTE: a missing (null) 'about' is not one of the defaults, so it is flagged as 1
is_default_about = payees_df['about'].isin(about_default)
about_personalised = (~is_default_about).astype(int).tolist()
payees_df['about_personalised'] = about_personalised

In [None]:
# Parse the date_joined strings into a proper datetime column
date_joined_format = '%Y-%m-%dT%H:%M:%S'
payees_df['date_joined'] = pd.to_datetime(payees_df['date_joined'],
                                          format=date_joined_format)

In [None]:
# Summarise payees_df to check the date_joined dtype after the conversion
payees_df.info()

In [None]:
# Remove the name columns in place; they are not needed for the analysis
name_columns = ['first_name', 'last_name']
payees_df.drop(columns=name_columns, inplace=True)

In [None]:
# Create a column to determine if they have included a photo other than the default photo
# (1 = custom picture, 0 = one of the default images)

# Values for default come after having explored the data in the eda_venmo notebook
pic_default = (['https://s3.amazonaws.com/venmo/no-image.gif',
                'https://s3.amazonaws.com/venmo/placeholder-image/groups-placeholder.svg'])
# Compare the picture URL against the defaults. The previous version iterated
# over the 'about' column, so bios were being compared with picture URLs and
# the flag did not reflect the picture at all.
# NOTE(review): assumes the picture column is named 'profile_picture_url' —
# confirm against the payees_df.info() output.
pic_personalised = [0 if pic_url in pic_default else 1
                    for pic_url in payees_df['profile_picture_url']]
payees_df['pic_personalised'] = pic_personalised

In [None]:
# Summarise payees_df again to review its columns, non-null counts and dtypes
payees_df.info()

## Unique user table

In [None]:
# Payee ids that never appear as a payer; these complete the user list

payees_not_payers = payee_ids - payer_ids

In [None]:
# Keep only the payee rows whose id is in payees_not_payers
is_new_user = payees_df['id'].isin(payees_not_payers)
payees_not_payers_df = payees_df.loc[is_new_user]

In [None]:
# Stack payers and payee-only users row-wise, then expose the id as user_id
user_frames = [payers_df, payees_not_payers_df]
unique_users = pd.concat(user_frames, axis=0).rename(columns={"id": "user_id"})

In [None]:
# Sanity check: the combined table should have one row per payer plus one per payee who never paid
len(payers_df) + len(payees_not_payers) == len(unique_users)

In [None]:
# Print a summary of the combined user table (columns, non-null counts, dtypes)
unique_users.info()

## Loading the resulting users table into the venmo_transactions db

In [None]:
# Move unique user table into database
import os

# The connection URL can be supplied through the VENMO_DB_URL environment
# variable so that credentials do not have to live in the notebook.
# NOTE(review): the fallback below still embeds a username and password in
# source control; rotate that password and remove the fallback once the
# environment variable is set wherever this notebook runs.
db_url = os.environ.get(
    'VENMO_DB_URL',
    'postgresql://jjherranzsarrion:jj2gNozalo@localhost/venmo_transactions',
)
engine = create_engine(db_url)
# Writes the frame to a table named 'users'. With pandas' defaults the
# DataFrame index is stored as a column and the call raises if the table
# already exists.
unique_users.to_sql('users', engine)