In [None]:
import beneath
import pandas as pd
import numpy as np
from datetime import datetime, timezone

### Create a client

Before creating a Beneath client, you'll need to authenticate your environment by running `beneath auth SECRET` on the command line.

In [None]:
# Create the Beneath client used by every cell below.
# NOTE(review): presumably this picks up the credentials stored by `beneath auth` — confirm against the Beneath docs.
client = beneath.Client()

### Config

In [None]:
# The three components that make up the stream path.
username = "greenep12"
project_name = "lending-club"
stream_name = "loans-history"

# Full stream path in the form "<username>/<project_name>/<stream_name>".
STREAM_PATH = "/".join([username, project_name, stream_name])
# Read the stream schema text from disk. The context manager closes the file
# handle as soon as the text has been read, rather than leaving it open until
# the file object happens to be garbage collected.
with open("loans_history.graphql", "r") as schema_file:
    SCHEMA = schema_file.read()

### Stage stream

In [None]:
stream = await client.stage_stream(
    stream_path=STREAM_PATH, 
    schema=SCHEMA,
)

### Stage instance

In [None]:
# Stage version 0 of the stream as an instance and mark it as primary;
# the write cell below opens its writer on this instance.
instance = await stream.stage_instance(version=0, make_primary=True)

### Write data to Beneath

Lending Club distributes their historical loan records as CSV files that you can download [here](https://www.lendingclub.com/info/statistics.action).

These CSV files have a slightly different schema from the new loans that Lending Club lists on its website. Because we want a consistent schema, this script transforms the old records to match the schema of the new records.

In [None]:
periods = ['2016Q1', '2016Q2', '2016Q3', '2016Q4', '2017Q1', '2017Q2', '2017Q3', '2017Q4', 
          '2018Q1', '2018Q2', '2018Q3', '2018Q4', '2019Q1', '2019Q2', '2019Q3', '2019Q4', '2020Q1']

for i in range(0, len(periods)):
    print('period: ', periods[i])
    
    print('reading file...')
    
    path = '~/Downloads/LoanStats_securev1_' + periods[i] + '.csv'
    data = pd.read_csv(path, skiprows=1)
    data = data[:-2] # cut out the notes at the bottom of each csv file

    print('aligning schema...')
    
    data['id'] = data['id'].apply(lambda x: int(x))
    data['issue_d'] = data['issue_d'].apply(lambda x: datetime.strptime(x, '%b-%Y'))
    data['term'] = data['term'].apply(lambda x: int(x[0:3]) if type(x) == str else x)
    data['int_rate'] = data['int_rate'].apply(lambda x: float(x[:-1]) if type(x) == str else x)
    data = data.rename(columns = {'loan_amnt': 'loan_amount'})
    data['loan_amount'] = data['loan_amount'].apply(lambda x: float(x))
    data['purpose'] = data['purpose'].apply(lambda x: str(x))
    data['home_ownership'] = data['home_ownership'].apply(lambda x: str(x))
    data['annual_inc'] = data['annual_inc'].apply(lambda x: float(x))
    data['addr_state'] = data['addr_state'].apply(lambda x: str(x))
    data['acc_now_delinq'] = data['acc_now_delinq'].apply(lambda x: int(x))
    data['dti'] = data['dti'].apply(lambda x: float(x))
    data['fico_range_high'] = data['fico_range_high'].apply(lambda x: int(x))
    data['open_acc'] = data['open_acc'].apply(lambda x: int(x))
    data['pub_rec'] = data['pub_rec'].apply(lambda x: int(x))
    data['revol_util'] = data['revol_util'].apply(lambda x: float(x[:-1]) if type(x) != float else x)
    data['loan_status'] = data['loan_status'].apply(lambda x: str(x))
    
    print('writing to Beneath...')
    
    async with instance.writer() as w:
        await w.write(data.to_dict('records'))