# Lending Loop Web App
This is a prototype for a Lending Loop web app / dashboard. This notebook focuses on the logic behind the Lending Loop summary calculations.

# System Initializations

### Package Initializations

In [1]:
# DataFrames
import pandas as pd
import findspark
import pyspark
import pyspark.sql.functions as F
import pyspark.sql.types as T

# Databases
import pymongo
import json

# Numerical Packages
import numpy as np

# Misc Packages
import os

# Visualizations
import plotly
import plotly.offline as pyo
import plotly.graph_objs as go

### System Configuration


In [46]:
# PySpark launch arguments: MongoDB connector package and driver memory.
# They are placed in the environment here, before the SparkSession is created below.
packages = 'org.mongodb.spark:mongo-spark-connector_2.11:2.2.0'
dedicated_memory = '1g'

os.environ['PYSPARK_SUBMIT_ARGS'] = (
    '--packages {} --driver-memory {} pyspark-shell'.format(packages, dedicated_memory)
)

# Pandas display: -1 is the legacy "no limit" value for the column width
pd.set_option('display.max_colwidth', -1)

# Plotly offline: render figures inline in the notebook
pyo.init_notebook_mode(connected=True)

In [3]:
# Let findspark locate SPARK_HOME so that pyspark can be used from the notebook
findspark.init()

# Create the SparkSession shared by every cell below (reused if one already exists)
spark = pyspark.sql.SparkSession.builder.appName('LendingLoop').getOrCreate()

### Use Cases
- Dashboard integration
    - How much was invested?
    - Net rate of return
    - Distribution of notes
    - Diversification index
    - Bar chart showing amount regained from each note
- Update Lending Loop transactions (hopefully somewhat automatically)

### TODO
- Develop one-time database initialization technique (Indexing function might be done separately)
- Develop method to update info 
- Figure out if states are saved or inferred through transactions
    - Probably have a transaction collection and a state collection. Update `state` using `transaction`s if `transaction`'s input time was later than `state`'s update time
- Figure out collection schemas
- Figure out what the overall scheme would look like
    - Payment Bar Chart Visualization
        - Bar chart showing the amount left to recover the investment (and the amount of interest earned)
        - Overall (x-axis: All note grades) (maybe use a windrose?)
        - By group (x-axis: All active notes within group)
        - By lists of individual notes
    - Distribution of Notes Histogram
        - x-axis (Note Grade)
        - y-axis (% of invested income)
        - Hoverinfo (% invested income, amount invested, number of notes)
    - Weighted Average Net ROI
        - Text Displaying Weighted Average Net ROI 

### Helper Function Definitions

In [107]:
def import_new_notes(DF, db='lendingLoop', coll='notes'):
    '''
    Search DF for new notes and import them into MongoDB.

    DF is a Spark DataFrame of payment rows. The columns `_id`, `company`,
    `loanName`, `interestRate`, `grade`, `dueDate` and `totalScheduled` are read
    from it. Payments are collapsed to one row per note, notes whose `_id` is
    already stored in `db.coll` are dropped, and the remaining notes are appended
    to that collection with `remainingCycles`, `feesAccrued` and `amountRepayed`
    initialised.

    Parameters:
        DF   -- Spark DataFrame of payments (one row per scheduled payment).
        db   -- name of the Mongo database.
        coll -- name of the Mongo collection inside `db`.

    Returns:
        None. Failures while reading from or writing to Mongo through Spark are
        reported with a printed message instead of being raised.
    '''
    mongoUri = 'mongodb://127.0.0.1/{}.{}'.format(db, coll)

    def _principal_from_payments(unitPay, interestRate, totalPayCycles):
        '''
        Calculate the principal invested based on the unitPay, interestRate and totalPayCycles.

        interestRate is an annual percentage; it is converted to a monthly fraction
        before the annuity present-value formula is applied.
        '''
        if unitPay is None or interestRate is None or totalPayCycles is None:
            return None

        monthlyRate = interestRate / 1200.

        # The annuity formula divides by the rate; at 0% the principal is simply
        # the sum of the payments.
        if monthlyRate == 0:
            return float(unitPay * totalPayCycles)

        return unitPay / monthlyRate * (1 - 1. / (1 + monthlyRate)**totalPayCycles)

    calculate_principal = F.udf(_principal_from_payments, T.DoubleType())

    # The Mongo client is only needed to find out whether the collection is empty.
    # `find_one` is used instead of `count` because it exists in every pymongo release.
    client = pymongo.MongoClient()
    try:
        collectionIsEmpty = client[db][coll].find_one() is None
    finally:
        client.close()

    # Retrieve existing notes ID from MongoDB
    if collectionIsEmpty:
        # NOTE(review): an empty collection is special-cased with an explicit schema,
        # presumably because the connector has no documents to infer `_id` from.
        existingNotesDF = spark.createDataFrame(
            spark.sparkContext.emptyRDD(),
            schema=T.StructType([T.StructField('_id', T.IntegerType())]))
    else:
        try:
            existingNotesDF = (spark
                               .read
                               .format('com.mongodb.spark.sql.DefaultSource')
                               .option('uri', mongoUri)
                               .load()
                               .select(F.col('_id').cast(T.IntegerType())))
        except Exception as e:
            print('Failed to load existing notes id\'s.\n{}'.format(e))
            return

    # Obtain list of unique notes from input DF.
    # The earliest due date is taken with `min`: `first` after an `orderBy` is not
    # guaranteed to respect the ordering once the rows are shuffled by `groupBy`.
    uniqueNotesDF = (DF
                     .groupBy('_id', 'company', 'loanName', 'interestRate', 'grade')
                     .agg(F.count('_id').alias('totalCycles'),
                          F.round(F.mean('totalScheduled'), 2).alias('unitPayment'),
                          F.add_months(F.min('dueDate'), -1).alias('startDate'))
                     .withColumn('principal',
                                 F.round(calculate_principal('unitPayment', 'interestRate', 'totalCycles'), 0)))

    # Filter out new notes using left-anti join with `existingNotesDF`.
    # `totalCycles` is already a long (it comes from `count`), so it is copied directly.
    newNotesDF = (uniqueNotesDF
                  .join(existingNotesDF, '_id', 'leftanti')
                  .withColumn('remainingCycles', F.col('totalCycles'))
                  .withColumn('feesAccrued', F.lit(0.00))
                  .withColumn('amountRepayed', F.lit(0.00))
                  .cache())

    # Save new notes into MongoDB
    try:
        # Count BEFORE writing: the DataFrame is lazy, so counting afterwards would
        # re-run the anti-join against a collection that already holds the new notes.
        newNotesCount = newNotesDF.count()

        (newNotesDF
         .write
         .format('com.mongodb.spark.sql.DefaultSource')
         .option('uri', mongoUri)
         .mode('append')
         .save())
    except Exception as e:
        print('Failed to load new notes into MongoDB.\n{}'.format(e))
        return
    finally:
        newNotesDF.unpersist()

    if newNotesCount == 1:
        print('Import successful. One new note was successfully imported to "{}.{}"'.format(db, coll))
    else:
        print('Import successful. {} new notes were successfully imported to "{}.{}"'.format(newNotesCount,
                                                                                              db,
                                                                                              coll))
    return

def load_mongo_to_spark(coll, db='lendingLoop'):
    '''
    Read the Mongo collection `db.coll` through the Spark session and return it
    as a Spark DataFrame.

    If the read fails, the error is printed and None is returned.
    '''
    try:
        mongoUri = 'mongodb://127.0.0.1/{}.{}'.format(db, coll)
        reader = spark.read.format('com.mongodb.spark.sql.DefaultSource')
        return reader.option('uri', mongoUri).load()
    except Exception as error:
        print('Failed to create Spark dataframe.\n{}'.format(error))
        return None

def display_DF(sparkDF, n=5):
    '''
    Return the first n rows of sparkDF converted to a pandas dataframe, so the
    notebook can render them interactively.
    '''
    firstRows = sparkDF.limit(n)
    return firstRows.toPandas()

# Test Area

### Import dataset

In [100]:
# Read raw CSV file (header row supplies the column titles, types are inferred)
rawDF = (spark
         .read
         .format('com.databricks.spark.csv')
         .option('header', 'True')
         .option('inferschema', 'True')
         .load('all_payments.csv'))

# Camel case titles, e.g. 'Some Title' -> 'someTitle'
camelCaseDict = {title: title[0].lower() + title.replace(' ', '')[1:] for title in rawDF.columns}

# Simplify certain column titles
camelCaseDict['Fees Paid to Loop'] = 'fees'
camelCaseDict['Risk Band'] = 'grade'
camelCaseDict['Loan Id'] = '_id'

# Camelcase column titles and replace `loanId` with `_id`.
# Iterate over `rawDF.columns` rather than the dict keys so the renamed frame keeps
# the CSV's column order instead of an arbitrary dict ordering.
rawDF = rawDF.select([F.col(title).alias(camelCaseDict[title]) for title in rawDF.columns])

In [101]:
# Preview the first rows of the renamed payments frame
display_DF(rawDF)

Unnamed: 0,status,interestPaid,paymentType,principalScheduled,totalOwed,_id,grade,interestRate,company,principalPaid,dueDate,interestScheduled,totalScheduled,fees,loanName,principalOwed,totalPaid,interestOwed,datePaid
0,Scheduled,0.0,Scheduled Payment,2.39,3.25,59705,D,20.67,Nice and Smooth Ultramedia Inc.,0.0,2017-09-22,0.86,3.25,0.06,Concert Producer & Promoter,2.39,0.0,0.86,
1,Scheduled,0.0,Scheduled Payment,2.83,3.47,83910,B+,10.33,Bronze Baxx Tanning Studio Inc.,0.0,2017-09-22,0.64,3.47,0.09,Salon - Equipment Purchase (1 of 2),2.83,0.0,0.64,
2,Scheduled,0.0,Scheduled Payment,1.27,1.91,7802,B+,10.33,Brightpath Capital Corporation,0.0,2017-09-25,0.64,1.91,0.09,Mortgage Lender - Working Capital,1.27,0.0,0.64,
3,Scheduled,0.0,Scheduled Payment,0.82,1.83,96625,C,16.18,Rossco's Tree Service and contracting Ltd.,0.0,2017-09-30,1.01,1.83,0.09,Arborist - Lease Buyouts (1 of 3),0.82,0.0,1.01,
4,Scheduled,0.0,Scheduled Payment,1.63,2.64,58609,C,16.18,5686645 Manitoba LTD.,0.0,2017-10-07,1.01,2.64,0.09,Contractor - Working Capital,1.63,0.0,1.01,


### Add new notes to collection

In [108]:
# Append notes found in rawDF that are not yet stored in the default `lendingLoop.notes` collection
import_new_notes(rawDF)

Import successful. 17 new notes were successfully imported to "lendingLoop.notes"


### Pull new collection

In [109]:
# Load the `notes` collection back from Mongo and preview its first rows
notesDF = load_mongo_to_spark('notes')
display_DF(notesDF)

Unnamed: 0,_id,amountRepayed,company,feesAccrued,grade,interestRate,loanName,principal,remainingCycles,startDate,totalCycles,unitPayment
0,4370,0.0,631343 alberta ltd,0.0,C+,14.11,Denture Clinic - Leasehold Improvements and Marketing,25.0,36,2017-09-19,36,0.85
1,7802,0.0,Brightpath Capital Corporation,0.0,B+,10.33,Mortgage Lender - Working Capital,75.0,48,2017-08-25,48,1.91
2,14941,0.0,Anthony C. C. Chan Inc,0.0,B+,10.33,Accounting Practice - Expansion/Renovation,75.0,24,2017-09-15,24,3.47
3,29560,0.0,NuEnergy Systems Inc.,0.0,A+,6.8,Integrated Solar Energy Services Company,75.0,36,2017-09-15,36,2.3
4,36403,0.0,Social Theorem Consulting Inc,0.0,D+,18.4,Digital Media Agency - Refinancing And Continued Expansion,25.0,36,2017-09-07,36,0.9


### Create custom progress bar

In [50]:
def progress_bar(notesDF):
    '''
    Creates a custom progress bar based on notesDF.

    Draws one horizontal stacked bar per row of notesDF, labelled by company and
    split into: amount repayed, amount outstanding, fees accrued, profit received
    and profit outstanding.

    notesDF must provide the columns stored by `import_new_notes`: `company`,
    `principal`, `unitPayment`, `totalCycles`, `amountRepayed` and `feesAccrued`.

    Repayments are treated as recovering the principal first; anything repaid
    beyond the principal counts as profit. The total profit expected from a note
    is `unitPayment * totalCycles - principal`.

    Returns:
        None. The figure is rendered inline through plotly offline.
    '''
    def _create_trace(values, labels, name):
        '''
        Create one horizontal bar trace called `name` with `values` plotted against `labels`.
        '''
        return go.Bar(
            x = values,
            y = labels,
            name = name,
            orientation = 'h'
        )

    # Column expressions shared by the segments below
    zero = F.lit(0.)
    repaid = F.col('amountRepayed')
    principal = F.col('principal')
    expectedProfit = F.col('unitPayment') * F.col('totalCycles') - principal
    profitReceived = F.greatest(repaid - principal, zero)

    # (trace name, column expression) for every segment, in stacking order.
    # The repayed segment is capped at the principal so that profit is not counted
    # twice once the bars are stacked.
    segments = [
        ('Amount Repayed', F.least(repaid, principal)),
        ('Amount Outstanding', F.greatest(principal - repaid, zero)),
        ('Fees Accrued', F.col('feesAccrued')),
        ('Profit Received', profitReceived),
        ('Profit Outstanding', F.greatest(expectedProfit - profitReceived, zero)),
    ]

    # Evaluate every segment in a single Spark job instead of one job per column
    rows = (notesDF
            .select([F.col('company')] + [expression.alias(label) for label, expression in segments])
            .collect())

    # Obtain list of companies
    companiesList = [row['company'] for row in rows]

    # Create traces
    data = [_create_trace([row[label] for row in rows], companiesList, label)
            for label, _ in segments]

    layout = go.Layout(
        xaxis = {
            'domain': [0.2, 1]
        },
        # 'stack' is the plotly barmode value for stacked bars
        barmode = 'stack'
    )

    figure = go.Figure(data=data, layout=layout)

    pyo.iplot(figure)

In [51]:
# Render the repayment progress chart for the notes loaded above
progress_bar(notesDF)