# Lending Loop Web App
This is a prototype for a Lending Loop web app / dashboard. This notebook focuses on the logic behind the Lending Loop summary calculations.

# System Initializations

### Package Initializations

In [1]:
# DataFrames
import pandas as pd
import findspark
import pyspark
import pyspark.sql.functions as F
import pyspark.sql.types as T

# Databases
import pymongo

# Misc Packages
import os

# Visualizations
import plotly
import plotly.offline as pyo
import plotly.graph_objs as go

### PySpark Session Initialization

In [2]:
# Settings handed to spark-submit through the PYSPARK_SUBMIT_ARGS variable.
# `packages` holds the Maven coordinates of the MongoDB Spark connector;
# `dedicated_memory` is the value given to --driver-memory.
# NOTE(review): the connector coordinates presumably have to match the local
# Spark/Scala build -- confirm before changing versions.
packages = 'org.mongodb.spark:mongo-spark-connector_2.11:2.2.0'
dedicated_memory = '1g'

os.environ['PYSPARK_SUBMIT_ARGS'] = (
    '--packages {} --driver-memory {} pyspark-shell'.format(packages, dedicated_memory)
)

In [3]:
# Let findspark locate SPARK_HOME so that the pyspark package can be used
findspark.init()

# Build the notebook-wide SparkSession named 'LendingLoop', or reuse the
# existing one if a session has already been created
spark = pyspark.sql.SparkSession.builder.appName('LendingLoop').getOrCreate()

### Use Cases
- Dashboard integration
    - How much was invested?
    - Net rate of return
    - Distribution of notes
    - Diversification index
    - Bar chart showing amount regained from each note
- Update Lending Loop transactions (hopefully somewhat automatically)

### TODO
- Develop one-time database initialization technique (Indexing function might be done separately)
- Develop method to update info 
- Figure out if states are saved or inferred through transactions
    - Probably have a transaction collection and a state collection. Update `state` using `transaction`s if `transaction`'s input time was later than `state`'s update time
- Figure out collection schemas
- Figure out what the overall scheme would look like
    - Payment Bar Chart Visualization
        - Bar chart showing amount left to recover the investment (and amount of interest earned)
        - Overall (x-axis: All note grades) (maybe use a windrose?)
        - By group (x-axis: All active notes within group)
        - By lists of individual notes
    - Distribution of Notes Histogram
        - x-axis (Note Grade)
        - y-axis (% of invested income)
        - Hoverinfo (% invested income, amount invested, number of notes)
    - Weighted Average Net ROI
        - Text Displaying Weighted Average Net ROI 

### Helper Function Definitions

In [None]:
def import_to_mongo(path, coll, db='lendingLoop', create_index=True):
    '''
    Import the records produced by `parse(path)` into the Mongo collection
    `db`.`coll` (the original notes describe `path` as a json.gz file).

    Nothing is inserted when the target collection already holds at least one
    document. Errors raised while parsing or inserting are reported with a
    printed message instead of being propagated. The Mongo client is closed
    on every exit path.

    Parameters:
        path         -- handed unchanged to `parse`, which supplies the
                        documents to insert
        coll         -- name of the target collection
        db           -- name of the target database
        create_index -- intended to toggle index creation; index creation is
                        not implemented yet, so the flag currently has no
                        effect

    Returns:
        None

    NOTE(review): `parse` is not defined in the visible part of this
    notebook -- confirm it is defined before this function is called.
    '''
    # Obtain handle to the Mongo server; try/finally guarantees the
    # connection is released no matter which branch returns
    client = pymongo.MongoClient()
    try:
        collection = client[db][coll]

        # Return prematurely if database.collection already holds data.
        # find_one() replaces Collection.count(), which is deprecated and
        # absent from newer PyMongo releases.
        if collection.find_one() is not None:
            print('{}.{} already exists on MongoDisk server. Exiting without loading JSON data.'.format(db, coll))
            return

        # Insert datapoints into Mongo database
        try:
            collection.insert_many((datapoint for datapoint in parse(path)))
            print('JSON data successfully imported to Mongo at \'{}.{}.\''.format(db, coll))
        except Exception as e:
            print('Error loading data.\n{}'.format(e))
            return

        if not create_index:
            return

        # TODO: create database index for improved searching once the
        # collection schema is settled. Earlier placeholder, kept for reference:
        # collection.create_index([('asin', pymongo.ASCENDING), ('reviewerID', pymongo.DESCENDING)])
    finally:
        client.close()

def load_mongo_to_spark(coll, db='hackon'):
    '''
    Load the Mongo collection `db`.`coll` into a Spark DataFrame.

    The read goes through the notebook-level `spark` session, using the
    'com.mongodb.spark.sql.DefaultSource' format against the Mongo server
    at 127.0.0.1.

    Parameters:
        coll -- name of the collection to read
        db   -- name of the database holding the collection

    Returns:
        The loaded Spark DataFrame, or None when loading fails (the error is
        printed rather than raised).

    NOTE(review): the default database 'hackon' differs from the
    'lendingLoop' default used by import_to_mongo -- confirm which one is
    intended, or always pass `db` explicitly.
    '''
    mongo_uri = 'mongodb://127.0.0.1/{}.{}'.format(db, coll)
    try:
        return (spark
                .read
                .format('com.mongodb.spark.sql.DefaultSource')
                .option('uri', mongo_uri)
                .load())
    except Exception as e:
        # Best-effort: report the failure and hand back None so the
        # notebook keeps running; callers must check for None
        print('Failed to create Spark dataframe.\n{}'.format(e))
        return None

def displayDF(sparkDF, n=10):
    '''
    Return the first `n` rows of `sparkDF` converted to a pandas dataframe,
    for interactive display in the notebook.

    The `_id` and `unixReviewTime` columns are dropped before the conversion.
    '''
    # Truncate first so that only `n` rows are converted to pandas
    first_rows = sparkDF.limit(n)
    trimmed = first_rows.drop('_id', 'unixReviewTime')
    return trimmed.toPandas()