In [1]:
# ####################################
# Settings & Imports
# ####################################

# Imports from __future__ in case we're running Python 2
from __future__ import division, print_function
from __future__ import absolute_import, unicode_literals

# import my own helper functions
from read import read_sims_result
from clean import cleanup_0IR_exp
from clean import cleanup_0IR_single

# Our numerical workhorses
import numpy as np

# Import pyplot for plotting
import matplotlib.pyplot as plt

# import logistic regression from scikit learn 
from sklearn.linear_model import LogisticRegression

# import model selection stuff from scikit learn
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold

# import feature selection stuff from scikit learn
from sklearn.feature_selection import RFE

# sklearn.metrics
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import matthews_corrcoef
from sklearn.metrics import brier_score_loss

# Import statsmodels (statistical models API)
import statsmodels.api as sm

#
# from sklearn import preprocessing

# Import pandas
import pandas as pd

# Magic function to make matplotlib inline; other style specs must come AFTER
%matplotlib inline

# This enables inline SVG graphics. It is buggy in some setups, so uncomment only if it works for you.
# %config InlineBackend.figure_formats = {'svg',}

# This enables high resolution PNGs. SVG is preferred, but has problems
# rendering vertical and horizontal lines
# %config InlineBackend.figure_formats = {'png', 'retina'}

# Plot styling overrides in Seaborn/matplotlib rc-parameter form
# ("JB's favorite Seaborn settings for notebooks").
# NOTE(review): nothing in the visible cells passes `rc` to seaborn or
# matplotlib, so these values may not actually be applied -- confirm.
rc = {
    'lines.linewidth': 2,
    'axes.labelsize': 18,
    'axes.titlesize': 18,
    'axes.facecolor': 'DFDFE5',
}

# Silence pandas' chained-assignment (SettingWithCopy) warning.
pd.options.mode.chained_assignment = None

In [3]:
# Candidate independent variables: the names used to select the feature
# columns from the cleaned simulation data. Order is preserved because it
# fixes the column order of the design matrix.
independent = [
    "deposits",
    "cash",
    "assets",
    "credit available",
    "wealth",
    "leverage",
    "dummy-0-leverage",
    # "-lag" variants of the quantities above
    "wealth-lag",
    "deposits-lag",
    "cash-lag",
    "assets-lag",
    "leverage-lag",
    "credit-available-lag",
    "credit-issued-lag",
    "dummy-0-leverage-lag",
    "over-leverage-frequency",
]

# Dependent variable: the name of the column the model is trained to predict.
dependent = "default-next"

In [24]:
# ####################################
# Read input, toggle env variable here
# ####################################
# Simulation batch to train on; change this to switch training data.
train_on_file = "0627/0IR900s"

# Full path of the training batch under the local data directory.
training_path = "/Users/xcheng/Documents/Oberlin/Summer2/DataAnalysis/data/" + train_on_file

# The second argument (32) presumably is the node count -- it matches the
# numNode=32 passed to the cleanup helper later; confirm against read.py.
df = read_sims_result(training_path, 32)


In [25]:
# ################################
# Prepare to predict & write to file
# ################################
#
# For each training-set size, fit an L1-penalised logistic regression on the
# cleaned, balanced training data, evaluate it on each prediction data set,
# and append one CSV row per (training size, prediction file) pair.
#
# Row layout (leading newline and trailing comma kept for compatibility with
# rows already in prediction.csv):
#   n_train, train_file, C, predict_file, accuracy, brier,
#   recall, precision, TP, FN, TN, FP
#
# NOTE(review): `inv_reg_strength` is not defined in any cell visible here;
# it must be assigned (it is used as LogisticRegression's C) before this
# cell runs, otherwise the cell raises NameError on a fresh kernel.

data_dir = "/Users/xcheng/Documents/Oberlin/Summer2/DataAnalysis/data/"
prediction_csv = "/Users/xcheng/Documents/Oberlin/Summer2/DataAnalysis/prediction.csv"

# Load and clean the prediction sets once, up front: neither the file names
# nor the cleanup arguments depend on the training size, so re-reading them
# for every model was repeated work.
eval_sets = []
for ir in range(1,3,1):
    predict_on_file = "0719/" + str(ir) + "IR100s"
    df_1 = read_sims_result(data_dir + predict_on_file, 32)
    df_1c = cleanup_0IR_exp(df_1, numNode=32, numPeriod=15, numSim=100, balanced=False)
    eval_sets.append((predict_on_file, df_1c))

for cases in range(500,900,55):
    df2 = cleanup_0IR_exp(df, numNode=32, numPeriod=15, numSim=cases, balanced=True)

    X = df2[independent]
    y = df2[dependent]

    # Fit the model on all data.
    # The solver is pinned to liblinear because the L1 penalty is not
    # supported by lbfgs, the default solver in scikit-learn >= 0.22.
    final = LogisticRegression(penalty="l1", C=inv_reg_strength, solver="liblinear")
    final.fit(X,y)

    for predict_on_file, df_1c in eval_sets:
        X_eval = df_1c[independent]
        y_eval = df_1c[dependent]

        # Hard class predictions, computed once and reused below.
        predicted = final.predict(X_eval)
        # Probability of the positive class (second column == final.classes_[1]).
        positive_proba = final.predict_proba(X_eval)[:, 1]

        accuracy_to_file = final.score(X_eval, y_eval)
        # The Brier score is defined on predicted probabilities; with hard
        # labels it collapses to the misclassification rate (1 - accuracy).
        brier_to_file = brier_score_loss(y_eval, positive_proba)
        # Pin the label order to the model's classes so the matrix is always
        # 2x2, even if a class is absent from this prediction set.
        confusion_to_file = confusion_matrix(y_eval, predicted, labels=final.classes_)

        # Rows are true classes, columns are predicted classes.
        true_pos = confusion_to_file[1,1]
        false_neg = confusion_to_file[1,0]
        true_neg = confusion_to_file[0,0]
        false_pos = confusion_to_file[0,1]

        o_string = "\n{:d},{:s},{:f},{:s},{:f},{:f},{:f},{:f},{:d},{:d},{:d},{:d},".format(
                        len(df2),
                        train_on_file,
                        inv_reg_strength,
                        predict_on_file,
                        accuracy_to_file,
                        brier_to_file,
                        true_pos/(true_pos+false_neg),   # recall
                        true_pos/(true_pos+false_pos),   # precision
                        true_pos,
                        false_neg,
                        true_neg,
                        false_pos,
                    )

        with open(prediction_csv, "a") as f:
            f.write(o_string)