In [1]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
sns.set()

import eli5
from eli5.sklearn import PermutationImportance

from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import RandomizedSearchCV

from sklearn.metrics import classification_report


def reduce_mem_usage(df):
    
    start_mem = df.memory_usage().sum() / 1024**2
    
    for col in df.columns:
        col_type = df[col].dtype
        
        if col_type != object:
            c_min = df[col].min()
            c_max = df[col].max()
            if str(col_type)[:3] == 'int':
                if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:
                    df[col] = df[col].astype(np.int8)
                elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:
                    df[col] = df[col].astype(np.int16)
                elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:
                    df[col] = df[col].astype(np.int32)
                elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(np.int64).max:
                    df[col] = df[col].astype(np.int64)
            else:
 
                if c_min > np.finfo(np.float32).min and c_max < np.finfo(np.float32).max:
                    df[col] = df[col].astype(np.float32)
                else:
                    df[col] = df[col].astype(np.float64)

    end_mem = df.memory_usage().sum() / 1024**2
    print('Memory usage of dataframe is {:.2f} MB --> {:.2f} MB (Decreased by {:.1f}%)'.format(
        start_mem, end_mem, 100 * (start_mem - end_mem) / start_mem))
    return df

In [2]:
data = reduce_mem_usage(pd.read_csv('final2_prosper_dataset.csv'))
data['average_income'] = (data['MinIncome']+ data['MaxIncome'])/2
data.drop('Unnamed: 0',axis=1,inplace=True)
data.drop(["ListingCreationDate","FirstRecordedCreditLine","IncomeRange","LoanOriginationDate","DateCreditPulled"],axis=1,inplace=True)
risk = ['ProsperRating (numeric)','ProsperRating (Alpha)','ProsperScore','high_risk']
CreditScore = ['CreditScoreRangeLower','CreditScoreRangeUpper']
data['ProsperRating (Alpha)'].value_counts(normalize=True)

Memory usage of dataframe is 81.09 MB --> 33.53 MB (Decreased by 58.6%)


C     0.417278
B     0.135921
A     0.127271
D     0.125325
E     0.085932
HR    0.061177
AA    0.047096
Name: ProsperRating (Alpha), dtype: float64

In [3]:
df = data.copy()
df.drop(CreditScore,axis=1,inplace=True)
df = df.drop(risk,axis=1).drop('LoanKey',axis=1)
df = df.drop(['MinIncome','MaxIncome'],axis=1)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 113066 entries, 0 to 113065
Data columns (total 80 columns):
 #   Column                              Non-Null Count   Dtype  
---  ------                              --------------   -----  
 0   CreditGrade                         113066 non-null  object 
 1   Term                                113066 non-null  int8   
 2   BorrowerAPR                         113066 non-null  float32
 3   BorrowerRate                        113066 non-null  float32
 4   LenderYield                         113066 non-null  float32
 5   EstimatedEffectiveYield             113066 non-null  float32
 6   EstimatedLoss                       113066 non-null  float32
 7   EstimatedReturn                     113066 non-null  float32
 8   ListingCategory (numeric)           113066 non-null  int8   
 9   BorrowerState                       113066 non-null  object 
 10  Occupation                          113066 non-null  object 
 11  EmploymentStatus          

In [4]:
X = pd.get_dummies(df,columns=[col for col in df.columns if 'O' == df[col].dtype])
X.head()

Unnamed: 0,Term,BorrowerAPR,BorrowerRate,LenderYield,EstimatedEffectiveYield,EstimatedLoss,EstimatedReturn,ListingCategory (numeric),EmploymentStatusDuration,IsBorrowerHomeowner,...,Occupation_Truck Driver,Occupation_Waiter/Waitress,EmploymentStatus_Employed,EmploymentStatus_Full-time,EmploymentStatus_Not available,EmploymentStatus_Not employed,EmploymentStatus_Other,EmploymentStatus_Part-time,EmploymentStatus_Retired,EmploymentStatus_Self-employed
0,36,0.16516,0.158,0.138,0.16162,0.0724,0.09211,0,2.0,1,...,0,0,0,0,0,0,0,0,0,1
1,36,0.12016,0.092,0.082,0.0796,0.0249,0.0547,2,44.0,0,...,0,0,1,0,0,0,0,0,0,0
2,36,0.28269,0.275,0.24,0.16162,0.0724,0.09211,0,67.0,0,...,0,0,0,0,1,0,0,0,0,0
3,36,0.12528,0.0974,0.0874,0.0849,0.0249,0.06,16,113.0,1,...,0,0,1,0,0,0,0,0,0,0
4,36,0.24614,0.2085,0.1985,0.18316,0.0925,0.09066,2,44.0,1,...,0,0,1,0,0,0,0,0,0,0


In [5]:
data.loc[data["ProsperScore"]==11,"ProsperScore"]=10

In [11]:
X_train,X_test,y_train,y_test = train_test_split(X,data['ProsperScore'],test_size=0.25,random_state=1111)

model4 = DecisionTreeClassifier()

searcher4 = GridSearchCV(model4, {    
    'max_depth':[1,2,3,4,5,6],
    'min_samples_leaf': [3,4,5,6,7,8,9,10],
    'criterion': ['gini','entropy', 'log_loss'],
}, cv=5)


searcher4.fit(X_train, y_train)

print("Best CV params", searcher4.best_params_)

best_Dt4 = searcher4.best_estimator_

Traceback (most recent call last):
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 347, in fit
    criterion = CRITERIA_CLF[self.criterion](self.n_outputs_,
KeyError: 'log_loss'

Traceback (most recent call last):
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 347, in fit
    criterion = CRITERIA_CLF[self.criterion](self.n_outputs_,
KeyError: 'log_loss'

Tracebac

Traceback (most recent call last):
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 347, in fit
    criterion = CRITERIA_CLF[self.criterion](self.n_outputs_,
KeyError: 'log_loss'

Traceback (most recent call last):
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 347, in fit
    criterion = CRITERIA_CLF[self.criterion](self.n_outputs_,
KeyError: 'log_loss'

Tracebac

Traceback (most recent call last):
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 347, in fit
    criterion = CRITERIA_CLF[self.criterion](self.n_outputs_,
KeyError: 'log_loss'

Traceback (most recent call last):
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 347, in fit
    criterion = CRITERIA_CLF[self.criterion](self.n_outputs_,
KeyError: 'log_loss'

Tracebac

Traceback (most recent call last):
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 347, in fit
    criterion = CRITERIA_CLF[self.criterion](self.n_outputs_,
KeyError: 'log_loss'

Traceback (most recent call last):
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 347, in fit
    criterion = CRITERIA_CLF[self.criterion](self.n_outputs_,
KeyError: 'log_loss'

Tracebac

Traceback (most recent call last):
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 347, in fit
    criterion = CRITERIA_CLF[self.criterion](self.n_outputs_,
KeyError: 'log_loss'

Traceback (most recent call last):
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 347, in fit
    criterion = CRITERIA_CLF[self.criterion](self.n_outputs_,
KeyError: 'log_loss'

Tracebac

Traceback (most recent call last):
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 347, in fit
    criterion = CRITERIA_CLF[self.criterion](self.n_outputs_,
KeyError: 'log_loss'

Traceback (most recent call last):
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 347, in fit
    criterion = CRITERIA_CLF[self.criterion](self.n_outputs_,
KeyError: 'log_loss'

Tracebac

Traceback (most recent call last):
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 347, in fit
    criterion = CRITERIA_CLF[self.criterion](self.n_outputs_,
KeyError: 'log_loss'

Traceback (most recent call last):
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 347, in fit
    criterion = CRITERIA_CLF[self.criterion](self.n_outputs_,
KeyError: 'log_loss'

Tracebac

Traceback (most recent call last):
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 347, in fit
    criterion = CRITERIA_CLF[self.criterion](self.n_outputs_,
KeyError: 'log_loss'

Traceback (most recent call last):
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 347, in fit
    criterion = CRITERIA_CLF[self.criterion](self.n_outputs_,
KeyError: 'log_loss'

Tracebac

Traceback (most recent call last):
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 347, in fit
    criterion = CRITERIA_CLF[self.criterion](self.n_outputs_,
KeyError: 'log_loss'

Traceback (most recent call last):
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 347, in fit
    criterion = CRITERIA_CLF[self.criterion](self.n_outputs_,
KeyError: 'log_loss'

Tracebac

Traceback (most recent call last):
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 347, in fit
    criterion = CRITERIA_CLF[self.criterion](self.n_outputs_,
KeyError: 'log_loss'

Traceback (most recent call last):
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 347, in fit
    criterion = CRITERIA_CLF[self.criterion](self.n_outputs_,
KeyError: 'log_loss'

Tracebac

Traceback (most recent call last):
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 347, in fit
    criterion = CRITERIA_CLF[self.criterion](self.n_outputs_,
KeyError: 'log_loss'

Traceback (most recent call last):
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 347, in fit
    criterion = CRITERIA_CLF[self.criterion](self.n_outputs_,
KeyError: 'log_loss'

Tracebac

Traceback (most recent call last):
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 347, in fit
    criterion = CRITERIA_CLF[self.criterion](self.n_outputs_,
KeyError: 'log_loss'

Traceback (most recent call last):
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 347, in fit
    criterion = CRITERIA_CLF[self.criterion](self.n_outputs_,
KeyError: 'log_loss'

Tracebac

Traceback (most recent call last):
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 347, in fit
    criterion = CRITERIA_CLF[self.criterion](self.n_outputs_,
KeyError: 'log_loss'

Traceback (most recent call last):
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 347, in fit
    criterion = CRITERIA_CLF[self.criterion](self.n_outputs_,
KeyError: 'log_loss'

Tracebac

Traceback (most recent call last):
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 347, in fit
    criterion = CRITERIA_CLF[self.criterion](self.n_outputs_,
KeyError: 'log_loss'

Traceback (most recent call last):
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 347, in fit
    criterion = CRITERIA_CLF[self.criterion](self.n_outputs_,
KeyError: 'log_loss'

Tracebac

Traceback (most recent call last):
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 347, in fit
    criterion = CRITERIA_CLF[self.criterion](self.n_outputs_,
KeyError: 'log_loss'

Traceback (most recent call last):
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 347, in fit
    criterion = CRITERIA_CLF[self.criterion](self.n_outputs_,
KeyError: 'log_loss'

Tracebac

Traceback (most recent call last):
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 347, in fit
    criterion = CRITERIA_CLF[self.criterion](self.n_outputs_,
KeyError: 'log_loss'

Traceback (most recent call last):
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 347, in fit
    criterion = CRITERIA_CLF[self.criterion](self.n_outputs_,
KeyError: 'log_loss'

Tracebac

Traceback (most recent call last):
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 347, in fit
    criterion = CRITERIA_CLF[self.criterion](self.n_outputs_,
KeyError: 'log_loss'

Traceback (most recent call last):
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 347, in fit
    criterion = CRITERIA_CLF[self.criterion](self.n_outputs_,
KeyError: 'log_loss'

Tracebac

Traceback (most recent call last):
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 347, in fit
    criterion = CRITERIA_CLF[self.criterion](self.n_outputs_,
KeyError: 'log_loss'

Traceback (most recent call last):
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 347, in fit
    criterion = CRITERIA_CLF[self.criterion](self.n_outputs_,
KeyError: 'log_loss'

Tracebac

Traceback (most recent call last):
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 347, in fit
    criterion = CRITERIA_CLF[self.criterion](self.n_outputs_,
KeyError: 'log_loss'

Traceback (most recent call last):
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 347, in fit
    criterion = CRITERIA_CLF[self.criterion](self.n_outputs_,
KeyError: 'log_loss'

Tracebac

Traceback (most recent call last):
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 347, in fit
    criterion = CRITERIA_CLF[self.criterion](self.n_outputs_,
KeyError: 'log_loss'

Traceback (most recent call last):
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 347, in fit
    criterion = CRITERIA_CLF[self.criterion](self.n_outputs_,
KeyError: 'log_loss'

Tracebac

Traceback (most recent call last):
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 347, in fit
    criterion = CRITERIA_CLF[self.criterion](self.n_outputs_,
KeyError: 'log_loss'

Traceback (most recent call last):
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 898, in fit
    super().fit(
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\tree\_classes.py", line 347, in fit
    criterion = CRITERIA_CLF[self.criterion](self.n_outputs_,
KeyError: 'log_loss'

Tracebac

Best CV params {'criterion': 'entropy', 'max_depth': 6, 'min_samples_leaf': 3}


In [12]:
permDt4 = PermutationImportance(searcher4, random_state=1).fit(X_test, y_test)
eli5.show_weights(permDt4, feature_names = X_test.columns.tolist())

Weight,Feature
0.3045  ± 0.0036,EstimatedLoss
0.1401  ± 0.0027,LoanStatus_Completed
0.1278  ± 0.0018,LoanMonthsSinceOrigination
0.1225  ± 0.0021,Is_CreditGradeApplicable
0.0580  ± 0.0007,exclude_recoveries
0.0411  ± 0.0007,had_PriorLoans
0.0345  ± 0.0004,LoanStatus_Defaulted
0.0232  ± 0.0015,BorrowerAPR
0.0143  ± 0.0021,CreditScoreAverage
0.0052  ± 0.0005,Term


In [13]:
y_predDt4 = best_Dt4.predict(X_test)
print(classification_report(y_test,y_predDt4))

              precision    recall  f1-score   support

         1.0       0.33      0.60      0.42       227
         2.0       0.58      0.51      0.55      1429
         3.0       0.40      0.24      0.30      1850
         4.0       0.33      0.70      0.45      3020
         5.0       0.81      0.46      0.59      4071
         6.0       0.63      0.47      0.54      4158
         7.0       0.73      0.80      0.77      7135
         8.0       0.52      0.57      0.55      3037
         9.0       0.67      0.49      0.57      1770
        10.0       0.83      0.59      0.69      1570

    accuracy                           0.59     28267
   macro avg       0.58      0.55      0.54     28267
weighted avg       0.63      0.59      0.59     28267



In [14]:
print(classification_report(y_train, best_Dt4.predict(X_train)))

              precision    recall  f1-score   support

         1.0       0.35      0.65      0.45       719
         2.0       0.55      0.49      0.52      4277
         3.0       0.39      0.24      0.30      5720
         4.0       0.34      0.70      0.46      9423
         5.0       0.80      0.45      0.57     12317
         6.0       0.61      0.47      0.53     12016
         7.0       0.74      0.80      0.77     21764
         8.0       0.52      0.58      0.55      8916
         9.0       0.66      0.50      0.57      5095
        10.0       0.84      0.57      0.68      4552

    accuracy                           0.58     84799
   macro avg       0.58      0.55      0.54     84799
weighted avg       0.63      0.58      0.59     84799



# ---------------------------------------------------------------------------------------------------

### let's try some ensemble models ( RandomForestClassifier )

##### it's not a good idea to use GridSearchCV  because of the parameters 

In [17]:
X_train,X_test,y_train,y_test = train_test_split(X,data['ProsperScore'],test_size=0.25,random_state=1111)

modelRFC = RandomForestClassifier(random_state=1111)

searcher5 = RandomizedSearchCV(modelRFC, {  
    
    'n_estimators':[50,100,150,200,250,300,350,400],
    
    'min_samples_split':[3,4,5,6,7],
    
    'max_depth':[1,2,3,4,5,6],
    
    'min_samples_leaf': [3,4,5,6,7,8,9,10],
    
    'criterion': ['gini','entropy', 'log_loss'],
    
    'min_weight_fraction_leaf':[0,0.0001,0.0005,0.001,0.005,0.01,0.03,0.06,0.1,0.2,0.4,0.6,0.8,1],
    
    'max_features': ['sqrt', 'log2', None],
    
    'max_leaf_nodes' : list(range(10,100)),
    
    #'min_impurity_decrease':[],
    
    #'bootstrap':[],
    
    #'oob_score': [True,False],
    
    #'warm_start': [True,False],
    
    #'class_weight':['balanced', 'balanced_subsample', None],
    
    #'ccp_alpha':[],
    
    #'max_samples':[None,0.0001,0.0005,0.001,0.005,0.01,0.03,0.06,0.1,0.2,0.4,0.6,0.8,1],
    
}, cv=5)


searcher5.fit(X_train, y_train)

print("Best CV params", searcher5.best_params_)

best_RFC = searcher5.best_estimator_

Traceback (most recent call last):
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 387, in fit
    trees = Parallel(n_jobs=self.n_jobs, verbose=self.verbose,
  File "C:\Users\m2021\anaconda3\lib\site-packages\joblib\parallel.py", line 1041, in __call__
    if self.dispatch_one_batch(iterator):
  File "C:\Users\m2021\anaconda3\lib\site-packages\joblib\parallel.py", line 859, in dispatch_one_batch
    self._dispatch(tasks)
  File "C:\Users\m2021\anaconda3\lib\site-packages\joblib\parallel.py", line 777, in _dispatch
    job = self._backend.apply_async(batch, callback=cb)
  File "C:\Users\m2021\anaconda3\lib\site-packages\joblib\_parallel_backends.py", line 208, in apply_async
    result = ImmediateResult(func)
  File "C:\Users\m2021\anaconda3\lib\site-packages\joblib\_parallel_backen

Traceback (most recent call last):
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 387, in fit
    trees = Parallel(n_jobs=self.n_jobs, verbose=self.verbose,
  File "C:\Users\m2021\anaconda3\lib\site-packages\joblib\parallel.py", line 1041, in __call__
    if self.dispatch_one_batch(iterator):
  File "C:\Users\m2021\anaconda3\lib\site-packages\joblib\parallel.py", line 859, in dispatch_one_batch
    self._dispatch(tasks)
  File "C:\Users\m2021\anaconda3\lib\site-packages\joblib\parallel.py", line 777, in _dispatch
    job = self._backend.apply_async(batch, callback=cb)
  File "C:\Users\m2021\anaconda3\lib\site-packages\joblib\_parallel_backends.py", line 208, in apply_async
    result = ImmediateResult(func)
  File "C:\Users\m2021\anaconda3\lib\site-packages\joblib\_parallel_backen

Traceback (most recent call last):
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 387, in fit
    trees = Parallel(n_jobs=self.n_jobs, verbose=self.verbose,
  File "C:\Users\m2021\anaconda3\lib\site-packages\joblib\parallel.py", line 1041, in __call__
    if self.dispatch_one_batch(iterator):
  File "C:\Users\m2021\anaconda3\lib\site-packages\joblib\parallel.py", line 859, in dispatch_one_batch
    self._dispatch(tasks)
  File "C:\Users\m2021\anaconda3\lib\site-packages\joblib\parallel.py", line 777, in _dispatch
    job = self._backend.apply_async(batch, callback=cb)
  File "C:\Users\m2021\anaconda3\lib\site-packages\joblib\_parallel_backends.py", line 208, in apply_async
    result = ImmediateResult(func)
  File "C:\Users\m2021\anaconda3\lib\site-packages\joblib\_parallel_backen

Traceback (most recent call last):
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 387, in fit
    trees = Parallel(n_jobs=self.n_jobs, verbose=self.verbose,
  File "C:\Users\m2021\anaconda3\lib\site-packages\joblib\parallel.py", line 1041, in __call__
    if self.dispatch_one_batch(iterator):
  File "C:\Users\m2021\anaconda3\lib\site-packages\joblib\parallel.py", line 859, in dispatch_one_batch
    self._dispatch(tasks)
  File "C:\Users\m2021\anaconda3\lib\site-packages\joblib\parallel.py", line 777, in _dispatch
    job = self._backend.apply_async(batch, callback=cb)
  File "C:\Users\m2021\anaconda3\lib\site-packages\joblib\_parallel_backends.py", line 208, in apply_async
    result = ImmediateResult(func)
  File "C:\Users\m2021\anaconda3\lib\site-packages\joblib\_parallel_backen

Best CV params {'n_estimators': 150, 'min_weight_fraction_leaf': 0, 'min_samples_split': 7, 'min_samples_leaf': 5, 'max_leaf_nodes': 54, 'max_features': 'sqrt', 'max_depth': 6, 'criterion': 'gini'}


In [18]:
best_RFC.feature_importances_

array([1.60063342e-03, 3.30550237e-02, 3.10105868e-02, 3.21592333e-02,
       4.31910120e-02, 4.96753077e-02, 2.67940958e-02, 1.47727280e-02,
       3.60897055e-04, 1.26667765e-04, 1.43662787e-03, 5.93929385e-04,
       5.88247162e-04, 2.20755370e-04, 3.13535996e-04, 6.20141118e-04,
       2.98612096e-03, 1.16773548e-03, 6.85391792e-04, 2.40185385e-04,
       1.72655025e-04, 1.49292790e-04, 4.16663970e-06, 5.57719800e-04,
       1.70153026e-03, 4.93293448e-03, 4.89557253e-04, 1.01169752e-03,
       4.78235197e-04, 3.96722406e-03, 3.44870243e-04, 1.08568700e-03,
       5.34784456e-02, 7.28059573e-02, 4.87189266e-03, 3.46067463e-03,
       1.27065782e-02, 2.40259698e-02, 3.19880598e-03, 4.23833205e-03,
       3.41447336e-03, 3.45402233e-02, 4.22459541e-02, 3.40830448e-03,
       5.57763875e-06, 2.24199438e-04, 2.64074859e-05, 3.40698294e-05,
       5.54880148e-03, 5.97439391e-02, 2.93160274e-03, 3.91114798e-02,
       4.49663156e-02, 5.23191151e-04, 4.74024856e-07, 5.54517887e-02,
      

In [19]:
permRFC = PermutationImportance(searcher5, random_state=1).fit(X_test, y_test)
eli5.show_weights(permRFC, feature_names = X_test.columns.tolist())

Weight,Feature
0.0294  ± 0.0022,LoanMonthsSinceOrigination
0.0272  ± 0.0032,CreditGrade_Not_Applicable
0.0205  ± 0.0012,Is_CreditGradeApplicable
0.0183  ± 0.0022,EstimatedLoss
0.0156  ± 0.0022,after_July2009
0.0114  ± 0.0014,EstimatedEffectiveYield
0.0078  ± 0.0013,BorrowerRate
0.0078  ± 0.0011,EstimatedReturn
0.0076  ± 0.0010,LenderYield
0.0059  ± 0.0014,BorrowerAPR


In [20]:
y_predRFC = best_RFC.predict(X_test)
print(classification_report(y_test,y_predRFC))

              precision    recall  f1-score   support

         1.0       0.00      0.00      0.00       227
         2.0       0.58      0.44      0.50      1429
         3.0       0.00      0.00      0.00      1850
         4.0       0.30      0.74      0.43      3020
         5.0       0.89      0.45      0.60      4071
         6.0       0.51      0.50      0.50      4158
         7.0       0.85      0.69      0.76      7135
         8.0       0.36      0.72      0.48      3037
         9.0       0.73      0.20      0.32      1770
        10.0       0.72      0.57      0.64      1570

    accuracy                           0.54     28267
   macro avg       0.49      0.43      0.42     28267
weighted avg       0.60      0.54      0.53     28267



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [21]:
print(classification_report(y_train, best_RFC.predict(X_train)))

              precision    recall  f1-score   support

         1.0       0.00      0.00      0.00       719
         2.0       0.56      0.42      0.48      4277
         3.0       0.00      0.00      0.00      5720
         4.0       0.31      0.75      0.44      9423
         5.0       0.89      0.43      0.58     12317
         6.0       0.50      0.50      0.50     12016
         7.0       0.85      0.69      0.76     21764
         8.0       0.36      0.74      0.48      8916
         9.0       0.72      0.21      0.32      5095
        10.0       0.74      0.57      0.64      4552

    accuracy                           0.54     84799
   macro avg       0.49      0.43      0.42     84799
weighted avg       0.60      0.54      0.53     84799



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


# -------------------------------------------------------------------------------


In [22]:
imp_fea=['LoanMonthsSinceOrigination',
'CreditGrade_Not_Applicable',
'Is_CreditGradeApplicable',
'EstimatedLoss',
'after_July2009',
'EstimatedEffectiveYield',
'BorrowerRate',
'EstimatedReturn',
'LenderYield',
'BorrowerAPR',
'DebtToIncomeRatio',
'had_PriorLoans',
'EmploymentStatus_Full-time',
'ListingCategory (numeric)',
'Investors',
'LP_CustomerPrincipalPayments',
'IsClosed',
'LP_CustomerPayments',
'LoanStatus_Current',
'CreditScoreAverage']

In [23]:
X_train,X_test,y_train,y_test = train_test_split(X[imp_fea],data['ProsperScore'],test_size=0.25,random_state=1111)

modelRFC2 = RandomForestClassifier(random_state=1111)

searcher52 = RandomizedSearchCV(modelRFC2, {  
    
    'n_estimators':[50,100,150,200,250,300,350,400],
    
    'min_samples_split':[3,4,5,6,7,8],
    
    'max_depth':[1,2,3,4,5,6,7,8,9,10,11,12],
    
    'min_samples_leaf': [3,4,5,6,7,8,9,10],
    
    'criterion': ['gini','entropy', 'log_loss'],
    
    'min_weight_fraction_leaf':[0,0.0001,0.0005,0.001,0.005,0.01,0.03,0.06,0.1,0.2,0.4,0.6,0.8,1],
    
    'max_features': ['sqrt', 'log2', None],
    
    'max_leaf_nodes' : list(range(10,100)),
    
    #'min_impurity_decrease':[],
    
    #'bootstrap':[],
    
    #'oob_score': [True,False],
    
    #'warm_start': [True,False],
    
    #'class_weight':['balanced', 'balanced_subsample', None],
    
    #'ccp_alpha':[],
    
    #'max_samples':[None,0.0001,0.0005,0.001,0.005,0.01,0.03,0.06,0.1,0.2,0.4,0.6,0.8,1],
    
}, cv=5)


searcher52.fit(X_train, y_train)

print("Best CV params", searcher52.best_params_)

best_RFC2 = searcher52.best_estimator_

Traceback (most recent call last):
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 387, in fit
    trees = Parallel(n_jobs=self.n_jobs, verbose=self.verbose,
  File "C:\Users\m2021\anaconda3\lib\site-packages\joblib\parallel.py", line 1041, in __call__
    if self.dispatch_one_batch(iterator):
  File "C:\Users\m2021\anaconda3\lib\site-packages\joblib\parallel.py", line 859, in dispatch_one_batch
    self._dispatch(tasks)
  File "C:\Users\m2021\anaconda3\lib\site-packages\joblib\parallel.py", line 777, in _dispatch
    job = self._backend.apply_async(batch, callback=cb)
  File "C:\Users\m2021\anaconda3\lib\site-packages\joblib\_parallel_backends.py", line 208, in apply_async
    result = ImmediateResult(func)
  File "C:\Users\m2021\anaconda3\lib\site-packages\joblib\_parallel_backen

Traceback (most recent call last):
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 387, in fit
    trees = Parallel(n_jobs=self.n_jobs, verbose=self.verbose,
  File "C:\Users\m2021\anaconda3\lib\site-packages\joblib\parallel.py", line 1041, in __call__
    if self.dispatch_one_batch(iterator):
  File "C:\Users\m2021\anaconda3\lib\site-packages\joblib\parallel.py", line 859, in dispatch_one_batch
    self._dispatch(tasks)
  File "C:\Users\m2021\anaconda3\lib\site-packages\joblib\parallel.py", line 777, in _dispatch
    job = self._backend.apply_async(batch, callback=cb)
  File "C:\Users\m2021\anaconda3\lib\site-packages\joblib\_parallel_backends.py", line 208, in apply_async
    result = ImmediateResult(func)
  File "C:\Users\m2021\anaconda3\lib\site-packages\joblib\_parallel_backen

Traceback (most recent call last):
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\model_selection\_validation.py", line 593, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\m2021\anaconda3\lib\site-packages\sklearn\ensemble\_forest.py", line 387, in fit
    trees = Parallel(n_jobs=self.n_jobs, verbose=self.verbose,
  File "C:\Users\m2021\anaconda3\lib\site-packages\joblib\parallel.py", line 1041, in __call__
    if self.dispatch_one_batch(iterator):
  File "C:\Users\m2021\anaconda3\lib\site-packages\joblib\parallel.py", line 859, in dispatch_one_batch
    self._dispatch(tasks)
  File "C:\Users\m2021\anaconda3\lib\site-packages\joblib\parallel.py", line 777, in _dispatch
    job = self._backend.apply_async(batch, callback=cb)
  File "C:\Users\m2021\anaconda3\lib\site-packages\joblib\_parallel_backends.py", line 208, in apply_async
    result = ImmediateResult(func)
  File "C:\Users\m2021\anaconda3\lib\site-packages\joblib\_parallel_backen

Best CV params {'n_estimators': 350, 'min_weight_fraction_leaf': 0, 'min_samples_split': 6, 'min_samples_leaf': 10, 'max_leaf_nodes': 71, 'max_features': None, 'max_depth': 8, 'criterion': 'entropy'}


In [24]:
y_predRFC2 = best_RFC2.predict(X_test)
print(classification_report(y_test,y_predRFC2))

              precision    recall  f1-score   support

         1.0       0.49      0.52      0.51       227
         2.0       0.61      0.51      0.55      1429
         3.0       0.39      0.34      0.36      1850
         4.0       0.38      0.63      0.47      3020
         5.0       0.54      0.39      0.45      4071
         6.0       0.56      0.46      0.50      4158
         7.0       0.67      0.80      0.73      7135
         8.0       0.61      0.58      0.59      3037
         9.0       0.77      0.42      0.55      1770
        10.0       0.71      0.65      0.68      1570

    accuracy                           0.57     28267
   macro avg       0.57      0.53      0.54     28267
weighted avg       0.58      0.57      0.56     28267



In [25]:
print(classification_report(y_train, best_RFC2.predict(X_train)))

              precision    recall  f1-score   support

         1.0       0.52      0.57      0.54       719
         2.0       0.58      0.48      0.53      4277
         3.0       0.38      0.33      0.35      5720
         4.0       0.38      0.63      0.48      9423
         5.0       0.53      0.38      0.44     12317
         6.0       0.54      0.44      0.49     12016
         7.0       0.68      0.80      0.73     21764
         8.0       0.61      0.60      0.60      8916
         9.0       0.75      0.43      0.55      5095
        10.0       0.72      0.64      0.68      4552

    accuracy                           0.57     84799
   macro avg       0.57      0.53      0.54     84799
weighted avg       0.58      0.57      0.56     84799



# -------------------------------------------------------------------------------------------------

In [26]:
modelRFC3 = RandomForestClassifier(random_state=1111)
X_train,X_test,y_train,y_test = train_test_split(X,data['ProsperScore'],test_size=0.25,random_state=1111)
modelRFC3.fit(X_train, y_train)
y_predRFC3 = modelRFC3.predict(X_test)
print(classification_report(y_test,y_predRFC3))

              precision    recall  f1-score   support

         1.0       0.68      0.67      0.67       227
         2.0       0.63      0.59      0.61      1429
         3.0       0.54      0.39      0.45      1850
         4.0       0.47      0.69      0.56      3020
         5.0       0.74      0.62      0.67      4071
         6.0       0.65      0.66      0.66      4158
         7.0       0.82      0.83      0.83      7135
         8.0       0.63      0.67      0.65      3037
         9.0       0.71      0.51      0.60      1770
        10.0       0.77      0.77      0.77      1570

    accuracy                           0.68     28267
   macro avg       0.66      0.64      0.65     28267
weighted avg       0.69      0.68      0.68     28267



In [27]:
permRFC3 = PermutationImportance(modelRFC3, random_state=1).fit(X_test, y_test)
eli5.show_weights(permRFC3, feature_names = X_test.columns.tolist())

Weight,Feature
0.0432  ± 0.0019,LoanMonthsSinceOrigination
0.0431  ± 0.0008,EstimatedLoss
0.0317  ± 0.0013,CreditScoreAverage
0.0270  ± 0.0014,had_PriorLoans
0.0229  ± 0.0016,BorrowerAPR
0.0186  ± 0.0014,EstimatedEffectiveYield
0.0152  ± 0.0022,EstimatedReturn
0.0150  ± 0.0023,DebtToIncomeRatio
0.0146  ± 0.0023,BorrowerRate
0.0135  ± 0.0023,LenderYield


In [28]:
print(classification_report(y_train, permRFC3.predict(X_train)))

              precision    recall  f1-score   support

         1.0       1.00      1.00      1.00       719
         2.0       1.00      1.00      1.00      4277
         3.0       1.00      1.00      1.00      5720
         4.0       1.00      1.00      1.00      9423
         5.0       1.00      1.00      1.00     12317
         6.0       1.00      1.00      1.00     12016
         7.0       1.00      1.00      1.00     21764
         8.0       1.00      1.00      1.00      8916
         9.0       1.00      1.00      1.00      5095
        10.0       1.00      1.00      1.00      4552

    accuracy                           1.00     84799
   macro avg       1.00      1.00      1.00     84799
weighted avg       1.00      1.00      1.00     84799



# -------------------------------------------------------------------------------------------------

In [30]:
from sklearn.ensemble import GradientBoostingClassifier

In [None]:
X_train,X_test,y_train,y_test = train_test_split(X,data['ProsperScore'],test_size=0.25,random_state=1111)

modelGBC = GradientBoostingClassifier(random_state=1111)

searcher6 = RandomizedSearchCV(modelGBC, {  
    'loss' : ['log_loss', 'deviance', 'exponential'],
    'learning_rate' : list(np.linspace(0.001,1)),
    'n_estimators' : [50,100,150,200,250,300,350,400],
    'min_samples_leaf':list(range(3,100)),
    'min_samples_split':list(range(3,10)),
    'max_depth':list(range(3,15)),
    'max_leaf_nodes': list(range(20,50)),
}, cv=5)


searcher6.fit(X_train, y_train)

print("Best CV params", searcher6.best_params_)

best_GBC = searcher6.best_estimator_

In [None]:
y_predGBC = best_GBC.predict(X_test)
print(classification_report(y_test, y_predGBC))

In [None]:
print(classification_report(y_train, best_GBC.predict(X_train)))

In [None]:
permGBC = PermutationImportance(searcher6, random_state=1).fit(X_test, y_test)
eli5.show_weights(permGBC, feature_names = X_test.columns.tolist())