# Supervised Learning Walkthrough

![MTSU Data Science Institute](images/MTDataScienceInstituteHorizontal.jpg)

### Summary of Notebook
#### Created by Charlie H. Apigian, PhD.  - Interim Director of the Data Science Institute at MTSU
<br>
#### Contents
- Frame the Business Problem
- Import Libraries
- Import Data
- Explore the Data
- Cleanse the Data
- Transform the Data
- Split the Data
- Select and Run the Model
- Fine Tune the Model


# --------------------------------------------------------------------------------
# XXXXXXXXXXXXX  What is the Business Problem XXXXXXXXXXXXX
# --------------------------------------------------------------------------------

# --------------------------------------------------------------------------------
# XXXXXXXXXXXXXXXXXX  Import Libraries XXXXXXXXXXXXXXXXXX
# --------------------------------------------------------------------------------

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

sns.set_style('whitegrid')

# --------------------------------------------------------------------------------
# XXXXXXXXXXXXXXXXXXX  Import Data XXXXXXXXXXXXXXXXXXX
# --------------------------------------------------------------------------------

### Import the data and create dataframes and arrays that can then be cleansed, transformed, and split.

In [None]:
#import requests

#url_loandata="https://raw.githubusercontent.com/capigian/AnalyticsSummit/master/Apigian_NashAnalyticsPresentation/data/Loan_Data.csv"

#df_loandata_url=pd.read_csv(url_loandata, index_col = 1, header = 0)

In [None]:
#df_loandata_url.head()

In [None]:
# Load the loan data from the local CSV; the first column becomes the row
# index and the first row supplies the column names.
df_loandata = pd.read_csv('data/Loan_Data.csv', index_col = 0, header = 0)
# Summarise column dtypes and non-null counts to spot columns with missing values.
df_loandata.info()

In [None]:
df_loandata.head()

# --------------------------------------------------------------------------------
# XXXXXXXXXXXXXXXXXX  Explore the Data XXXXXXXXXXXXXXXXXX
# --------------------------------------------------------------------------------

### Why explore before cleansing? Exploring and cleansing may be conducted simultaneously, but you cannot cleanse data without looking at it first.

https://seaborn.pydata.org/examples/index.html

In [None]:
df_loandata.head(25)
# index on left side and NaN???

## Describe the data
## XXXXXXXXXXXXX
### Shows a list of summary statistics - what to look for?
- Min and max for outliers
- Count to see how many columns have NaN values
![PandasSummaryStatistics.png](images/PandasSummaryStatistics.png)
https://pandas.pydata.org/pandas-docs/stable/basics.html

In [None]:
df_loandata.describe()

In [None]:
df_loandata['annual_inc'].median()

In [None]:
df_loandata['annual_inc'].mean()

In [None]:
df_loandata['annual_inc'].kurt()

In [None]:
# Pairwise Pearson correlation of the numeric columns only. The frame still
# contains text columns at this point (e.g. purpose, home_ownership,
# loan_status), which recent pandas versions refuse to correlate, so they are
# filtered out explicitly before calling corr().
df_loandata.select_dtypes(include='number').corr()

In [None]:
df_loandata[['loan_amnt', 'annual_inc', 'revol_bal', 'total_acc', 'tot_coll_amt']].corr()

In [None]:
df_loandata.term.value_counts() #Should probably encode term as 0 or 1

In [None]:
df_loandata_explore = df_loandata[['loan_amnt', 'emp_length', 'annual_inc', 'revol_bal', 'tot_cur_debt', 'total_credit_rv', 'purpose']]

In [None]:
df_loandata_explore.info()

In [None]:
sns.pairplot(df_loandata_explore, hue = 'purpose')

In [None]:
df_loandata_explore_num = df_loandata_explore.drop('purpose', axis = 1)

In [None]:
colormap = plt.cm.viridis
plt.figure(figsize=(14,12))
plt.title('Pearson Correlation of Features', y=1.05, size=15)
sns.heatmap(df_loandata_explore_num.astype(float).corr(),linewidths=0.1,vmax=1.0, square=True, cmap=colormap, linecolor='white', annot=True)

In [None]:
sns.set(style="ticks", palette="pastel")
ax2 = sns.boxplot(y='loan_amnt', x = 'purpose',  data=df_loandata_explore)


In [None]:
df_loandata_explore.info()

In [None]:
# 3x2 grid of box plots: one numeric feature per panel, each grouped by loan
# purpose. Panels are filled row by row in the order listed below.
f, axes = plt.subplots(3, 2, figsize=(15, 15), sharex=True)
boxplot_features = ['loan_amnt', 'emp_length', 'annual_inc',
                    'revol_bal', 'tot_cur_debt', 'total_credit_rv']
for panel, feature in zip(axes.flat, boxplot_features):
    sns.boxplot(y=feature, x = 'purpose',  data=df_loandata_explore, ax=panel)

# --------------------------------------------------------------------------------
# XXXXXXXXXXXXXXXXXX  Cleanse the  Data XXXXXXXXXXXXXXXXXX
# --------------------------------------------------------------------------------

In [None]:
df_loandata.info()

## Fill with a specific value

- Change all NaN values to 0.

In [None]:
df_loandata.mths_since_last_delinq.isnull().sum()

In [None]:
df_loandata['mths_since_last_delinq'] = df_loandata['mths_since_last_delinq'].fillna(0)

In [None]:
df_loandata.info()

## Fill in the mean for total_acc

In [None]:
pd.pivot_table(df_loandata,index=["purpose"], values=["total_acc"])

In [None]:
# Impute missing total_acc values with the overall column mean
# (a single global mean, not a separate mean per purpose).
mean_value = df_loandata['total_acc'].mean()
df_loandata['total_acc'] = df_loandata['total_acc'].fillna(mean_value)

In [None]:
df_loandata.info()

## What is going on with Annual Income?

In [None]:
df_loandata['annual_inc'].describe()

In [None]:
df_loandata['annual_inc'].median()

In [None]:
df_loandata.head()

In [None]:
df_loandata[df_loandata['annual_inc'] >10000000]

### Mistake in entering the annual_inc - forgot to add the decimal places - should be 123,600.00 not 12,360,000.

In [None]:
df_loandata.loc[[2968827], ["annual_inc"]]

In [None]:
df_loandata.loc[[2968827], ["annual_inc"]]=123600

In [None]:
df_loandata.loc[[2968827], ["annual_inc"]]

In [None]:
sns.boxplot(y='annual_inc', x = 'purpose',  data=df_loandata)

In [None]:
f, axes = plt.subplots(2, 2, figsize=(15, 15), sharex=True)
sns.boxplot(y='annual_inc', x = 'purpose',  data=df_loandata, ax=axes[0,0])
sns.boxplot(y='revol_bal', x = 'purpose',  data=df_loandata, ax=axes[0,1])
sns.boxplot(y='tot_cur_debt', x = 'purpose',  data=df_loandata, ax=axes[1,0])
sns.boxplot(y='total_credit_rv', x = 'purpose',  data=df_loandata, ax=axes[1,1])

In [None]:
# Remove extreme outliers in place: for each column, drop every row whose
# value exceeds that column's cap. Columns are processed one after another.
outlier_caps = [
    ('annual_inc', 1000000),
    ('revol_bal', 750000),
    ('tot_cur_debt', 3000000),
    ('total_credit_rv', 750000),
]
for column, cap in outlier_caps:
    rows_over_cap = df_loandata[df_loandata[column] > cap].index
    df_loandata.drop(rows_over_cap, inplace=True)

In [None]:
f, axes = plt.subplots(2, 2, figsize=(15, 15), sharex=True)
sns.boxplot(y='annual_inc', x = 'purpose',  data=df_loandata, ax=axes[0,0])
sns.boxplot(y='revol_bal', x = 'purpose',  data=df_loandata, ax=axes[0,1])
sns.boxplot(y='tot_cur_debt', x = 'purpose',  data=df_loandata, ax=axes[1,0])
sns.boxplot(y='total_credit_rv', x = 'purpose',  data=df_loandata, ax=axes[1,1])

In [None]:
sns.boxplot(x='annual_inc', y = 'purpose' ,data=df_loandata)

# --------------------------------------------------------------------------------
# XXXXXXXXXXXXXXXXX  Transform the Data XXXXXXXXXXXXXXXXX
# --------------------------------------------------------------------------------

## Re-coding of categorical data

<br>
### 3 ways to recode categorical data
- **label encoder** - changes categories to integers based on alphabetical order
- **one-hot encoder** - changes one column of categorical data into several binary (dummy) columns
- **use a custom function** for changing categories to integers

## Re-Label using Label Encoding

In [None]:
from sklearn.preprocessing import LabelEncoder

In [None]:
lc = LabelEncoder()

In [None]:
df_loandata.sub_grade.value_counts()

In [None]:
df_loandata.head()

In [None]:
df_loandata['sub_grade'] = lc.fit_transform(df_loandata['sub_grade'])

In [None]:
df_loandata.sub_grade.value_counts()

## Dummy Variables - One-hot encoding

In [None]:
dummies_home = pd.get_dummies(df_loandata['home_ownership'], drop_first = False)
dummies_purpose = pd.get_dummies(df_loandata['purpose'], drop_first = False)

In [None]:
dummies_home.info()
dummies_purpose.info()

In [None]:
df_loandata = pd.concat([df_loandata, dummies_home, dummies_purpose], axis = 1)
df_loandata.info()

In [None]:
df_loandata = df_loandata.drop(["home_ownership", "purpose"], axis = 1)

In [None]:
df_loandata.info()

In [None]:
df_loandata.head()

## Create own numerical category

In [None]:
df_loandata.loan_status.value_counts()

In [None]:
def loanBad(d):
    """Flag a loan record as bad (1) or not bad (0).

    ``d`` is any record that can be indexed with ``'loan_status'`` (for
    example a DataFrame row passed by ``apply(..., axis=1)``). The statuses
    'Charged Off' and 'In Grace Period' count as bad; anything else is 0.
    """
    bad_statuses = ('Charged Off', 'In Grace Period')
    return 1 if d['loan_status'] in bad_statuses else 0
df_loandata['loan_is_bad'] = df_loandata.apply(loanBad, axis = 1)
df_loandata.head()

## Create your target dataset

In [None]:
y = df_loandata['loan_is_bad']
y.value_counts()

In [None]:
df_loandata = df_loandata.drop('loan_status', axis = 1) #this drops the target variable from your original dataset,
# axis = 1 refers to dropping a column - where axis = 0 would drop row

## Save a copy of the data as a csv file
### Why at this point?
- includes target and feature variables

In [None]:
df_loandata.to_csv('LoanData_Cleansed.csv')

### Create your Feature Dataset

In [None]:
X = df_loandata.drop('loan_is_bad', axis = 1)
X.info()

In [None]:
X.head()

# YAY!YAY!YAY!YAY!YAY!YAY!YAY!YAY!YAY!YAY!YAY!YAY!YAY!YAY!YAY!YAY!YAY!

# You have created your X and y datasets.  You are ready to model and analyze.
 
# YAY!YAY!YAY!YAY!YAY!YAY!YAY!YAY!YAY!YAY!YAY!YAY!YAY!YAY!YAY!YAY!YAY!

# --------------------------------------------------------------------------------
# XXXXXXXXXXXXXXXXXXX  Split the Data XXXXXXXXXXXXXXXXXXX
# --------------------------------------------------------------------------------

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 42)

In [None]:
y_train

In [None]:
y_train.values.reshape(-1,1)

## Feature Scaling
- StandardScaler
    - The StandardScaler assumes your data is normally distributed within each feature and will scale them such that the distribution is now centred around 0, with a standard deviation of 1.
    ![alt text](images/StandardScalar.png "Standard")
- MinMaxScaler
    - The MinMaxScaler is probably the most famous scaling algorithm, and applies the following formula to each feature:
    ![alt text](images/MinMaxScalar.png "MinMax")
    

From http://benalexkeen.com/feature-scaling-with-scikit-learn/

In [None]:
X_train.head()

In [None]:
# Standardise the features. The scaler learns each column's mean and standard
# deviation from the training split only; the test split is then transformed
# with those same statistics. Re-fitting on the test split would scale the two
# sets differently and leak test-set information into the evaluation.
scaler = StandardScaler()

X_train_sc = scaler.fit_transform(X_train)
X_train_sc = pd.DataFrame(X_train_sc, columns=X_train.columns)

# transform (not fit_transform): reuse the statistics fitted on X_train.
X_test_sc = scaler.transform(X_test)
X_test_sc = pd.DataFrame(X_test_sc, columns=X_test.columns)

## To scale or not to scale?? Why after the split?

In [None]:
# Side-by-side density plots of three features: raw training values on the
# left axis, standardised training values on the right axis.
fig, (ax1, ax2) = plt.subplots(ncols=2, figsize=(20, 5))
kde_features = ['loan_amnt', 'annual_inc', 'tot_cur_debt']

ax1.set_title('Before Scaling')
for feature in kde_features:
    sns.kdeplot(X_train[feature], ax=ax1)

ax2.set_title('After Standard Scaler')
for feature in kde_features:
    sns.kdeplot(X_train_sc[feature], ax=ax2)

plt.show()

In [None]:
X_train_sc.head()

In [None]:
X_test_sc.head()

# --------------------------------------------------------------------------------
# XXXXXXXXXXXXXXX  Select and Run the Model XXXXXXXXXXXXXXX
# --------------------------------------------------------------------------------

# XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
# Classification


### Logistic Regression
using non-scaled data

In [None]:
loan_names=X_train.columns.values

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, classification_report

In [None]:
#without weights
# Baseline: logistic regression with default settings (no class weights),
# fitted on the unscaled training features.
logr = LogisticRegression()
logr.fit(X_train, y_train)
# score() on the held-out test split.
score = logr.score(X_test, y_test)
print(score)
print("")
# Per-class breakdown of the same test-set predictions.
log_pred = logr.predict(X_test)
print(confusion_matrix(y_test, log_pred))
print("")
print(classification_report(y_test, log_pred))

# --------------------------------------------------------------------------------
# XXXXXXXXXXXXXXXXX  Fine Tune the Model XXXXXXXXXXXXXXXXX
# --------------------------------------------------------------------------------

### Logistic Regression w/ Scaler

In [None]:
# Logistic regression on the standardised features, with class 1 (bad loans)
# given a weight of 3 and class 0 a weight of 1.
logrsc = LogisticRegression(class_weight = {0:1, 1:3})
logrsc.fit(X_train_sc, y_train)
# Test-set score on the scaled test features.
score = logrsc.score(X_test_sc, y_test)
score

In [None]:
scaled_pred = logrsc.predict(X_test_sc)

In [None]:
print(classification_report(y_test, scaled_pred))

In [None]:
print(confusion_matrix(y_test, scaled_pred))

### Decision Tree
![Decision-Tree-Algorithms.png](images/Decision-Tree-Algorithms.png)

In [None]:
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.tree import DecisionTreeClassifier

In [None]:
classifier=DecisionTreeClassifier()
classifier=classifier.fit(X_train,y_train)

In [None]:
df_loanpred = classifier.predict(X_test)
print("Confusion Matrix")
print(confusion_matrix(y_test, df_loanpred))
print("")
print("classification Report")
print(classification_report(y_test, df_loanpred))

In [None]:
#Generating decision tree in a file(image)
from sklearn import tree
from io import StringIO
from IPython.display import Image, display

In [None]:
out = StringIO()
tree.export_graphviz(classifier, out_file=out, filled=True,rounded=True)
from sklearn.tree import export_graphviz
import pydot 

In [None]:
from sklearn.tree import export_graphviz
import pydot 

In [None]:
dot_data = StringIO()  
export_graphviz(classifier, out_file=dot_data,filled=True,rounded=True)

graph = pydot.graph_from_dot_data(dot_data.getvalue())  
Image(graph[0].create_png())  

In [None]:
#Using Decision Tree Classifier
clf2=DecisionTreeClassifier(max_depth=5, max_leaf_nodes=10)
clf2=clf2.fit(X_train,y_train)
clf2.score(X_test, y_test)

In [None]:
dot_data = StringIO()  
export_graphviz(clf2, out_file=dot_data, feature_names=loan_names, filled=True,rounded=True)

graph = pydot.graph_from_dot_data(dot_data.getvalue())  
Image(graph[0].create_png()) 

In [None]:
# Sweep max_depth from 1 to 11, fitting one decision tree per depth on the
# training split and recording its score on the test split.
# NOTE(review): the depth is being chosen from test-set scores, so the chosen
# model's test score is optimistic - consider a validation split or CV.
depth = range(1,12)
scores = []

for d in depth:
    # NOTE: this rebinds the notebook-level name `classifier`; after the loop
    # it refers to the last tree fitted here (max_depth = 11).
    classifier=DecisionTreeClassifier(max_depth = d)
    classifier=classifier.fit(X_train,y_train)
    scores.append(classifier.score(X_test, y_test))
    print("iteration {} done".format(d))


# Plot test score against tree depth.
plt.plot(depth, scores, '-o')
plt.xlabel('depth, d')
plt.ylabel('scores')
plt.xticks(depth)
plt.show()

In [None]:
# Fit a constrained tree (depth <= 6, at most 10 leaves) and score it on the
# test split. The tree configured here is the one that must be fitted: calling
# fit on the unrelated `classifier` object would discard these settings and
# make clf3 an alias of that other model.
clf3=DecisionTreeClassifier(max_depth = 6, max_leaf_nodes=10)
clf3=clf3.fit(X_train,y_train)
clf3.score(X_test, y_test)

In [None]:
dot_data = StringIO()  
export_graphviz(clf3, out_file=dot_data, feature_names=loan_names, filled=True,rounded=True)

graph = pydot.graph_from_dot_data(dot_data.getvalue())  
Image(graph[0].create_png()) 

In [None]:
df_loanpred3 = clf3.predict(X_test)
print("Confusion Matrix")
print(confusion_matrix(y_test, df_loanpred3))
print("")
print("classification Report")
print(classification_report(y_test, df_loanpred3))

In [None]:
# Feature importances of the model currently bound to `classifier`.
# NOTE(review): assuming cells ran top to bottom, that is the last tree from
# the depth sweep (max_depth = 11) - confirm this is the intended model.
dt_fi = pd.DataFrame(classifier.feature_importances_)

In [None]:
names = pd.DataFrame(list(X.columns))
df_feat_imp = pd.concat([dt_fi, names], axis = 1)

In [None]:
df_feat_imp.columns = ['Importance', 'Features']
df_feat_imp.sort_values('Importance', ascending = False)

### Random Forest
![Ensemble-Algorithms.png](images/Ensemble-Algorithms.png)

In [None]:
from sklearn.ensemble import RandomForestClassifier

In [None]:
rf = RandomForestClassifier(n_estimators = 90, max_depth = 10)
rf = rf.fit(X_train, y_train)
score = rf.score(X_test, y_test)
print(score)
rf_pred = rf.predict(X_test)
print(classification_report(y_test, rf_pred))

In [None]:
print(confusion_matrix(y_test, rf_pred))

In [None]:
rf = RandomForestClassifier(n_estimators = 90, max_depth = 10)
rf = rf.fit(X_train_sc, y_train)
score = rf.score(X_test_sc, y_test)
print(score)
rf_pred1 = rf.predict(X_test_sc)
print(classification_report(y_test, rf_pred1))

In [None]:
print(confusion_matrix(y_test, rf_pred1))

In [None]:
fi = pd.DataFrame(rf.feature_importances_)

In [None]:
columns = pd.DataFrame(list(X.columns))

In [None]:
features = pd.concat([columns, fi], axis = 1)
features.columns = ['Feature', 'Importance']

In [None]:
features.sort_values("Importance", ascending = False)

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline

In [None]:
# Create a pipeline
# Single-step pipeline whose 'classifier' step is swapped out by the grid
# search; the RandomForestClassifier here is only a placeholder.
pipe = Pipeline([('classifier', RandomForestClassifier())])

# Candidate learning algorithms and their hyperparameters.
# The logistic regression uses solver='liblinear' because the grid includes
# the 'l1' penalty, which the default 'lbfgs' solver does not support; with
# the default solver every l1 candidate would fail to fit.
search_space = [{'classifier': [LogisticRegression(solver='liblinear')],
                 'classifier__penalty': ['l1', 'l2'],
                 'classifier__C': np.logspace(0, 4, 10)},
                {'classifier': [RandomForestClassifier()],
                 'classifier__n_estimators': [10, 100, 1000],
                 'classifier__max_features': [1, 2, 3]}]

In [None]:
# Create grid search 
clf_best = GridSearchCV(pipe, search_space, cv=5, verbose=0)

In [None]:
# Fit grid search
best_model = clf_best.fit(X_train, y_train)

In [None]:
# View best model
best_model.best_estimator_.get_params()['classifier']

In [None]:
best_model.predict(X_test)

In [None]:
# Evaluate the grid-search winner on the test split: overall score, then the
# confusion matrix, then the per-class classification report.
score = best_model.score(X_test, y_test)
print("Accuracy Score")
print(score)
best_model_pred = best_model.predict(X_test)
print("")
print("Confusion Matrix")
print(confusion_matrix(y_test, best_model_pred))
print("")
# This heading previously repeated "Confusion Matrix" above the report.
print("Classification Report")
print(classification_report(y_test, best_model_pred))

## Regression Analysis
![Regression-Algorithms.png](images/Regression-Algorithms.png)

In [None]:
df_loandata_reg = df_loandata[df_loandata['loan_is_bad'] == 0]

In [None]:
df_loandata_reg.info()

### Scikit Simple Regression
using annual_inc as predictor

In [None]:
X = df_loandata_reg.drop(['loan_is_bad', 'loan_amnt'], axis =1)
X.info()

In [None]:
y = df_loandata_reg['loan_amnt']

In [None]:
y

In [None]:
y.values.reshape(-1, 1)

In [None]:
from sklearn import datasets, linear_model
from sklearn.metrics import mean_squared_error, explained_variance_score, r2_score

In [None]:
reg = linear_model.LinearRegression()
annual_income = df_loandata_reg[['annual_inc']]

In [None]:
reg.fit(annual_income, y)

In [None]:
predictions = reg.predict(annual_income)

In [None]:
df_predictions = pd.DataFrame(predictions)

In [None]:
sns.lmplot(x="annual_inc", y="loan_amnt", data=df_loandata_reg,
            ci=None,  markers = 'o', order=3, hue="term", scatter_kws={"s": 50});

In [None]:
# Linear fit of loan amount against annual income, one panel (and colour) per
# loan term.
g = sns.lmplot(x="annual_inc", y="loan_amnt", col="term", hue="term",
              data=df_loandata_reg)
# Label the axes, cap the y-axis at 40,000 and tighten the gap between panels.
# NOTE: `g` is rebound to the result of the final call in this chain
# (subplots_adjust), so it no longer refers to the plot grid afterwards.
g = (g.set_axis_labels("Annual Income", "Loan Amount")
       .set(ylim=(0, 40000))
       .fig.subplots_adjust(wspace=.02))

### Add an order of 3

In [None]:
# Same plot as a third-order polynomial fit (order=3), one panel per loan term.
g = sns.lmplot(x="annual_inc", y="loan_amnt", order=3, col="term", hue="term",
              data=df_loandata_reg)
# Label the axes, cap the y-axis at 40,000 and tighten the gap between panels.
# NOTE: `g` is rebound to the result of the final call in this chain
# (subplots_adjust), so it no longer refers to the plot grid afterwards.
g = (g.set_axis_labels("Annual Income", "Loan Amount")
       .set(ylim=(0, 40000))
       .fig.subplots_adjust(wspace=.02))

In [None]:
plt.scatter(annual_income, y, color = 'blue')
plt.plot(annual_income, predictions, color = 'black')
plt.show()

In [None]:
reg.score(annual_income, y)

### Correlation Matrix

In [None]:
colormap = plt.cm.viridis
plt.figure(figsize=(14,12))
plt.title('Pearson Correlation of Features', y=1.05, size=15)
sns.heatmap(df_loandata_reg.astype(float).corr(),linewidths=0.1, vmax=1.0, square=True, cmap=colormap, linecolor='white', annot=True)

## Lasso Regression

In [None]:
from sklearn.linear_model import Lasso

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 42)

In [None]:
# Standardise the regression features. The scaler is fitted on the training
# split only and the test split is transformed with those same statistics, so
# both sets share one scale and no test-set information leaks into the model.
scaler = StandardScaler()

X_train_sc = scaler.fit_transform(X_train)
X_train_sc = pd.DataFrame(X_train_sc, columns=X_train.columns)

# transform (not fit_transform): reuse the statistics fitted on X_train.
X_test_sc = scaler.transform(X_test)
X_test_sc = pd.DataFrame(X_test_sc, columns=X_test.columns)

### Fine tune the model to find optimal alpha value for Lasso

In [None]:
alphas = [0.1, 0.5, 1, 5, 10, 25, 50, 100]
scores = []

for a in alphas:
    lasso = Lasso(alpha = a)
    lasso.fit(X_train_sc, y_train)
    scores.append(lasso.score(X_test_sc, y_test))
    print("iteration {} done".format(a))


plt.plot(alphas, scores, '-o')
plt.xlabel('alpha, a')
plt.ylabel('scores')
plt.xticks(alphas)
plt.show()

Alpha = 10 provides the best score, although only by a small margin.

In [None]:
lasso = Lasso(alpha=10.0)

In [None]:
lasso.fit(X_train_sc, y_train)

In [None]:
# Predict on the scaled TEST split. These predictions are later placed next to
# the ridge predictions (also made on X_test_sc) and plotted against them, so
# both must cover the same rows; predicting on the training split would give a
# different number of rows and pair up unrelated samples.
lasso_pred = lasso.predict(X_test_sc)

In [None]:
lasso.score(X_test_sc, y_test)

In [None]:
lasso.coef_

In [None]:
df_lasso = pd.DataFrame(lasso.coef_)

In [None]:
names = pd.DataFrame(list(X.columns))

In [None]:
df_lasso = pd.concat([df_lasso, names], axis = 1)

In [None]:
df_lasso.columns = ['Coefficient', 'Feature']
df_lasso

## Ridge

### Import needed libraries

In [None]:
from sklearn.linear_model import Ridge

### Fine tune the model to find optimal alpha value

In [None]:
alphas = [1, 5, 10, 25, 50, 100, 150, 200, 300, 400, 500]
scores = []

for a in alphas:
    ridge = Ridge(alpha = a)
    ridge.fit(X_train_sc, y_train)
    scores.append(ridge.score(X_test_sc, y_test))
    print("iteration {} done".format(a))


plt.plot(alphas, scores, '-o')
plt.xlabel('alpha, a')
plt.ylabel('scores')
plt.xticks(alphas)
plt.show()

In [None]:
ridge = Ridge(alpha=150)

In [None]:
ridge.fit(X_train_sc, y_train)

In [None]:
ridge_preds = ridge.predict(X_test_sc)

In [None]:
ridge.score(X_test_sc, y_test)

In [None]:
df_ridge = pd.DataFrame(ridge.coef_)

names = pd.DataFrame(list(X.columns))

df_ridge = pd.concat([df_ridge, names], axis = 1)

df_ridge.columns = ['Coefficient', 'Feature']
df_ridge

In [None]:
lasso_pred = pd.DataFrame(lasso_pred)
ridge_pred = pd.DataFrame(ridge_preds)
preds = pd.concat([lasso_pred, ridge_pred], axis = 1)

In [None]:
preds.columns = ['Lasso', 'Ridge']

In [None]:
sns.regplot(x = 'Lasso', y = 'Ridge', data = preds)
plt.ylim(0,40000)