# Multiple Types of Regression

This notebook compares several regression models from scikit-learn, using a train/test split so that each model is scored on data it was not trained on.

In [1]:
import pandas as pd

import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

In [2]:
# Load the processed diamonds data; index_col=0 makes the first CSV column the index.
# NOTE(review): the head() output labels the index "clarity" and shows repeated
# values, which suggests the clarity feature is being consumed as the index and is
# therefore not among the columns the models train on -- confirm this is intended.
data = pd.read_csv('datasets/diamonds_processed.csv', index_col=0)

# Preview the first rows to check the columns that were loaded.
data.head()

Unnamed: 0_level_0,price,cut_Fair,cut_Good,cut_Ideal,cut_Premium,cut_Very Good,color_D,color_E,color_F,color_G,color_H,color_I,color_J,carat,depth,table,x,y,z
clarity,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
1,3633,0,1,0,0,0,0,0,1,0,0,0,0,0.451238,1.692467,1.561978,0.495029,0.404561,0.679789
4,3273,0,0,1,0,0,0,0,0,1,0,0,0,-0.15196,-1.070787,-0.646155,0.10522,0.13462,-0.007557
4,3167,0,0,0,1,0,0,0,1,0,0,0,0,-0.17276,0.31084,-0.204529,-0.001091,-0.065659,0.006763
0,3355,0,0,1,0,0,0,0,0,0,1,0,0,0.992037,0.103596,-0.646155,1.070883,1.014104,1.066421
3,1687,0,0,0,1,0,0,0,1,0,0,0,0,-0.609559,0.241759,0.237098,-0.550368,-0.570709,-0.537386


### Different regression models

### Linear Regression
https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LinearRegression.html

In [3]:
from sklearn.linear_model import LinearRegression

def linear_model(x_train, y_train):
    """Fit a default ``LinearRegression`` estimator on the training data.

    Prints a banner naming the model, then returns the fitted estimator.
    """
    print("Linear Regression ")
    # fit() hands back the estimator itself, so build-and-fit is one expression.
    return LinearRegression().fit(x_train, y_train)

### Lasso regression
https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Lasso.html

In [4]:
from sklearn.linear_model import Lasso

def lasso_model(x_train, y_train):
    """Fit a ``Lasso`` estimator (alpha=0.8, max_iter=10000) on the training data.

    Prints a banner naming the model, then returns the fitted estimator.
    """
    print("Lasso Regression")
    # Hyperparameters are kept together so they are easy to spot and adjust.
    hyperparams = {"alpha": 0.8, "max_iter": 10000}
    return Lasso(**hyperparams).fit(x_train, y_train)

### Ridge regression
https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Ridge.html

In [5]:
from sklearn.linear_model import Ridge

def ridge_model(x_train, y_train):
    """Fit a ``Ridge`` estimator (alpha=0.9) on the training data.

    Prints a banner naming the model, then returns the fitted estimator.
    """
    print("Ridge Regression")
    estimator = Ridge(alpha=0.9)
    # fit() returns the estimator it was called on.
    return estimator.fit(x_train, y_train)

In [6]:
def build_and_train_model(data, target_name, reg_fn, test_size=0.2, random_state=0):
    """Split ``data``, fit a regressor on the training part and print its scores.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing the target column together with the feature columns.
    target_name : str
        Column to predict; every other column is used as a feature.
    reg_fn : callable
        Called as ``reg_fn(x_train, y_train)``; must return a fitted estimator
        that provides ``score`` and ``predict``.
    test_size : float or int, default 0.2
        Passed straight to ``train_test_split``.
    random_state : int or None, default 0
        Passed straight to ``train_test_split``; a fixed value keeps the split
        identical between runs.

    Returns
    -------
    dict
        ``'model'``, ``'x_train'``, ``'x_test'``, ``'y_train'``, ``'y_test'``
        and ``'y_pred'`` (the predictions for ``x_test``).
    """
    X = data.drop(target_name, axis=1)
    Y = data[target_name]

    x_train, x_test, y_train, y_test = train_test_split(
        X, Y, test_size=test_size, random_state=random_state
    )

    model = reg_fn(x_train, y_train)

    # Score on the data the model was fitted on.
    score = model.score(x_train, y_train)
    print("Training Score : ", score)

    # R^2 on the held-out rows, computed from explicit predictions so that
    # y_pred can also be returned to the caller.
    y_pred = model.predict(x_test)
    r_score = r2_score(y_test, y_pred)
    print("Testing Score : ", r_score)

    return {
        'model': model,
        'x_train': x_train, 'x_test': x_test,
        'y_train': y_train, 'y_test': y_test,
        'y_pred': y_pred,
    }

In [7]:
# Train and score the plain linear model with "price" as the target.
linear_reg = build_and_train_model(data, "price", linear_model)

Linear Regression 
Training Score :  0.8861866384245513
Testing Score :  0.8706206688701211


In [8]:
# Train and score the Lasso model with "price" as the target.
lasso_reg = build_and_train_model(data, "price", lasso_model)

Lasso Regression
Training Score :  0.8861744397608655
Testing Score :  0.8707885924879772


In [9]:
# Train and score the Ridge model with "price" as the target.
ridge_reg = build_and_train_model(data, "price", ridge_model)

Ridge Regression
Training Score :  0.8861861274008298
Testing Score :  0.8706042491018482


In [10]:
# Show the fitted estimator stored in the result dict (its repr lists the settings used).
linear_reg['model']

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)

### SGD Regression
https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.SGDRegressor.html

In [11]:
from sklearn.linear_model import SGDRegressor

def sgd_model(x_train, y_train, random_state=0):
    """Fit an ``SGDRegressor`` (max_iter=2000) on the training data.

    Parameters
    ----------
    x_train, y_train
        Training features and target, passed unchanged to ``fit``.
    random_state : int or None, default 0
        Seed forwarded to ``SGDRegressor``. The estimator shuffles the
        training data between epochs, so without a fixed seed the fitted
        coefficients and the reported scores change from run to run.

    Returns
    -------
    SGDRegressor
        The fitted estimator.
    """
    print("SGD Regression")
    sgd_regression = SGDRegressor(max_iter=2000, random_state=random_state)

    sgd_regression.fit(x_train, y_train)

    return sgd_regression

In [12]:
# Train and score the SGD model with "price" as the target.
sgd_reg = build_and_train_model(data, "price", sgd_model)

SGD Regression
Training Score :  0.8858018649909748
Testing Score :  0.8703686860919269
