In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score


In [2]:
data=pd.read_csv('./Dataset/diamonds_processed.csv',index_col=0)
data.head()

Unnamed: 0_level_0,price,cut_Fair,cut_Good,cut_Ideal,cut_Premium,cut_Very Good,color_D,color_E,color_F,color_G,color_H,color_I,color_J,carat,depth,table,x,y,z
clarity,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
2,945,0,0,0,1,0,0,0,1,0,0,0,0,-0.839708,0.319573,-0.20161,-0.900974,-0.953835,-0.890947
4,746,0,0,0,0,1,0,0,0,1,0,0,0,-0.902687,-1.801708,1.555345,-0.980896,-0.92699,-1.120639
1,5866,0,0,1,0,0,0,0,0,0,0,1,0,0.923703,0.036735,0.237629,0.99052,1.041622,1.018362
1,5006,0,0,0,1,0,1,0,0,0,0,0,0,0.440865,-0.033974,-0.20161,0.670831,0.630003,0.645114
3,666,0,1,0,0,0,0,0,0,0,0,1,0,-0.839708,1.450923,-1.519326,-0.936495,-0.909094,-0.776102


In [3]:
from sklearn.linear_model import LinearRegression
def linear_model(x_train,y_train):

    print('Linear Regression')
    
    linear_regression = LinearRegression()
    linear_regression.fit(x_train,y_train)
    
    return linear_regression

from sklearn.linear_model import Lasso
def lasso_model(x_train,y_train):

    print('Lasso Regression')
    
    lasso_regression = Lasso(alpha=0.8,max_iter=10000)
    lasso_regression.fit(x_train,y_train)
    
    return lasso_regression

from sklearn.linear_model import Ridge
def ridge_model(x_train,y_train):

    print('Ridge Regression')
    
    ridge_regression = Ridge(alpha=0.9)
    ridge_regression.fit(x_train,y_train)
    
    return ridge_regression


In [4]:
def build_and_train_model(data,target_name,reg_fn):
    X = data.drop(target_name, axis=1)
    Y=data[target_name]
    
    x_train, x_test, y_train, y_test=train_test_split(X,Y,test_size=0.2,random_state=0)
    
    model=reg_fn(x_train,y_train)
    
    score=model.score(x_train,y_train)
    print('Training Score : ', score)
    
    y_pred=model.predict(x_test)
    r_score=r2_score(y_test,y_pred)
    print("Testing Score : ",r_score)
    
    return {'model':model,
           'x_train':x_train, 'x_test':x_test,
           'y_train':y_train, 'y_test':y_test,
           'y_pred':y_pred
           }

In [5]:
linear_reg=build_and_train_model(data,'price',linear_model)

Linear Regression
Training Score :  0.880772672816
Testing Score :  0.8686317293


In [6]:
lasso_reg=build_and_train_model(data,'price',lasso_model)

Lasso Regression
Training Score :  0.880687534732
Testing Score :  0.868452368952


In [7]:
ridge_reg=build_and_train_model(data,'price',ridge_model)

Ridge Regression
Training Score :  0.880769432795
Testing Score :  0.868586474155


In [8]:
linear_reg['model']

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False)

In [9]:
lasso_reg['model']

Lasso(alpha=0.8, copy_X=True, fit_intercept=True, max_iter=10000,
   normalize=False, positive=False, precompute=False, random_state=None,
   selection='cyclic', tol=0.0001, warm_start=False)

In [10]:
from sklearn.linear_model import SGDRegressor
def sdg_model(x_train,y_train):

    print('SDG Regression')
    
    sdg_regression = Ridge(max_iter=2000)
    sdg_regression.fit(x_train,y_train)
    
    return sdg_regression


In [11]:
sdg_reg=build_and_train_model(data,'price',sdg_model)

SDG Regression
Training Score :  0.880768270271
Testing Score :  0.868586348851
