In [139]:
import pandas as pd
import seaborn as sns
import re
from sklearn.preprocessing import StandardScaler, LabelEncoder, PolynomialFeatures
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LinearRegression, Ridge, Lasso, ElasticNet, PoissonRegressor, TweedieRegressor, GammaRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import warnings

warnings.filterwarnings('ignore', category=UserWarning)

In [140]:
phone_data = pd.read_csv('./final_data.csv')

In [141]:
phone_data.drop(columns=['Unnamed: 0'], inplace=True)

In [142]:
phone_data

Unnamed: 0,Phone_name,Price,Release Date,Display Size,Refresh Rate,Rear Camera,Front Camera,Processor,RAM,Storage,Battery,px_width,px_height,Brand
0,Samsung Galaxy S24 FE,81500,2024-10-03,6.7,120,50,10,Exynos 2400e,8.0,128.0,4700,1080,2340,Samsung
1,Google Pixel 9 Pro Fold,245000,2024-09-04,8.0,120,48,10,Google Tensor G4,16.0,256.0,4650,2076,2152,Google
2,OnePlus Nord 4,57000,2024-08-08,6.7,120,50,16,Qualcomm Snapdragon 7+ Gen 3,8.0,128.0,5500,1240,2772,OnePlus
3,Google Pixel 9 Pro XL,145000,2024-08-22,6.8,120,50,42,Google Tensor G4,16.0,128.0,5060,1344,2992,Google
4,Huawei Pura 70 Ultra,200000,2024-04-29,6.8,120,50,13,Kirin 9010,16.0,512.0,5200,1280,2844,Huawei
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
110,Pixel 6,70000,2021-01-01,6.4,60,12,8,Google Tensor,8.0,128.0,4614,1080,2340,Google
111,Pixel 6a,50000,2022-01-01,6.1,60,12,8,Google Tensor,6.0,128.0,4410,720,1520,Google
112,A56,9000,2022-01-01,5.7,60,5,5,Unisoc SC7731E,1.0,16.0,4000,720,1600,Itel
113,S17,12000,2021-01-01,6.6,60,5,8,Unisoc SC9863A,2.0,16.0,5000,720,1600,Itel


Breaking down Release Date

In [143]:
# Converting to Datetime
phone_data['Release Date'] = pd.to_datetime(phone_data['Release Date'])

In [144]:
# Getting the Release Year
phone_data['Release_Year'] = phone_data['Release Date'].dt.year

In [145]:
# Getting the days since Release
phone_data['Days_Since_Release'] = pd.Timestamp('2024-10-12') - (phone_data['Release Date']) 

In [146]:
# Converting Days Since Release to int
phone_data['Days_Since_Release'] = phone_data['Days_Since_Release'].apply(lambda x: x.days)

In [147]:
phone_data['Processor'].nunique()

79

In [148]:
phone_data['Brand'].nunique()

16

Label Encoding

In [149]:
le = LabelEncoder()

In [150]:
# Label Encoding Phone name
phone_data['Phone_name_label'] = le.fit_transform(phone_data['Phone_name'])

In [151]:
# Label Encoding Processor
phone_data['Processor_label'] = le.fit_transform(phone_data['Processor'])

In [152]:
# Label Encoding Brand
phone_data['Brand_label'] = le.fit_transform(phone_data['Brand'])

Numerical Columns

In [153]:
num_columns = phone_data.drop(columns=['Phone_name', 'Processor', 'Phone_name_label', 'Release_Year', 'Brand', 'Release Date'])

In [154]:
num_columns

Unnamed: 0,Price,Display Size,Refresh Rate,Rear Camera,Front Camera,RAM,Storage,Battery,px_width,px_height,Days_Since_Release,Processor_label,Brand_label
0,81500,6.7,120,50,10,8.0,128.0,4700,1080,2340,9,10,11
1,245000,8.0,120,48,10,16.0,256.0,4650,2076,2152,38,14,1
2,57000,6.7,120,50,16,8.0,128.0,5500,1240,2772,65,45,8
3,145000,6.8,120,50,42,16.0,128.0,5060,1344,2992,51,14,1
4,200000,6.8,120,50,13,16.0,512.0,5200,1280,2844,166,28,3
...,...,...,...,...,...,...,...,...,...,...,...,...,...
110,70000,6.4,60,12,8,8.0,128.0,4614,1080,2340,1380,11,1
111,50000,6.1,60,12,8,6.0,128.0,4410,720,1520,1015,11,1
112,9000,5.7,60,5,5,1.0,16.0,4000,720,1600,1015,73,5
113,12000,6.6,60,5,8,2.0,16.0,5000,720,1600,1380,74,5


In [155]:
num_columns.to_csv('./train_data.csv')

X and Y

In [156]:
x = num_columns.drop(columns=['Price'])
y = num_columns['Price']

Scaling

In [157]:
scaler = StandardScaler()

x = scaler.fit_transform(x)

Splitting the Data

In [158]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

Linear Regression

In [159]:
lin_reg = LinearRegression()

lin_reg.fit(x_train, y_train)

In [160]:
ytrain_prediction = lin_reg.predict(x_train)
ytest_prediction = lin_reg.predict(x_test)

Evaluation 

In [161]:
print('Train mae', mean_absolute_error(y_train, ytrain_prediction))
print('Test mae', mean_absolute_error(y_test, ytest_prediction))
print('Train mse', mean_squared_error(y_train, ytrain_prediction))
print('Test mse', mean_squared_error(y_test, ytest_prediction))
print('Train r2_score', r2_score(y_train, ytrain_prediction))
print('Test r2_score', r2_score(y_test, ytest_prediction))

Train mae 17149.386499077104
Test mae 21372.064924804345
Train mse 505826635.93179625
Test mse 880293181.3396736
Train r2_score 0.7994232720352187
Test r2_score 0.7148143220454


Transformation

In [162]:
# poly Transformation with degree 4
poly = PolynomialFeatures(degree=5)

# Transforming the x_train
x_train_poly = poly.fit_transform(x_train)
x_test_poly = poly.fit_transform(x_test)

In [163]:
# Creating the poly model
poly_model = LinearRegression()

poly_model.fit(x_train_poly, y_train)

In [164]:
# Getting the transformed polynomial models predictions

ytrain_poly_prediction = poly_model.predict(x_train_poly)
ytest_poly_prediction = poly_model.predict(x_test_poly)

In [165]:
# Getting the scores for the poly

print('Train mae', mean_absolute_error(y_train, ytrain_poly_prediction))
print('Test mae', mean_absolute_error(y_test, ytest_poly_prediction))
print('Train mse', mean_squared_error(y_train, ytrain_poly_prediction))
print('Test mse', mean_squared_error(y_test, ytest_poly_prediction))
print('Train r2_score', r2_score(y_train, ytrain_poly_prediction))
print('Test r2_score', r2_score(y_test, ytest_poly_prediction))

Train mae 7.420290469029999e-10
Test mae 88425.06137807034
Train mse 1.1639865477874518e-18
Test mse 40499312707.6576
Train r2_score 1.0
Test r2_score -12.120428734494556


Ridge Regularization

In [166]:
# looping through degrees and lambdas

for d in [2,3,4,5,6,7,]:
    for i in [0, 0.001, 0.01, 0.1, 10, 100, 1000]:

        # getting the transform function
        transform = PolynomialFeatures(degree=d)

        # transforming the train and test
        x_train_trans = transform.fit_transform(x_train)
        x_test_trans = transform.fit_transform(x_test)

        # defi Ridge
        ridge = Ridge(alpha=i)

        # fitting train data
        ridge.fit(x_train_trans, y_train)

        # Getting the train and test predictions
        ytrain_ridge_pred = ridge.predict(x_train_trans)
        ytest_ridge_pred = ridge.predict(x_test_trans)

        # Getting the scores
        print('degree:', d, 'alpha', i)
        print('Train mae:', mean_absolute_error(y_train, ytrain_ridge_pred))
        print('Test mae:', mean_absolute_error(y_test, ytest_ridge_pred))
        print('Train r2_score:', r2_score(y_train, ytrain_ridge_pred))
        print('Test r2_score:', r2_score(y_test, ytest_ridge_pred))
        print('\n')


degree: 2 alpha 0
Train mae: 5294.913043478261
Test mae: 108848.0
Train r2_score: 0.98210927226673
Test r2_score: -7.070891247207193


degree: 2 alpha 0.001
Train mae: 972.5692841716818
Test mae: 74487.45943568087
Train r2_score: 0.9991848057745983
Test r2_score: -3.477636806244531


degree: 2 alpha 0.01
Train mae: 1410.8656786913764
Test mae: 66011.91508845653
Train r2_score: 0.9984669148336284
Test r2_score: -2.8891126709894843


degree: 2 alpha 0.1
Train mae: 2270.8793706912916
Test mae: 48938.28658100092
Train r2_score: 0.9963644846440693
Test r2_score: -1.0933658685966652


degree: 2 alpha 10
Train mae: 8017.03939324065
Test mae: 24782.136624130315
Train r2_score: 0.9561495044662001
Test r2_score: 0.6482810743771019


degree: 2 alpha 100
Train mae: 15066.263474161033
Test mae: 29077.45546571451
Train r2_score: 0.8450642376505177
Test r2_score: 0.5173598627104681


degree: 2 alpha 1000
Train mae: 28972.859626997753
Test mae: 35454.623651241745
Train r2_score: 0.5011775183002338
Tes

  dual_coef = linalg.solve(K, y, assume_a="pos", overwrite_a=False)
  dual_coef = linalg.solve(K, y, assume_a="pos", overwrite_a=False)
  dual_coef = linalg.solve(K, y, assume_a="pos", overwrite_a=False)


degree: 5 alpha 0.01
Train mae: 1.7584862531309582
Test mae: 88390.70390602593
Train r2_score: 0.9999999878152569
Test r2_score: -12.101651488747441


degree: 5 alpha 0.1
Train mae: 17.334910000880843
Test mae: 88088.71672199981
Train r2_score: 0.9999988339803948
Test r2_score: -11.938273245618817


degree: 5 alpha 10
Train mae: 861.9600528617251
Test mae: 74807.15760179963
Train r2_score: 0.9980526646473369
Test r2_score: -7.139991645386816


degree: 5 alpha 100
Train mae: 2989.0585721279836
Test mae: 50013.0203995575
Train r2_score: 0.9869506274168239
Test r2_score: -2.975237513451961


degree: 5 alpha 1000
Train mae: 7884.429313070113
Test mae: 39284.84760401727
Train r2_score: 0.945255250676073
Test r2_score: -0.12524475075878727


degree: 6 alpha 0
Train mae: 2.632210231715125e-05
Test mae: 180679.58874192392
Train r2_score: 1.0
Test r2_score: -77.56390436631554


degree: 6 alpha 0.001
Train mae: 0.0979579892605018
Test mae: 180671.77082318344
Train r2_score: 0.9999999999423311
Te

  dual_coef = linalg.solve(K, y, assume_a="pos", overwrite_a=False)


degree: 7 alpha 0
Train mae: 0.00036648783264789256
Test mae: 454387.13584807125
Train r2_score: 0.9999999999999999
Test r2_score: -609.4716539972258


degree: 7 alpha 0.001
Train mae: 0.06583935147006306
Test mae: 454377.6538762544
Train r2_score: 0.9999999999675342
Test r2_score: -609.4433056876981


degree: 7 alpha 0.01
Train mae: 0.6554953490188421
Test mae: 454292.3930665969
Train r2_score: 0.9999999967612043
Test r2_score: -609.1885486408542


degree: 7 alpha 0.1
Train mae: 6.507072843331116
Test mae: 453450.1987878705
Train r2_score: 0.9999996832736967
Test r2_score: -606.686729767778


degree: 7 alpha 10
Train mae: 417.1601727559966
Test mae: 404011.5865860105
Train r2_score: 0.9990404661997369
Test r2_score: -500.8360463185962


degree: 7 alpha 100
Train mae: 1587.3162724968333
Test mae: 294268.68694025633
Train r2_score: 0.9923691488701814
Test r2_score: -383.57452742193937


degree: 7 alpha 1000
Train mae: 4230.540516684238
Test mae: 154642.76943016236
Train r2_score: 0.9707

degree of 2 and alpha of 10 is optimal

In [167]:
# optimizing the alphas

for i in[1,2,3,4,5,6,7,8,9,10]:
    degree = 2

    transform = PolynomialFeatures(degree=degree)

    x_train_trans = transform.fit_transform(x_train)
    x_test_trans = transform.fit_transform(x_test)

    ridge = Ridge(alpha=i)

    ridge.fit(x_train_trans, y_train)

    ytrain_ridge_pred = ridge.predict(x_train_trans)
    ytest_ridge_pred = ridge.predict(x_test_trans)

    print('alpha', i)
    print('Train mae:', mean_absolute_error(y_train, ytrain_ridge_pred))
    print('Test mae:', mean_absolute_error(y_test, ytest_ridge_pred))
    print('Train r2_score:', r2_score(y_train, ytrain_ridge_pred))
    print('Test r2_score:', r2_score(y_test, ytest_ridge_pred))
    print('\n')


alpha 1
Train mae: 4547.4888739579765
Test mae: 34264.59042911098
Train r2_score: 0.9860006119084849
Test r2_score: 0.3497278255037203


alpha 2
Train mae: 5471.384839291927
Test mae: 31464.17507662327
Train r2_score: 0.98011873906135
Test r2_score: 0.5125706044009515


alpha 3
Train mae: 6044.684550846799
Test mae: 29821.963525319792
Train r2_score: 0.9757872214401135
Test r2_score: 0.5740835483228353


alpha 4
Train mae: 6485.534938578539
Test mae: 28633.24869724115
Train r2_score: 0.9721646911432439
Test r2_score: 0.6055601505457642


alpha 5
Train mae: 6825.094153010046
Test mae: 27698.095173984977
Train r2_score: 0.9689654914525663
Test r2_score: 0.6236990417808672


alpha 6
Train mae: 7121.354517684356
Test mae: 26928.619996763246
Train r2_score: 0.9660555605938966
Test r2_score: 0.6346841972418714


alpha 7
Train mae: 7379.631875964044
Test mae: 26277.073854949973
Train r2_score: 0.963359456509197
Test r2_score: 0.6413898970604912


alpha 8
Train mae: 7610.993780859394
Test mae:

Lasso Regularization

In [168]:
# looping through degrees and lambdas

for d in [2,3,4,5,6,7,]:
    for i in [0, 0.001, 0.01, 0.1, 10, 100, 1000]:

        # getting the transform function
        transform = PolynomialFeatures(degree=d)

        # transforming the train and test
        x_train_trans = transform.fit_transform(x_train)
        x_test_trans = transform.fit_transform(x_test)

        # defi Ridge
        lasso = Lasso(alpha=i)

        # fitting train data
        lasso.fit(x_train_trans, y_train)

        # Getting the train and test predictions
        ytrain_lasso_pred = lasso.predict(x_train_trans)
        ytest_lasso_pred = lasso.predict(x_test_trans)

        # Getting the scores
        print('degree:', d, 'alpha', i)
        print('Train mae:', mean_absolute_error(y_train, ytrain_lasso_pred))
        print('Test mae:', mean_absolute_error(y_test, ytest_lasso_pred))
        print('Train r2_score:', r2_score(y_train, ytrain_lasso_pred))
        print('Test r2_score:', r2_score(y_test, ytest_lasso_pred))
        print('\n')

degree: 2 alpha 0
Train mae: 2063.3068961996696
Test mae: 54231.61137384893
Train r2_score: 0.9966365531119513
Test r2_score: -1.1493913166674745


degree: 2 alpha 0.001
Train mae: 2063.319212332437
Test mae: 54231.108405640305
Train r2_score: 0.9966365193876147
Test r2_score: -1.149336731995255


degree: 2 alpha 0.01
Train mae: 2063.430257662815
Test mae: 54226.58070393153
Train r2_score: 0.9966362153905532
Test r2_score: -1.1488455347973932


degree: 2 alpha 0.1
Train mae: 2064.545854401319
Test mae: 54181.27492445224
Train r2_score: 0.9966331360770909
Test r2_score: -1.1439385871473684


degree: 2 alpha 10
Train mae: 2282.9711094522
Test mae: 49291.93310768972
Train r2_score: 0.9959549268622214
Test r2_score: -0.6668432143628618


degree: 2 alpha 100
Train mae: 4839.608411685537
Test mae: 32704.852179085814
Train r2_score: 0.9842214284975026
Test r2_score: 0.40455525845222473


degree: 2 alpha 1000
Train mae: 10220.350629621184
Test mae: 25010.579402185125
Train r2_score: 0.92670352

Degree 2 alpha 1000

In [169]:
# optimizing the alphas

for i in[1000,1100,1200,1300,1400,1500,1700,1800,1900,2000,2100,2200]:
    degree = 2

    transform = PolynomialFeatures(degree=degree)

    x_train_trans = transform.fit_transform(x_train)
    x_test_trans = transform.fit_transform(x_test)

    lasso = Lasso(alpha=i)

    lasso.fit(x_train_trans, y_train)

    ytrain_lasso_pred = lasso.predict(x_train_trans)
    ytest_lasso_pred = lasso.predict(x_test_trans)

    print('alpha', i)
    print('Train mae:', mean_absolute_error(y_train, ytrain_lasso_pred))
    print('Test mae:', mean_absolute_error(y_test, ytest_lasso_pred))
    print('Train r2_score:', r2_score(y_train, ytrain_lasso_pred))
    print('Test r2_score:', r2_score(y_test, ytest_lasso_pred))
    print('\n')

alpha 1000
Train mae: 10220.350629621184
Test mae: 25010.579402185125
Train r2_score: 0.926703522989225
Test r2_score: 0.6199854827332645


alpha 1100
Train mae: 10453.81903931013
Test mae: 25113.94860259732
Train r2_score: 0.9226186488852082
Test r2_score: 0.6135507840200405


alpha 1200
Train mae: 10690.767871910637
Test mae: 25143.837342787392
Train r2_score: 0.9183102228299411
Test r2_score: 0.6080030404659365


alpha 1300
Train mae: 10935.833773017393
Test mae: 25224.65707830058
Train r2_score: 0.9138869044310052
Test r2_score: 0.6007969669152871


alpha 1400
Train mae: 11203.244764868683
Test mae: 25323.422925857027
Train r2_score: 0.909166066131595
Test r2_score: 0.5921977377340719


alpha 1500
Train mae: 11507.555794114136
Test mae: 25547.46769341617
Train r2_score: 0.9046855065901626
Test r2_score: 0.5832042597162671


alpha 1700
Train mae: 12099.21193829915
Test mae: 26197.10366230718
Train r2_score: 0.8956344892083462
Test r2_score: 0.5649227518502189


alpha 1800
Train mae:

alpha - 2000

Elastic Net

In [170]:
# looping through degrees and lambdas

for d in [2,3,4,5,6,7,]:
    for i in [0, 0.001, 0.01, 0.1, 10, 100, 1000]:

        # getting the transform function
        transform = PolynomialFeatures(degree=d)

        # transforming the train and test
        x_train_trans = transform.fit_transform(x_train)
        x_test_trans = transform.fit_transform(x_test)

        # defi Ridge
        elastic = ElasticNet(alpha=i)

        # fitting train data
        elastic.fit(x_train_trans, y_train)

        # Getting the train and test predictions
        ytrain_elastic_pred = elastic.predict(x_train_trans)
        ytest_elastic_pred = elastic.predict(x_test_trans)

        # Getting the scores
        print('degree:', d, 'alpha', i)
        print('Train mae:', mean_absolute_error(y_train, ytrain_elastic_pred))
        print('Test mae:', mean_absolute_error(y_test, ytest_elastic_pred))
        print('Train r2_score:', r2_score(y_train, ytrain_elastic_pred))
        print('Test r2_score:', r2_score(y_test, ytest_elastic_pred))
        print('\n')

degree: 2 alpha 0
Train mae: 2063.3068961996696
Test mae: 54231.61137384893
Train r2_score: 0.9966365531119513
Test r2_score: -1.1493913166674745


degree: 2 alpha 0.001
Train mae: 2234.9829023830684
Test mae: 50121.25354365472
Train r2_score: 0.9961492462213124
Test r2_score: -0.8168020009690931


degree: 2 alpha 0.01
Train mae: 3536.307157249417
Test mae: 36978.980618350644
Train r2_score: 0.99132660810076
Test r2_score: 0.07807697203331021


degree: 2 alpha 0.1
Train mae: 6698.349527247249
Test mae: 28053.03988003761
Train r2_score: 0.9701871038432455
Test r2_score: 0.6175924491746432


degree: 2 alpha 10
Train mae: 24257.76617957146
Test mae: 32883.44176832653
Train r2_score: 0.6474207597080093
Test r2_score: 0.4076040359006219


degree: 2 alpha 100
Train mae: 36264.08713148508
Test mae: 39333.67849174647
Train r2_score: 0.2170790103922957
Test r2_score: 0.13025523925464622


degree: 2 alpha 1000
Train mae: 39886.72806951436
Test mae: 41806.87975548145
Train r2_score: 0.02992926437

degree 4 alpha 0.001

In [171]:
phone_data.columns

Index(['Phone_name', 'Price', 'Release Date', 'Display Size', 'Refresh Rate',
       'Rear Camera', 'Front Camera', 'Processor', 'RAM', 'Storage', 'Battery',
       'px_width', 'px_height', 'Brand', 'Release_Year', 'Days_Since_Release',
       'Phone_name_label', 'Processor_label', 'Brand_label'],
      dtype='object')

Ridge with polynomial of degree 2

In [220]:
poly = PolynomialFeatures(degree=2)

x_train_trial_poly = poly.fit_transform(x_train)
x_test_trial_poly = poly.fit_transform(x_test)

param_grid = {'alpha': [20,21,22,24,26,28]}

ridge = Ridge()

ridge_grid = GridSearchCV(ridge, param_grid, cv=5, scoring='r2')

ridge_grid.fit(x_train_trial_poly, y_train)

best_ridge = ridge_grid.best_estimator_
print(best_ridge)
print('Best params:', ridge_grid.best_params_)
print('Best Ridge r2_score', ridge_grid.best_score_)

Ridge(alpha=21)
Best params: {'alpha': 21}
Best Ridge r2_score 0.5778349115719272


In [197]:
best_ridge.fit(x_train_trial_poly, y_train)

print('Train mae:', mean_absolute_error(y_train, best_ridge.predict(x_train_trial_poly)))
print('Test mae:', mean_absolute_error(y_test, best_ridge.predict(x_test_trial_poly)))
print('Train r2_score:', r2_score(y_train, best_ridge.predict(x_train_trial_poly)))
print('Test r2_score:', r2_score(y_test, best_ridge.predict(x_test_trial_poly)))

Train mae: 9566.933237373401
Test mae: 25192.657715384652
Train r2_score: 0.9353010964949076
Test r2_score: 0.6286561605567471


Ridge With Polynomial of degree 3

In [203]:
poly = PolynomialFeatures(degree=3)

x_train_trial_poly = poly.fit_transform(x_train)
x_test_trial_poly = poly.fit_transform(x_test)

param_grid = {'alpha': [34,35,36,37,38,39,40]}

ridge = Ridge()

ridge_grid = GridSearchCV(ridge, param_grid, cv=5, scoring='r2')

ridge_grid.fit(x_train_trial_poly, y_train)

best_ridge = ridge_grid.best_estimator_
print(best_ridge)
print('Best params:', ridge_grid.best_params_)
print('Best Ridge r2_score', ridge_grid.best_score_)

Ridge(alpha=35)
Best params: {'alpha': 35}
Best Ridge r2_score 0.5149549973972762


In [204]:
best_ridge.fit(x_train_trial_poly, y_train)

print('Train mae:', mean_absolute_error(y_train, best_ridge.predict(x_train_trial_poly)))
print('Test mae:', mean_absolute_error(y_test, best_ridge.predict(x_test_trial_poly)))
print('Train r2_score:', r2_score(y_train, best_ridge.predict(x_train_trial_poly)))
print('Test r2_score:', r2_score(y_test, best_ridge.predict(x_test_trial_poly)))

Train mae: 5179.239815787226
Test mae: 28127.474975263223
Train r2_score: 0.979381598892184
Test r2_score: 0.5283175415867609


In [176]:
rid = Ridge(alpha=4.2)

rid.fit(x_train_trial_poly, y_train)

print('Train mae:', mean_absolute_error(y_train, rid.predict(x_train_trial_poly)))
print('Test mae:', mean_absolute_error(y_test, rid.predict(x_test_trial_poly)))
print('Train r2_score:', r2_score(y_train, rid.predict(x_train_trial_poly)))
print('Test r2_score:', r2_score(y_test, rid.predict(x_test_trial_poly)))


Train mae: 1758.9896662657404
Test mae: 27850.48402450477
Train r2_score: 0.9968150955696771
Test r2_score: 0.559900752989533


In [216]:
param_grid = {'alpha':[3999,4000,4200, 4300, 4500,4700,4800]}

lasso = Lasso()

lasso_grid = GridSearchCV(lasso, param_grid, cv=5, scoring='r2')

lasso_grid.fit(x_train_trial_poly, y_train)

best_lasso = lasso_grid.best_estimator_
print('Best params:', lasso_grid.best_params_)
print('Best Lasso r2_score', lasso_grid.best_score_)

Best params: {'alpha': 4700}
Best Lasso r2_score -1.106728783988325


In [217]:
best_lasso.fit(x_train_trial_poly, y_train)

print('Train mae:', mean_absolute_error(y_train, best_lasso.predict(x_train_trial_poly)))
print('Test mae:', mean_absolute_error(y_test, best_lasso.predict(x_test_trial_poly)))
print('Train r2_score:', r2_score(y_train, best_lasso.predict(x_train_trial_poly)))
print('Test r2_score:', r2_score(y_test, best_lasso.predict(x_test_trial_poly)))

Train mae: 15415.944337049292
Test mae: 32609.721468892767
Train r2_score: 0.8560767898290156
Test r2_score: 0.10052421953003265


Elastic Net

In [222]:
param_grid = {'alpha': [0.5,0.6,0.7,0.8]}

elastic = ElasticNet()

elastic_grid = GridSearchCV(elastic, param_grid, cv=5, scoring='r2')

elastic_grid.fit(x_train_trial_poly, y_train)

best_elsatic = elastic_grid.best_estimator_
print('Best params:', elastic_grid.best_params_)
print('Best Ridge r2_score', elastic_grid.best_score_)

Best params: {'alpha': 0.6}
Best Ridge r2_score 0.5776897794114285


In [223]:
best_elsatic.fit(x_train_trial_poly, y_train)

print('Train mae:', mean_absolute_error(y_train, best_elsatic.predict(x_train_trial_poly)))
print('Test mae:', mean_absolute_error(y_test, best_elsatic.predict(x_test_trial_poly)))
print('Train r2_score:', r2_score(y_train, best_elsatic.predict(x_train_trial_poly)))
print('Test r2_score:', r2_score(y_test, best_elsatic.predict(x_test_trial_poly)))

Train mae: 10210.51168027805
Test mae: 26014.83116966401
Train r2_score: 0.9249062859514323
Test r2_score: 0.6124120734349996


Poisson Regressor

In [224]:
poly = PolynomialFeatures(degree=2)

x_train_trial_poly = poly.fit_transform(x_train)
x_test_trial_poly = poly.fit_transform(x_test)

param_grid = {'alpha': [7,8,9,10,11,12,13],
              'max_iter': [100,200,300,400,500]}

poisson = PoissonRegressor()

poisson_grid = GridSearchCV(poisson, param_grid, cv=5, scoring='r2')

poisson_grid.fit(x_train_trial_poly, y_train)

best_poisson = poisson_grid.best_estimator_
print(best_ridge)
print('Best params:', poisson_grid.best_params_)
print('Best poisson r2_score', poisson_grid.best_score_)

Ridge(alpha=21)
Best params: {'alpha': 13, 'max_iter': 100}
Best poisson r2_score -5.688833986477519


Tweedie Regressor

In [228]:
poly = PolynomialFeatures(degree=2)

x_train_trial_poly = poly.fit_transform(x_train)
x_test_trial_poly = poly.fit_transform(x_test)

param_grid = {'alpha': [0,0.1,0.2,0.3,0.4,0.5,0.7,0.8],
              'max_iter': [100,200,300,400,500]}

tweedie = TweedieRegressor()

tweedie_grid = GridSearchCV(tweedie, param_grid, cv=5, scoring='r2')

tweedie_grid.fit(x_train_trial_poly, y_train)

best_tweedie = tweedie_grid.best_estimator_
print(best_tweedie)
print('Best params:', tweedie_grid.best_params_)
print('Best tweedie r2_score', tweedie_grid.best_score_)

TweedieRegressor(alpha=0.3, max_iter=200)
Best params: {'alpha': 0.3, 'max_iter': 200}
Best tweedie r2_score 0.5776765697870052


In [229]:
best_tweedie.fit(x_train_trial_poly, y_train)

print('Train mae:', mean_absolute_error(y_train, best_tweedie.predict(x_train_trial_poly)))
print('Test mae:', mean_absolute_error(y_test, best_tweedie.predict(x_test_trial_poly)))
print('Train r2_score:', r2_score(y_train, best_tweedie.predict(x_train_trial_poly)))
print('Test r2_score:', r2_score(y_test, best_tweedie.predict(x_test_trial_poly)))

Train mae: 10209.784540907918
Test mae: 26014.149981724328
Train r2_score: 0.9249178317965284
Test r2_score: 0.6124279461139064


In [183]:
num_columns.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 115 entries, 0 to 114
Data columns (total 13 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   Price               115 non-null    int64  
 1   Display Size        115 non-null    float64
 2   Refresh Rate        115 non-null    int64  
 3   Rear Camera         115 non-null    int64  
 4   Front Camera        115 non-null    int64  
 5   RAM                 115 non-null    float64
 6   Storage             115 non-null    float64
 7   Battery             115 non-null    int64  
 8   px_width            115 non-null    int64  
 9   px_height           115 non-null    int64  
 10  Days_Since_Release  115 non-null    int64  
 11  Processor_label     115 non-null    int64  
 12  Brand_label         115 non-null    int64  
dtypes: float64(3), int64(10)
memory usage: 11.8 KB
