In [34]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import accuracy_score
from sklearn.metrics import r2_score
from sklearn.metrics import mean_absolute_percentage_error

In [2]:
data = pd.read_csv('cleaned.csv')
data.head()

Unnamed: 0,MRP,Processor,RAM,SSD,OS,Brand,RamType,Display,HDD
0,36990.0,Intel Core i3 Processor,8,256,Windows 11,Lenovo,DDR4,35.56,0
1,39990.0,Intel Core i3 Processor,8,512,Windows 11,Lenovo,DDR4,39.62,0
2,32990.0,Intel Core i3 Processor,8,512,Windows 11,ASUS,DDR4,39.62,0
3,49990.0,AMD Ryzen 5 Hexa Core Processor,8,512,Windows 10,HP,DDR4,39.62,0
4,49990.0,Intel Core i5 Processor,8,512,Windows 11,ASUS,DDR4,39.62,0


In [3]:
data = data.drop(columns = ['Brand'])

In [4]:
data.head()

Unnamed: 0,MRP,Processor,RAM,SSD,OS,RamType,Display,HDD
0,36990.0,Intel Core i3 Processor,8,256,Windows 11,DDR4,35.56,0
1,39990.0,Intel Core i3 Processor,8,512,Windows 11,DDR4,39.62,0
2,32990.0,Intel Core i3 Processor,8,512,Windows 11,DDR4,39.62,0
3,49990.0,AMD Ryzen 5 Hexa Core Processor,8,512,Windows 10,DDR4,39.62,0
4,49990.0,Intel Core i5 Processor,8,512,Windows 11,DDR4,39.62,0


In [5]:
x_train, x_test, y_train, y_test = train_test_split(data.drop(columns = 'MRP'), data['MRP'], test_size = 0.2)

In [6]:
le = LabelEncoder()

In [7]:
data['Processor_Encoded'] = le.fit_transform(data['Processor'])
data['OS_Encoded'] = le.fit_transform(data['OS'])
data['RamType_Encoded'] = le.fit_transform(data['RamType'])

In [8]:
data.head()

Unnamed: 0,MRP,Processor,RAM,SSD,OS,RamType,Display,HDD,Processor_Encoded,OS_Encoded,RamType_Encoded
0,36990.0,Intel Core i3 Processor,8,256,Windows 11,DDR4,35.56,0,17,4,0
1,39990.0,Intel Core i3 Processor,8,512,Windows 11,DDR4,39.62,0,17,4,0
2,32990.0,Intel Core i3 Processor,8,512,Windows 11,DDR4,39.62,0,17,4,0
3,49990.0,AMD Ryzen 5 Hexa Core Processor,8,512,Windows 10,DDR4,39.62,0,6,3,0
4,49990.0,Intel Core i5 Processor,8,512,Windows 11,DDR4,39.62,0,18,4,0


In [9]:
laptop_model = data[['Processor_Encoded', 'OS_Encoded', 'RamType_Encoded', 'RAM', 'MRP', 'SSD', 'HDD', 'Display']]
laptop_model.head()

Unnamed: 0,Processor_Encoded,OS_Encoded,RamType_Encoded,RAM,MRP,SSD,HDD,Display
0,17,4,0,8,36990.0,256,0,35.56
1,17,4,0,8,39990.0,512,0,39.62
2,17,4,0,8,32990.0,512,0,39.62
3,6,3,0,8,49990.0,512,0,39.62
4,18,4,0,8,49990.0,512,0,39.62


In [18]:
x = laptop_model.drop('MRP', axis = 1)
y = laptop_model['MRP'].values

In [62]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2)

In [63]:
lr = LinearRegression()
lr.fit(x_train, y_train)
y_pred_lr = lr.predict(x_test)

In [64]:
print('R2 Score:        ', r2_score(y_pred_lr, y_test))
print('MA Percent Error:', mean_absolute_percentage_error(y_pred_lr, y_test))

R2 Score:         0.766015556624396
MA Percent Error: 0.3036590034557482


In [65]:
rf = RandomForestRegressor()
rf.fit(x_train, y_train)
y_pred_rf = rf.predict(x_test)

In [66]:
print('R2 Score:        ', r2_score(y_pred_rf, y_test))
print('MA Percent Error:', mean_absolute_percentage_error(y_pred_rf, y_test))

R2 Score:         0.8096099236151931
MA Percent Error: 0.17635459310648618


In [67]:
y_pred_rf

array([ 56869.00952381,  42678.34631105, 339870.        ,  31413.42      ,
        89251.88952381,  80904.74419444,  60872.12174603,  92517.85395635,
        80814.28868254,  52102.43623016,  50483.39262951,  33123.2817096 ,
        40012.72166667, 148116.73      ,  70123.63507995,  53785.97253968,
        33877.97083333,  41816.04595238,  60454.57539683,  73313.76572222,
        26045.93333333,  85905.70595238,  58000.51564109,  75193.90210945,
       293761.        ,  56111.36905733, 142095.        ,  42678.34631105,
        66788.53325397,  58000.51564109,  37456.81392857,  91740.        ,
        42678.34631105, 137319.06190476,  42152.94702381,  20728.3702381 ,
        56111.36905733,  82732.62252778,  37854.11392857,  72500.65      ,
        56111.36905733, 278029.        ,  58000.51564109,  58000.51564109,
        41816.04595238,  74366.71948052,  20728.3702381 ,  71509.65      ,
       299284.4       ,  28863.29511905,  57528.27439683, 340918.8       ,
        42678.34631105,  

In [73]:
filename = 'rf_model'
pickle.dump(rf, open(filename, 'wb'))

In [75]:
loaded_model = pickle.load(open(filename, 'rb'))
loaded_model.predict(x_test)

array([ 56869.00952381,  42678.34631105, 339870.        ,  31413.42      ,
        89251.88952381,  80904.74419444,  60872.12174603,  92517.85395635,
        80814.28868254,  52102.43623016,  50483.39262951,  33123.2817096 ,
        40012.72166667, 148116.73      ,  70123.63507995,  53785.97253968,
        33877.97083333,  41816.04595238,  60454.57539683,  73313.76572222,
        26045.93333333,  85905.70595238,  58000.51564109,  75193.90210945,
       293761.        ,  56111.36905733, 142095.        ,  42678.34631105,
        66788.53325397,  58000.51564109,  37456.81392857,  91740.        ,
        42678.34631105, 137319.06190476,  42152.94702381,  20728.3702381 ,
        56111.36905733,  82732.62252778,  37854.11392857,  72500.65      ,
        56111.36905733, 278029.        ,  58000.51564109,  58000.51564109,
        41816.04595238,  74366.71948052,  20728.3702381 ,  71509.65      ,
       299284.4       ,  28863.29511905,  57528.27439683, 340918.8       ,
        42678.34631105,  