# Import Library

In [112]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Lasso
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV
import pickle

# Load Data

In [97]:
data = pd.read_csv('laptop_price.csv',encoding = "ISO-8859-1")

# Data Preprocessing

In [98]:
data['Ram'] = data['Ram'].str.replace('GB','').astype('int32')
data['Weight'] = data['Weight'].str.replace('kg','').astype('float32')

In [99]:
def assign_company(company):
    if company == 'Samsung' or company == 'Razer' or company == 'Mediacom' or company == 'Microsoft' or company == 'Xiaomi' or company == 'Vero' or company == 'Chuwi' or company == 'Google' or company == 'Fujitsu' or company == 'LG' or company == 'Huawei':
        return 'Other'
    else:
        return company
data['Company'] = data['Company'].apply(assign_company)

In [100]:
data['Touchscreen'] = data['ScreenResolution'].apply(lambda x:1 if 'Touchscreen' in x else 0)
data['Ips'] = data['ScreenResolution'].apply(lambda x:1 if 'Ips' in x else 0)

In [101]:
data['cpu_name'] = data['Cpu'].apply(lambda x:' '.join(x.split()[0:3]))

In [102]:
def set_processor(name):
    if name == 'Intel Core i7' or name == 'Intel Core i5' or name == 'Intel Core i3':
        return name
    else:
        if name.split()[0] == 'AMD':
            return 'AMD'
        else:
            return 'Other'

data['cpu_name'] = data['cpu_name'].apply(set_processor)

In [103]:
data['gpu_name'] = data['Gpu'].apply(lambda x:' '.join(x.split()[0:1]))

In [104]:
def set_os(inpt):
    if inpt == 'Windows 10' or inpt == 'Windows 7' or inpt == 'Windows 10 S':
        return 'Windows'
    elif inpt == 'macOS' or inpt == 'Mac OS X':
        return 'Mac'
    elif inpt == 'Linux':
        return inpt
    else:
        return 'Other'

data['OpSys'] = data['OpSys'].apply(set_os)

# Remove Unnecessary Columns

In [105]:
data = data.drop(columns=['laptop_ID','Inches','Product','ScreenResolution','Cpu','Gpu'])

# One-Hot Encoding

In [106]:
data = pd.get_dummies(data)

# Split X axis and Y axis

In [107]:
X = data.drop('Price_euros',axis=1)
y = data['Price_euros']

# Split Train and Test Dataset

In [108]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.25)

# Model Treaing and Accuracy Tets Function

In [109]:
def model_acc(model):
    model.fit(X_train,y_train)
    acc = model.score(X_test,y_test)
    print(str(model)+ '---> ' + str(acc))

# Building Models Using Foure Algorithm

In [110]:
lr = LinearRegression()
model_acc(lr)

lasso = Lasso()
model_acc(lasso)

dt = DecisionTreeRegressor()
model_acc(dt)

rf = RandomForestRegressor()
model_acc(rf)

LinearRegression()---> 0.7052022138589352
Lasso()---> 0.7053433191482803
DecisionTreeRegressor()---> 0.7042687615776438
RandomForestRegressor()---> 0.7660510967616873


# Get Best Model

In [94]:
parameters = {'n_estimators':[10,50,100],
                'criterion':['squared_error','absolute_error','poisson']}

grid_obj = GridSearchCV(estimator=rf,param_grid=parameters)
grid_fit = grid_obj.fit(X_train,y_train)

best_model = grid_fit.best_estimator_
best_model

# Best Model Accuracy Test

In [111]:
best_model.score(X_test,y_test)

0.9243880711706316

# Save the Model as Pickle File

In [113]:
with open('predictor.pickle','wb') as file:
    pickle.dump(best_model,file)