# Laptop Price Prediction

#### Loading the Data-Set for the Project

In [None]:
import pandas as pd
import numpy as np
import matplotlib as plt
import seaborn as sns

In [None]:
# Read the raw laptop listings (decoded as latin-1) into a DataFrame.
csv_path = "../input/laptop-prices/laptops.csv"
data = pd.read_csv(csv_path, encoding="latin-1")
# Show the first rows as a quick sanity check of the parse.
data.head()

In [None]:
# Summarise the raw frame (columns, non-null counts, dtypes) before cleaning.
data.info()

# Data Cleaning Process
<ol>

<li> Remove unnecessary extra details from the Product column
 <li> Extract CPU Vendor, CPU Type and CPU Speed into separate columns
 <li> Convert Ram into numeric values
 <li> Extract the memory type from the Memory column
 <li> Convert Weight into numeric values
 <li> Extract GPU Vendor and GPU Type into separate columns
 <li> Extract the IPS and Touchscreen features from the ScreenResolution column
 </ol>

## Remove unnecessary extra details from the Product column

In [None]:
# Keep only the text that precedes the first "(" in each product name.
data["Product"] = [pieces[0] for pieces in data["Product"].str.split("(")]

 ## Extract CPU Vendor, CPU Type and CPU Speed into separate columns

In [None]:
def _cpu_type(tokens):
    """Return the tokens that name the CPU family for one split ``Cpu`` value.

    ``tokens`` is the ``Cpu`` string split on single spaces, e.g.
    ``["Intel", "Core", "i5", "2.3GHz"]``.

    * Celeron / Pentium / Xeon parts keep three tokens after the vendor.
    * "Core" parts and every AMD part keep two tokens after the vendor.
    * Anything else falls back to the vendor token alone.

    A list is always returned so the caller can join it with spaces.
    """
    # The original test `x[1] == "Celeron" and "Pentium" and "Xeon"` only ever
    # matched Celeron, because the bare string literals are always truthy.
    if tokens[1] in ("Celeron", "Pentium", "Xeon"):
        return tokens[1:4]
    if tokens[1] == "Core" or tokens[0] == "AMD":
        return tokens[1:3]
    # Return a one-element list, not the bare string: joining a string would
    # insert a space between every character ("I n t e l").
    return tokens[:1]


# Split each CPU description once and derive all three columns from it.
cpu_tokens = data["Cpu"].str.split(" ")
# The last token carries the clock speed, e.g. "2.3GHz" -> "2.3".
data["Cpu_Speed"] = cpu_tokens.apply(lambda x: x[-1]).str.replace("GHz", "")
# The first token is the vendor.
data["Cpu_Vender"] = cpu_tokens.apply(lambda x: x[0])
data["Cpu_Type"] = cpu_tokens.apply(lambda x: " ".join(_cpu_type(x)))
data.head()

## Extract Memory type from Memory Column

In [None]:
# Split "<size> <type ...>" on the first space only.
# `n` is passed by keyword: pandas 2.x accepts it only as a keyword argument,
# so the positional form `split(' ', 1, ...)` raises a TypeError there.
split_mem = data['Memory'].str.split(' ', n=1, expand=True)
# Everything after the first space describes the storage type.
data['Storage Type'] = split_mem[1]
# The leading token (size plus unit) stays in Memory for numeric parsing later.
data['Memory'] = split_mem[0]
data.head()

In [None]:
# Strip the "GB" suffix so Ram can be cast to an integer later on.
data["Ram"] = data["Ram"].str.replace("GB", "")

# Split around the first run of digits; because the pattern has a capture
# group, the digits themselves land in column 1 of the expanded result.
# The pattern is a raw string: "\d" in a normal literal is an invalid escape
# sequence that newer Python versions warn about.
df_mem = data['Memory'].str.split(r'(\d+)', expand=True)
data['Memory'] = pd.to_numeric(df_mem[1])
# The number may still be in GB or TB at this point, hence the interim name.
data.rename(columns={'Memory':'Memory (GB or TB)'}, inplace=True)

def mem(x):
    """Map a storage size of 1 or 2 to 1024 or 2048; return other values as-is.

    Previously every value other than 1 or 2 fell off the end of the function
    and came back as ``None``, so applying it to a column would have erased
    all other sizes.

    Args:
        x: Numeric storage size.

    Returns:
        1024 for 1, 2048 for 2, otherwise ``x`` unchanged.
    """
    if x == 1:
        return 1024
    if x == 2:
        return 2048
    return x
# Rewrite sizes of 1 and 2 to 1024 and 2048; every other value is kept as-is.
# (Presumably 1/2 are terabyte figures being expressed in GB, given the rename.)
tb_to_gb = {1: 1024, 2: 2048}
data['Memory (GB or TB)'] = data['Memory (GB or TB)'].replace(tb_to_gb)
data.rename(columns={'Memory (GB or TB)': 'Storage (GB)'}, inplace=True)
data.head()

## Convert Weight into numeric Values

In [None]:
# Drop the literal "kg" unit so only the number remains (still stored as text).
weight_text = data["Weight"]
data["Weight"] = weight_text.str.replace("kg", "", regex=False)
data.head()

## Extract GPU Vendor and GPU Type into separate columns

In [None]:
# Tokenise every GPU description once and reuse the tokens for both columns.
gpu_distribution_list = data["Gpu"].str.split(" ")
# The first token is taken as the vendor.
data["Gpu_Vender"] = gpu_distribution_list.map(lambda parts: parts[0])
# The remaining tokens, re-joined with single spaces, form the GPU type.
data["Gpu_Type"] = gpu_distribution_list.map(lambda parts: " ".join(parts[1:]))
data.head()

 ## Extract the IPS and Touchscreen features from the ScreenResolution column

In [None]:
# Add a 0/1 flag per feature: 1 when the keyword occurs in ScreenResolution.
for flag_column, keyword in (("Touchscreen", "Touchscreen"), ("Ips", "IPS")):
    # `kw=keyword` binds the current keyword to the lambda at definition time.
    data[flag_column] = data["ScreenResolution"].map(
        lambda resolution, kw=keyword: int(kw in resolution)
    )


 ## Categorizing the Operating System

In [None]:
def cat_os(inp):
    """Collapse a raw operating-system label into one of three groups.

    Args:
        inp: Operating-system label as it appears in the ``OpSys`` column.

    Returns:
        ``'Windows'`` for 'Windows 10', 'Windows 7' and 'Windows 10 S',
        ``'Mac'`` for 'macOS' and 'Mac OS X', and
        ``'Others/No OS/Linux'`` for every other value.
    """
    windows_labels = ('Windows 10', 'Windows 7', 'Windows 10 S')
    mac_labels = ('macOS', 'Mac OS X')
    if inp in windows_labels:
        return 'Windows'
    if inp in mac_labels:
        return 'Mac'
    return 'Others/No OS/Linux'

# Replace each raw OS label with its coarse group as returned by cat_os.
data['OpSys'] = data['OpSys'].map(cat_os)

 ## Keeping the Useful Columns and Dropping the Rest

In [None]:
# Columns kept from here on; all others are dropped.
# The order matters for any later step that addresses columns by position.
selected_columns = [
    "Company",
    "TypeName",
    "Inches",
    "Touchscreen",
    "Ips",
    "Cpu_Vender",
    "Cpu_Type",
    "Ram",
    "Storage (GB)",
    "Storage Type",
    "Gpu_Vender",
    "Gpu_Type",
    "Weight",
    "OpSys",
    "Price_euros",
]
data = data.reindex(columns=selected_columns)

In [None]:
# Preview the frame after the column selection.
data.head()

In [None]:
# Check dtypes and non-null counts of the selected columns before casting.
data.info()

# Transforming the Data Type of some of the Columns
<ol>
<li> Ram
<li> Storage
<li> Weight
</ol>

In [None]:
# Cast the cleaned text/number columns to their final numeric dtypes in one go.
numeric_dtypes = {"Ram": "int", "Storage (GB)": "int", "Weight": "float"}
data = data.astype(numeric_dtypes)

In [None]:
# Confirm that the dtype conversions took effect.
data.info()

In [None]:
# Write the cleaned frame to a CSV file next to the notebook.
# NOTE(review): per the pandas docs, to_csv returns None when a path is given,
# so `backup` presumably holds None rather than a copy of the data - confirm
# that nothing relies on it.
backup = data.to_csv("./laptop_price(3rd).csv")

 # Analysing Relation with The Clean Data

In [None]:
# Set the default figure size (9 x 5) for the plots that follow.
sns.set(rc={"figure.figsize":(9, 5)})

In [None]:
# Bar chart of how many laptops each company has in the data set.
company_counts = data["Company"].value_counts()
company_counts.plot.bar()

In [None]:
# Price per company; column names are resolved against `data`.
sns.barplot(x="Company", y="Price_euros", data=data)

In [None]:
# Bar chart of how many laptops fall into each laptop type.
type_counts = data["TypeName"].value_counts()
type_counts.plot.bar()

In [None]:
# Price per laptop type; column names are resolved against `data`.
sns.barplot(x="TypeName", y="Price_euros", data=data)

In [None]:
# Ten most frequent CPU types with their counts.
# The columns are named explicitly ("index" = CPU type, "Cpu_Type" = count)
# because the names produced by a bare value_counts().reset_index() changed
# in pandas 2.0, which would break lookups of cpu_distribution["index"].
cpu_distribution = (
    data["Cpu_Type"]
    .value_counts()
    .head(10)
    .rename_axis("index")
    .reset_index(name="Cpu_Type")
)
cpu_distribution

In [None]:
# Count laptops per (CPU type, vendor) for the ten most common CPU types.
# The counts are built into one tidy frame so x, y and hue all have the same
# length; the earlier call mixed 10-row x/y vectors with a hue column taken
# from the full `data` frame, which cannot line up row for row.
top_cpu_types = list(data["Cpu_Type"].value_counts().index[:10])
cpu_counts = (
    data[data["Cpu_Type"].isin(top_cpu_types)]
    .groupby(["Cpu_Type", "Cpu_Vender"])
    .size()
    .reset_index(name="Count")
)
# `order` keeps the bars sorted from most to least frequent CPU type.
sns.barplot(x="Cpu_Type", y="Count", hue="Cpu_Vender", data=cpu_counts, order=top_cpu_types)

In [None]:
# Ten most frequent GPU types with their counts.
# The columns are named explicitly ("index" = GPU type, "Gpu_Type" = count)
# because the names produced by a bare value_counts().reset_index() changed
# in pandas 2.0, which would break lookups of gpu_distribution["index"].
gpu_distribution = (
    data["Gpu_Type"]
    .value_counts()
    .head(10)
    .rename_axis("index")
    .reset_index(name="Gpu_Type")
)
gpu_distribution

In [None]:
# Count laptops per (GPU type, vendor) for the ten most common GPU types.
# The counts are built into one tidy frame so x, y and hue all have the same
# length; the earlier call mixed 10-row x/y vectors with a hue column taken
# from the full `data` frame, which cannot line up row for row.
top_gpu_types = list(data["Gpu_Type"].value_counts().index[:10])
gpu_counts = (
    data[data["Gpu_Type"].isin(top_gpu_types)]
    .groupby(["Gpu_Type", "Gpu_Vender"])
    .size()
    .reset_index(name="Count")
)
# `order` keeps the bars sorted from most to least frequent GPU type.
sns.barplot(x="Gpu_Type", y="Count", hue="Gpu_Vender", data=gpu_counts, order=top_gpu_types)

In [None]:
# Price per operating-system group; column names are resolved against `data`.
sns.barplot(x="OpSys", y="Price_euros", data=data)

In [None]:
# Correlate the numeric columns only. `data` still contains text columns
# (Company, Cpu_Type, ...), and DataFrame.corr() raises on those in pandas 2.x
# instead of silently skipping them. select_dtypes works on old and new pandas.
corr_data = data.select_dtypes(include="number").corr()
# Rank the features by their correlation with the price.
corr_data["Price_euros"].sort_values(ascending=False)

In [None]:
# Heatmap of the pairwise correlations between the numeric columns.
# Text columns are excluded explicitly because DataFrame.corr() raises on
# them in pandas 2.x instead of silently skipping them.
sns.heatmap(data.select_dtypes(include="number").corr())

# Result of Analysing Data
 <ol>
 <li> Dell, Lenovo and HP are the dominant companies in selling laptops
 <li> The average price of a laptop across all brands lies between 1500 to 200 Euros
 <li> Among the laptop types, Notebook is the dominant one, and its price is also pretty low compared to its popularity and to the other laptop types
 <li> In the CPU and GPU sections Intel is the dominant vendor: Intel Core i7 is the most popular CPU and Intel HD Graphics the most popular GPU
 <li> In Operating System Mac is the clear winner, but Windows is not far behind
 <li> According to the analysis, the attributes that make the biggest difference to laptop price are Ram, IPS Display, Weight and Touchscreen
 </ol>

In [None]:
# Separate the features from the target; the target is the natural log of price.
target_column = 'Price_euros'
X = data.drop(columns=[target_column])
y = np.log(data[target_column])

# Using Machine Learning to Predict Laptop Price
 
 ### I have tested this data with many well-known algorithms and got the best result from the VotingRegressor; you can also use any other algorithm.

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 15% of the rows for evaluation; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.15,
    random_state=2,
)

In [None]:
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import r2_score,mean_absolute_error

In [None]:
from sklearn.ensemble import RandomForestRegressor,GradientBoostingRegressor,ExtraTreesRegressor
from xgboost import XGBRegressor

In [None]:

from sklearn.ensemble import VotingRegressor,StackingRegressor

# One-hot encode the text columns of X, addressed by position:
# 0 Company, 1 TypeName, 5 Cpu_Vender, 6 Cpu_Type, 9 Storage Type,
# 10 Gpu_Vender, 11 Gpu_Type, 13 OpSys. All other columns pass through as-is.
# Dense output is requested through `sparse_threshold=0` on the
# ColumnTransformer rather than `OneHotEncoder(sparse=False)`: the `sparse`
# argument was renamed to `sparse_output` in newer scikit-learn releases and
# later removed, whereas `sparse_threshold` works on both old and new versions.
step1 = ColumnTransformer(
    transformers=[
        ('col_tnf', OneHotEncoder(handle_unknown='ignore'), [0, 1, 5, 6, 9, 10, 11, 13]),
    ],
    remainder='passthrough',
    sparse_threshold=0,
)

# Base regressors. Each one is seeded so repeated runs give the same model;
# gbdt and xgb previously had no seed.
rf = RandomForestRegressor(n_estimators=350, random_state=3, max_samples=0.5, max_features=0.75, max_depth=15)
gbdt = GradientBoostingRegressor(n_estimators=100, max_features=0.5, random_state=3)
xgb = XGBRegressor(n_estimators=25, learning_rate=0.3, max_depth=5, random_state=3)
et = ExtraTreesRegressor(n_estimators=100, random_state=3, max_samples=0.5, max_features=0.75, max_depth=10)

# Weighted average of the four models; the random forest counts five times.
step2 = VotingRegressor([('rf', rf), ('gbdt', gbdt), ('xgb', xgb), ('et', et)], weights=[5, 1, 1, 1])

pipe = Pipeline([
    ('step1', step1),
    ('step2', step2)
])

pipe.fit(X_train, y_train)

y_pred = pipe.predict(X_test)

# The target was log-transformed, so both metrics are on the log-price scale
# (the MAE is not in euros).
print('R2 score', r2_score(y_test, y_pred))
print('MAE', mean_absolute_error(y_test, y_pred))

 # Exporting the Model

In [None]:
import pickle

# Serialise the fitted pipeline. The context manager closes the file handle
# even if pickling fails; the earlier bare open() call never closed it.
with open('./laptop_price(3rd).pkl', 'wb') as model_file:
    pickle.dump(pipe, model_file)