In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
df = pd.read_csv('laptop_data.csv')

In [3]:
df.drop(columns=['Unnamed: 0'],inplace=True)

In [4]:
df['Ram'] = df['Ram'].str.replace('GB','')
df['Weight'] = df['Weight'].str.replace('kg','')

In [5]:
df['Ram'] = df['Ram'].astype('int32')
df['Weight'] = df['Weight'].astype('float32')

In [6]:
df['Ips'] = df['ScreenResolution'].apply(lambda x:1 if 'IPS' in x else 0)

In [7]:
new = df['ScreenResolution'].str.split('x',n=1,expand=True)

In [8]:
df['X_res'] = new[0]
df['Y_res'] = new[1]

In [9]:
df['X_res'] = df['X_res'].str.replace(',','').str.findall(r'(\d+\.?\d+)').apply(lambda x:x[0])

In [10]:
df['X_res'] = df['X_res'].astype('int')
df['Y_res'] = df['Y_res'].astype('int')

In [11]:
df['ppi'] = (((df['X_res']**2) + (df['Y_res']**2))**0.5/df['Inches']).astype('float')

In [12]:
df.drop(columns=['ScreenResolution'],inplace=True)

In [13]:
df.drop(columns=['Inches','X_res','Y_res'],inplace=True)

In [14]:
df['Cpu Name'] = df['Cpu'].apply(lambda x:" ".join(x.split()[0:3]))

In [15]:
def fetch_processor(text):
    if text == 'Intel Core i7' or text == 'Intel Core i5' or text == 'Intel Core i3':
        return text
    else:
        if text.split()[0] == 'Intel':
            return 'Other Intel Processor'
        else:
            return 'AMD Processor'

In [16]:
df['Cpu brand'] = df['Cpu Name'].apply(fetch_processor)

In [17]:
df.drop(columns=['Cpu','Cpu Name'],inplace=True)

In [18]:
df['Memory'] = df['Memory'].astype(str).replace('\.0', '', regex=True)
df["Memory"] = df["Memory"].str.replace('GB', '')
df["Memory"] = df["Memory"].str.replace('TB', '000')
new = df["Memory"].str.split("+", n = 1, expand = True)

df["first"]= new[0]
df["first"]=df["first"].str.strip()

df["second"]= new[1]

df["Layer1HDD"] = df["first"].apply(lambda x: 1 if "HDD" in x else 0)
df["Layer1SSD"] = df["first"].apply(lambda x: 1 if "SSD" in x else 0)
df["Layer1Hybrid"] = df["first"].apply(lambda x: 1 if "Hybrid" in x else 0)
df["Layer1Flash_Storage"] = df["first"].apply(lambda x: 1 if "Flash Storage" in x else 0)

df['first'] = df['first'].str.replace(r'\D', '')

df["second"].fillna("0", inplace = True)

df["Layer2HDD"] = df["second"].apply(lambda x: 1 if "HDD" in x else 0)
df["Layer2SSD"] = df["second"].apply(lambda x: 1 if "SSD" in x else 0)
df["Layer2Hybrid"] = df["second"].apply(lambda x: 1 if "Hybrid" in x else 0)
df["Layer2Flash_Storage"] = df["second"].apply(lambda x: 1 if "Flash Storage" in x else 0)

df['second'] = df['second'].str.replace(r'\D', '')

df["first"] = df["first"].astype(int)
df["second"] = df["second"].astype(int)

df["HDD"]=(df["first"]*df["Layer1HDD"]+df["second"]*df["Layer2HDD"])
df["SSD"]=(df["first"]*df["Layer1SSD"]+df["second"]*df["Layer2SSD"])
df["Hybrid"]=(df["first"]*df["Layer1Hybrid"]+df["second"]*df["Layer2Hybrid"])
df["Flash_Storage"]=(df["first"]*df["Layer1Flash_Storage"]+df["second"]*df["Layer2Flash_Storage"])

df.drop(columns=['first', 'second', 'Layer1HDD', 'Layer1SSD', 'Layer1Hybrid',
       'Layer1Flash_Storage', 'Layer2HDD', 'Layer2SSD', 'Layer2Hybrid',
       'Layer2Flash_Storage'],inplace=True)

  df['Memory'] = df['Memory'].astype(str).replace('\.0', '', regex=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["second"].fillna("0", inplace = True)


  df['Memory'] = df['Memory'].astype(str).replace('\.0', '', regex=True)


ValueError: invalid literal for int() with base 10: '128 SSD'

In [None]:
df.drop(columns=['Memory'],inplace=True)

In [None]:
df.drop(columns=['Hybrid','Flash_Storage'],inplace=True)

In [None]:
df['Gpu brand'] = df['Gpu'].apply(lambda x:x.split()[0])

In [None]:
df = df[df['Gpu brand'] != 'ARM']

In [None]:
df.drop(columns=['Gpu'],inplace=True)

In [None]:
def cat_os(inp):
    if inp == 'Windows 10' or inp == 'Windows 7' or inp == 'Windows 10 S':
        return 'Windows'
    elif inp == 'macOS' or inp == 'Mac OS X':
        return 'Mac'
    else:
        return 'Others/No OS/Linux'

In [None]:
df['os'] = df['OpSys'].apply(cat_os)

In [None]:
df.drop(columns=['OpSys'],inplace=True)

In [None]:
df.drop(columns=['TypeName','Ips', 'ppi', 'Gpu brand'], inplace = True)
X = df.drop(columns=['Price'])
y = np.log(df['Price'])

In [None]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.15,random_state=2)

In [None]:
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import r2_score,mean_absolute_error
from sklearn.ensemble import RandomForestRegressor,GradientBoostingRegressor,AdaBoostRegressor,ExtraTreesRegressor


In [None]:
step1 = ColumnTransformer(transformers=[
    ('col_tnf',OneHotEncoder(sparse=False,drop='first'),[0, 3, 6])
],remainder='passthrough')

step2 = RandomForestRegressor(n_estimators=100,
                              random_state=2,
                              max_samples=0.5,
                              max_features=0.75,
                              max_depth=15)

pipe = Pipeline([
    ('step1',step1),
    ('step2',step2)
])

pipe.fit(X_train,y_train)

y_pred = pipe.predict(X_test)

print('R2 score',r2_score(y_test,y_pred))
print('MAE',mean_absolute_error(y_test,y_pred))



R2 score 0.843308073971324
MAE 0.1853634853943851


In [None]:

columns = ['Company', 'Ram', 'Weight', 'Cpu brand', 'HDD', 'SSD', 'os']

user_input = []
for column in columns:
    value = input(f"Enter {column}: ")
    user_input.append(value)

user_input_df = pd.DataFrame([user_input], columns=columns)

print(user_input_df)


Enter Company: Dell
Enter Ram: 8
Enter Weight: 1.5
Enter Cpu brand: Intel Core i5
Enter HDD: 0
Enter SSD: 256
Enter os: Windows
  Company Ram Weight      Cpu brand HDD  SSD       os
0    Dell   8    1.5  Intel Core i5   0  256  Windows


In [None]:
y_pred = pipe.predict(user_input_df)
y_pred
log_price = y_pred

# Convert back to the normal amount
normal_price = np.exp(log_price)
print(normal_price)

[65616.08320285]


In [None]:
import pickle

with open('model.pkl', 'wb') as file:
    pickle.dump(pipe, file)