In [11]:
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

In [12]:
# Read laptop_data.csv (path relative to the working directory) into the
# DataFrame that the rest of the notebook transforms.
df = pd.read_csv(filepath_or_buffer='laptop_data.csv')

In [13]:
# Columns removed before modelling; the remaining ones feed the steps below.
unused_columns = [
    'Unnamed: 0',
    'TypeName',
    'Inches',
    'ScreenResolution',
    'Gpu',
    'OpSys',
    'Weight',
]
df = df.drop(unused_columns, axis=1)

In [14]:
def _first_three_words(cpu_text):
    """Return the first three whitespace-separated words of ``cpu_text``, joined by single spaces."""
    return " ".join(cpu_text.split()[:3])


# Shorten every CPU description to its leading three words (e.g. "Intel Core i5").
df['Cpu'] = df['Cpu'].apply(_first_three_words)

In [18]:
def _first_token(memory_text):
    """Return the first whitespace-separated token of ``memory_text`` ('' if there is none)."""
    return " ".join(memory_text.split()[:1])


# Storage keeps only the first token of Memory (e.g. "128GB"); everything
# after that first token is discarded.
df['Storage'] = df['Memory'].apply(_first_token)

In [21]:
# Memory has been reduced to the Storage column, so the original column is removed.
df = df.drop('Memory', axis=1)

In [22]:
# Preview the first five rows after the column clean-up.
df.head(n=5)

Unnamed: 0,Company,Cpu,Ram,Price,Storage
0,Apple,Intel Core i5,8GB,71378.6832,128GB
1,Apple,Intel Core i5,8GB,47895.5232,128GB
2,HP,Intel Core i5,8GB,30636.0,256GB
3,Apple,Intel Core i7,16GB,135195.336,512GB
4,Apple,Intel Core i5,8GB,96095.808,256GB


In [23]:
# Print the column names, non-null counts and dtypes of the cleaned frame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1303 entries, 0 to 1302
Data columns (total 5 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   Company  1303 non-null   object 
 1   Cpu      1303 non-null   object 
 2   Ram      1303 non-null   object 
 3   Price    1303 non-null   float64
 4   Storage  1303 non-null   object 
dtypes: float64(1), object(4)
memory usage: 51.0+ KB


In [24]:
# Split the frame into the regression target (Price) and the feature columns.
target_name = 'Price'
y = df[target_name]
X = df.drop(columns=[target_name])

In [27]:
# Feature columns handed to the one-hot encoder. Ram and Storage are still
# strings at this point (object dtype), so they are listed here as well.
categorical_cols = [
    'Company',
    'Cpu',
    'Storage',
    'Ram',
]

In [28]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error, r2_score

In [30]:
# One-hot encode categorical features; handle_unknown='ignore' makes the
# encoder tolerate categories at transform time that were absent during fit.
onehot_step = ('onehot', OneHotEncoder(handle_unknown='ignore'))
categorical_transformer = Pipeline(steps=[onehot_step])

# Route the columns named in categorical_cols through the categorical transformer.
preprocessor = ColumnTransformer(
    transformers=[('cat', categorical_transformer, categorical_cols)],
)

In [31]:
# Chain the preprocessing step with a linear regression so that fit/predict
# accept the raw feature frame directly.
pipeline_steps = [
    ('preprocessor', preprocessor),
    ('regressor', LinearRegression()),
]
model_pipeline = Pipeline(pipeline_steps)

In [32]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split the same across runs.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts

In [35]:
# Train the preprocessing + regression pipeline on the training split.
model_pipeline.fit(X=X_train, y=y_train)

In [37]:
# Predict prices for the held-out rows.
y_pred = model_pipeline.predict(X=X_test)

In [38]:
# Score the held-out predictions: coefficient of determination and mean squared error.
r2 = r2_score(y_true=y_test, y_pred=y_pred)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)

In [39]:
# Report both evaluation metrics, one per line.
print(
    f"Mean Squared Error: {mse}",
    f"R-squared Score: {r2}",
    sep="\n",
)

Mean Squared Error: 416806167.4923459
R-squared Score: 0.7109288093392097


In [42]:
import joblib

# Persist the fitted pipeline; the returned list of written file names is the cell output.
joblib.dump(value=model_pipeline, filename='laptop_price_prediction_model.joblib')

['laptop_price_prediction_model.joblib']

In [43]:
# Write a second copy of the fitted pipeline under a .pkl file name (still a joblib dump).
joblib.dump(value=model_pipeline, filename='laptop_price.pkl')

['laptop_price.pkl']

In [44]:
# Confirm the save. The name must match the file written by the preceding
# joblib.dump call ('laptop_price.pkl'); the earlier message referred to a
# file that this notebook never creates.
print("Model saved as laptop_price.pkl")

Model saved as laptop_price_predictor.pkl


In [49]:
# Show the actual price of the first held-out row (by position, not by label).
y_test.iat[0]

89084.16

In [46]:
# Look at the first five rows again to see the value format expected for each feature.
df.head(n=5)

Unnamed: 0,Company,Cpu,Ram,Price,Storage
0,Apple,Intel Core i5,8GB,71378.6832,128GB
1,Apple,Intel Core i5,8GB,47895.5232,128GB
2,HP,Intel Core i5,8GB,30636.0,256GB
3,Apple,Intel Core i7,16GB,135195.336,512GB
4,Apple,Intel Core i5,8GB,96095.808,256GB


In [78]:
# A single hand-written laptop specification, using the same column names and
# string formats as the training features.
sample_record = {
    'Company': 'Asus',
    'Cpu': 'Intel Core i3',
    'Ram': '8GB',
    'Storage': '256GB',
}
sample_input = pd.DataFrame([sample_record])

In [79]:
# Run the one-row sample through the fitted pipeline.
predicted_price = model_pipeline.predict(X=sample_input)

In [80]:
# predict() returns one value per input row; the sample has a single row, so
# element 0 is its predicted price.
print(f"Predicted Price: {predicted_price[0]} INR")

Predicted Price: 45493.618098791594 INR


In [82]:
# Save the cleaned DataFrame next to the model files. The context manager
# guarantees the file is flushed and closed even if pickling fails; the
# previous form left the handle returned by open() unclosed.
# Requires `import pickle` in the notebook's import cell.
with open('df.pkl', 'wb') as df_file:
    pickle.dump(df, df_file)