# **Laptop Price Prediction**

In [136]:
import pandas as pd

In [137]:
df = pd.read_csv('Laptops.csv')
df.head()

Unnamed: 0,id,Brand,Model Name,Processor,Operating System,Storage,RAM,Screen Size,Touch_Screen,Price
0,0,HP,15s-fq5007TU,Core i3,Windows 11 Home,512 GB,8 GB,39.62 cm (15.6 Inch),No,"₹38,990"
1,1,HP,15s-fy5003TU,Core i3,Windows 11 Home,512 GB,8 GB,39.62 cm (15.6 Inch),No,"₹37,990"
2,2,Apple,2020 Macbook Air,M1,Mac OS Big Sur,256 GB,8 GB,33.78 cm (13.3 inch),No,"₹70,990"
3,3,Apple,2020 Macbook Air,M1,Mac OS Big Sur,256 GB,8 GB,33.78 cm (13.3 inch),No,"₹70,990"
4,4,Apple,2020 Macbook Air,M1,Mac OS Big Sur,256 GB,8 GB,33.78 cm (13.3 inch),No,"₹70,990"


In [138]:
df.isnull().sum()

id                   0
Brand                0
Model Name           0
Processor            0
Operating System     0
Storage             12
RAM                  0
Screen Size          0
Touch_Screen         0
Price                0
dtype: int64

In [139]:
df['Storage'] = df['Storage'].fillna(value=df['Storage'].mode()[0])

In [140]:
df.isnull().sum()

id                  0
Brand               0
Model Name          0
Processor           0
Operating System    0
Storage             0
RAM                 0
Screen Size         0
Touch_Screen        0
Price               0
dtype: int64

In [141]:
pd.DataFrame(pd.DataFrame(df.groupby('Storage'))[0])

Unnamed: 0,0
0,1 TB
1,128 GB
2,2 TB
3,256 GB
4,3 TB
5,4 TB
6,512 GB
7,6 TB
8,64 GB


### Converting Numeric Columns into numeric types

In [142]:
def convert_storage(storage):
    numeric_part, unit = storage[:-2], storage[-2:]
    numeric_part = int(numeric_part)
    if unit == 'TB':
        numeric_part *= 1024
    return numeric_part

df['Storage'] = df['Storage'].apply(convert_storage)

In [143]:
pd.DataFrame(pd.DataFrame(df.groupby('RAM'))[0])

Unnamed: 0,0
0,12 GB
1,16 GB
2,18 GB
3,32 GB
4,4 GB
5,64 GB
6,8 GB


In [144]:
pd.DataFrame(pd.DataFrame(df.groupby('Screen Size'))[0]).head()

Unnamed: 0,0
0,100.63 cm (39.62 cm)
1,17.78 cm (7 Inch)
2,26.67 cm (10.5 inch)
3,29.46 cm (11.6 Inch)
4,29.46 cm (11.6 inch)


In [145]:
df['RAM'] = df['RAM'].astype(str).str.extract('(\d+)').astype(int)
df['Screen Size'] = df['Screen Size'].astype(str).str.extract(r'([\d.]+)').astype(float)
df['Price'] = df['Price'].astype(str).str.replace(r'[^\d]', '', regex=True).astype(int)

In [146]:
pd.DataFrame(df['Screen Size']).head()

Unnamed: 0,Screen Size
0,39.62
1,39.62
2,33.78
3,33.78
4,33.78


In [147]:
df.head()

Unnamed: 0,id,Brand,Model Name,Processor,Operating System,Storage,RAM,Screen Size,Touch_Screen,Price
0,0,HP,15s-fq5007TU,Core i3,Windows 11 Home,512,8,39.62,No,38990
1,1,HP,15s-fy5003TU,Core i3,Windows 11 Home,512,8,39.62,No,37990
2,2,Apple,2020 Macbook Air,M1,Mac OS Big Sur,256,8,33.78,No,70990
3,3,Apple,2020 Macbook Air,M1,Mac OS Big Sur,256,8,33.78,No,70990
4,4,Apple,2020 Macbook Air,M1,Mac OS Big Sur,256,8,33.78,No,70990


### Split Train Test data

In [148]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(df.drop(columns=['id','Price']),df['Price'],test_size=0.2)

### Separating columns of different types

In [149]:
numerical_columns = [4,5,6]
ordinal_columns = [7]
nominal_columns = [0,1,2,3]

### Importing required libararies

In [150]:
from sklearn.preprocessing import OrdinalEncoder,OneHotEncoder
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LinearRegression

In [151]:
impute_num = Pipeline(steps=[
    ('impute_num',SimpleImputer(strategy='mean'))
])

In [152]:
encode_ord = Pipeline(steps=[
    ('impute_cat',SimpleImputer(strategy='most_frequent')),
    ('encode_ord',OrdinalEncoder())
])

In [153]:
encode_nom = Pipeline(steps=[
    ('impute_cat',SimpleImputer(strategy='most_frequent')),
    ('encode_nom',OneHotEncoder(drop='first', handle_unknown='ignore'))
])

In [154]:
preprocessing = ColumnTransformer(transformers=[
    ('impute_num',impute_num,numerical_columns),
    ('encode_ord',encode_ord,ordinal_columns),
    ('encode_nom',encode_nom,nominal_columns)
],remainder='passthrough')

In [155]:
model = LinearRegression()

In [156]:
pipe = Pipeline(steps=[
    ('preprocessor',preprocessing),
    ('model',model)
])

In [157]:
pipe.fit(X_train,y_train)

In [164]:
from sklearn.metrics import mean_squared_error, r2_score

y_train_pred = pipe.predict(X_train)
y_test_pred = pipe.predict(X_test)

train_rmse = mean_squared_error(y_train, y_train_pred, squared=False)
test_rmse = mean_squared_error(y_test, y_test_pred, squared=False)

train_r2 = r2_score(y_train, y_train_pred)
test_r2 = r2_score(y_test, y_test_pred)

print("Training RMSE:", train_rmse)
print("Testing RMSE:", test_rmse)
print("Training R^2 score:", train_r2)
print("Testing R^2 score:", test_r2)

Training RMSE: 1882.0786569746044
Testing RMSE: 30386.320876631464
Training R^2 score: 0.9971258473116092
Testing R^2 score: 0.5587970147138768




In [165]:
import pandas as pd

brand = input("Enter the Brand: ")
model_name = input("Enter the Model Name: ")
processor = input("Enter the Processor: ")
os = input("Enter the Operating System: ")
storage = float(input("Enter the Storage (GB): "))
ram = float(input("Enter the RAM (GB): "))
screen_size = float(input("Enter the Screen Size (inches): "))
touch_screen = input("Does it have a Touch Screen? (Yes/No): ")

custom_input = pd.DataFrame({
    'Brand': [brand],
    'Model Name': [model_name],
    'Processor': [processor],
    'Operating System': [os],
    'Storage': [storage],
    'RAM': [ram],
    'Screen Size': [screen_size],
    'Touch_Screen': [touch_screen]
})

custom_predictions = pipe.predict(custom_input)

print("Custom Price Predictions:", custom_predictions)

Custom Input Predictions: [7125.75716049]
