In [1]:
import pandas as pd

# Read the processed laptop-price table and preview its first rows.
dataset_path = "../data/processed/clean_laptop_prices.csv"
df = pd.read_csv(dataset_path)
df.head()


Unnamed: 0.1,Unnamed: 0,Company,TypeName,Inches,ScreenResolution,Cpu,Ram,Memory,Gpu,OpSys,Weight,Price
0,0,1,4,13.3,23,65,8,4,58,8,38,71378.6832
1,1,1,4,13.3,1,63,8,2,51,8,35,47895.5232
2,2,7,3,15.6,8,74,8,16,53,4,74,30636.0
3,3,1,4,15.4,25,85,1,29,9,8,71,135195.336
4,4,1,4,13.3,23,67,8,16,59,8,38,96095.808


In [2]:
# Separate the regression target ("Price") from the remaining columns.
# NOTE(review): the preview of `df` lists "Unnamed: 0.1" and "Unnamed: 0"
# columns, which look like saved row indexes; they are kept in X here.
# Confirm whether the loaded preprocessor expects them before dropping them.
target_column = "Price"
X = df.drop(columns=[target_column])
y = df[target_column]


In [3]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed seed keeps the
# split identical across reruns.
TEST_FRACTION = 0.2
SPLIT_SEED = 42

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=TEST_FRACTION,
    random_state=SPLIT_SEED,
)


In [4]:
import joblib

# Load the preprocessing object saved under ../models.
# NOTE(review): joblib files are pickle-based; only load artifacts that
# were produced by this project.
preprocessor_path = "../models/preprocessor.pkl"
preprocessor = joblib.load(preprocessor_path)


In [5]:
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression

# Chain the loaded preprocessor with a plain linear regressor so that
# fitting and prediction go through a single object.
# NOTE(review): the loaded preprocessor may already be fitted; fitting the
# pipeline is expected to fit it again on the training data — confirm.
regressor = LinearRegression()
pipeline = Pipeline(
    steps=[
        ("preprocessing", preprocessor),
        ("model", regressor),
    ]
)


In [6]:
# Fit the preprocessing and regression steps on the training split only.
pipeline.fit(X_train, y=y_train)


In [7]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

# Score the fitted pipeline on the held-out split.
y_pred = pipeline.predict(X_test)

# MAE and RMSE are in the same units as Price; R2 is unitless.
mae, rmse, r2 = (
    mean_absolute_error(y_test, y_pred),
    np.sqrt(mean_squared_error(y_test, y_pred)),
    r2_score(y_test, y_pred),
)

for label, value in (("MAE:", mae), ("RMSE:", rmse), ("R2 Score:", r2)):
    print(label, value)


MAE: 19810.89154929154
RMSE: 29354.388563101104
R2 Score: 0.4023915191963233


In [8]:
# Persist the fitted pipeline (preprocessing + model) as the v1 artifact.
# The call is left as the last expression so the written path is displayed.
model_path = "../models/laptop_price_model_v1.pkl"
joblib.dump(pipeline, model_path)


['../models/laptop_price_model_v1.pkl']

## End-to-End Training Pipeline (v1)

This pipeline includes:
- Preprocessing using ColumnTransformer
- Linear Regression model
- End-to-end training and prediction

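The fitted pipeline is saved as `laptop_price_model_v1.pkl`; the `v1` suffix identifies this baseline, which keeps it reproducible and lets future upgrades be saved as new versions alongside it rather than overwriting it.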
