In [13]:
import pandas as pd
import joblib
import json

from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import r2_score

In [14]:
df = pd.read_csv(r"C:\Users\sidch\Downloads\laptop_prices.csv")

df = df.drop_duplicates()
df.drop(["Product","GPU_model",'CPU_model'], axis=1, inplace=True)

X = df.drop("Price_euros", axis=1)
y = df["Price_euros"]

num_cols = X.select_dtypes(include=["int64", "float64"]).columns.tolist()
cat_cols = X.select_dtypes(include=["object"]).columns.tolist()


num_pipeline = Pipeline([
    ("imputer", SimpleImputer(strategy="median")),
    ("scaler", StandardScaler())
])

cat_pipeline = Pipeline([
    ("imputer", SimpleImputer(strategy="most_frequent")),
    ("encoder", OneHotEncoder(handle_unknown="ignore",drop="first"))
])

preprocessor = ColumnTransformer([
    ("num", num_pipeline, num_cols),
    ("cat", cat_pipeline, cat_cols)
])


model = RandomForestRegressor(
    n_estimators=300,
    max_depth=18,
    random_state=42,
    n_jobs=-1
)


pipeline = Pipeline([
    ("preprocessing", preprocessor),
    ("model", model)
])


X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.2,
    random_state=42
)


In [15]:
pipeline.fit(X_train, y_train)

y_pred = pipeline.predict(X_test)
r2 = r2_score(y_test, y_pred)

print("Model R2 Score:", r2)


joblib.dump(pipeline, "laptop_model.pkl")


metadata = {
    "r2_score": round(r2 * 100, 2),
    "num_columns": num_cols,
    "cat_columns": cat_cols
}

with open("model_info.json", "w") as f:
    json.dump(metadata, f, indent=4)


print("Model & Metadata Saved Successfully!")


Model R2 Score: 0.848133603674511
Model & Metadata Saved Successfully!


In [12]:
!streamlit run app.py

^C
