In [3]:
# Step 1: Import the libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error, r2_score

# Step 2: Load the dataset and preview its first rows
df = pd.read_csv("car_prices.csv")
print(df.head())

# Step 3: Separate the model inputs from the value to predict
feature_columns = ["Age", "Mileage", "Brand"]
target_column = "Price"
X = df[feature_columns]  # input columns
y = df[target_column]    # target column (Price)

# Step 4: Brand is categorical and has to be encoded before regression.
# numeric_features names the columns that reach the model unchanged
# (they are forwarded by remainder="passthrough" below).
categorical_features = ["Brand"]
numeric_features = ["Age", "Mileage"]

# Step 5: Column transformer that one-hot encodes the categorical columns.
# handle_unknown="ignore" encodes a brand that was not seen during fit as an
# all-zero row instead of raising at predict time; with a random train/test
# split a rare brand can end up only in the test rows.
preprocessor = ColumnTransformer(
    transformers=[
        ("cat", OneHotEncoder(handle_unknown="ignore"), categorical_features),
    ],
    remainder="passthrough",  # keep the remaining (numeric) columns as they are
)

# Step 6: Pipeline that chains the encoding step and the regression model
pipeline = Pipeline(steps=[
    ("preprocess", preprocessor),
    ("model", LinearRegression()),
])

# Step 7: Hold out 20% of the rows for testing (fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Steps 8 and 9: Fit the pipeline on the training rows, then predict the
# held-out rows
y_pred = pipeline.fit(X_train, y_train).predict(X_test)

# Step 10: Regression metrics computed on the held-out rows
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print("Mean Squared Error (MSE):", mse)
print("R2 Score:", r2)


   Age  Mileage   Brand  Price
0    1     5000  Toyota  15000
1    2    15000   Honda  14000
2    3    25000     BMW  30000
3    4    30000    Ford  12000
4    5    35000  Nissan  11000
Mean Squared Error (MSE): 731426.7716784774
R2 Score: 0.9826080820472753
