In [1]:
import os

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from xgboost import XGBRegressor


In [2]:
# Load the dataset.
# The CSV location can be overridden through the CAR_PRICE_CSV environment variable so the
# notebook also runs outside the original author's machine; when the variable is unset or
# empty, the original absolute path is used.
DATA_PATH = (
    os.environ.get('CAR_PRICE_CSV')
    or '/home/balendran/stockprice_prediction/dataset/CarPrice_Assignment.csv'
)
data = pd.read_csv(DATA_PATH)

# Fail early, with a readable message, when the target column is absent.
if 'price' not in data.columns:
    raise KeyError(f"Expected a 'price' column in {DATA_PATH}; found {list(data.columns)}")

# Encode categorical features: every object-dtype column is overwritten with integer codes,
# and the fitted encoder is stored under the column name so codes can be mapped back later.
# NOTE(review): integer codes impose an arbitrary order that the linear model will treat as
# numeric — confirm this is acceptable, or consider one-hot encoding.
# NOTE(review): if the CSV carries a row-identifier column it is currently kept as a
# feature — confirm that is intended.
label_encoders = {}
for column in data.select_dtypes(include=['object']).columns:
    le = LabelEncoder()
    data[column] = le.fit_transform(data[column])
    label_encoders[column] = le

# Split the data into features (everything except the target) and the target itself
X = data.drop('price', axis=1)
y = data['price']

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature scaling: the scaler is fitted on the training split only and then applied
# unchanged to the test split, so no test-set statistics are used during fitting.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [3]:
# Linear-regression model: fit on the training split, then predict the held-out rows.
# `fit` returns the estimator itself, so construction and fitting are chained.
lr_model = LinearRegression().fit(X_train, y_train)
lr_predictions = lr_model.predict(X_test)


In [4]:
# Random forest with 100 trees; the fixed random_state makes the fit repeatable.
# `fit` returns the estimator itself, so construction and fitting are chained.
rf_model = RandomForestRegressor(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)
rf_predictions = rf_model.predict(X_test)


In [5]:
# Gradient-boosted trees (100 boosting rounds) trained with the squared-error objective.
# `fit` returns the estimator itself, so construction and fitting are chained.
xgb_model = XGBRegressor(
    objective='reg:squarederror',
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)
xgb_predictions = xgb_model.predict(X_test)


In [6]:
# Function to evaluate model
def evaluate_model(model_name, predictions):
    """Print the test-set mean squared error and R^2 score for one model.

    Args:
        model_name: Label shown at the start of the printed line.
        predictions: Predicted values, compared against the notebook-level `y_test`.

    Returns:
        None. The metrics are only printed.
    """
    mse, r2 = mean_squared_error(y_test, predictions), r2_score(y_test, predictions)
    print(f'{model_name} - Mean Squared Error: {mse}, R^2 Score: {r2}')

# Report metrics for every fitted model, in the order the models were trained
for label, preds in (
    ('Linear Regression', lr_predictions),
    ('Random Forest Regressor', rf_predictions),
    ('XGBoost Regressor', xgb_predictions),
):
    evaluate_model(label, preds)


Linear Regression - Mean Squared Error: 12306121.302934568, R^2 Score: 0.844115853666815
Random Forest Regressor - Mean Squared Error: 3381317.803615219, R^2 Score: 0.9571681583236088
XGBoost Regressor - Mean Squared Error: 6387778.092302323, R^2 Score: 0.9190847131787238
