## LOADING DATA ##

In [47]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
from ISLP.models import ModelSpec as MS
import matplotlib.pyplot as plt
import seaborn as sns

In [48]:
# Load dataset
Auto = pd.read_csv("Auto.csv")

# Replace '?' with NaN (if present)
Auto.replace('?', np.nan, inplace=True)

# Convert horsepower to numeric (if needed)
Auto['horsepower'] = pd.to_numeric(Auto['horsepower'])

# Drop missing values
Auto = Auto.dropna()

# View first rows
print(Auto.head())


    mpg  cylinders  displacement  horsepower  weight  acceleration  \
0  18.0          8         307.0       130.0    3504          12.0   
1  15.0          8         350.0       165.0    3693          11.5   
2  18.0          8         318.0       150.0    3436          11.0   
3  16.0          8         304.0       150.0    3433          12.0   
4  17.0          8         302.0       140.0    3449          10.5   

   model year  origin                   car name  
0          70       1  chevrolet chevelle malibu  
1          70       1          buick skylark 320  
2          70       1         plymouth satellite  
3          70       1              amc rebel sst  
4          70       1                ford torino  


In [49]:
# Select predictors and response
X = Auto[['horsepower', 'weight', 'displacement']]
y = Auto['mpg']


In [50]:


# Fit OLS regression
model = sm.OLS(y, X).fit()

# Print summary
print(model.summary())


                                 OLS Regression Results                                
Dep. Variable:                    mpg   R-squared (uncentered):                   0.865
Model:                            OLS   Adj. R-squared (uncentered):              0.864
Method:                 Least Squares   F-statistic:                              831.4
Date:                Sat, 14 Feb 2026   Prob (F-statistic):                   9.31e-169
Time:                        21:31:43   Log-Likelihood:                         -1420.8
No. Observations:                 392   AIC:                                      2848.
Df Residuals:                     389   BIC:                                      2860.
Df Model:                           3                                                  
Covariance Type:            nonrobust                                                  
                   coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------

In [52]:
# New observation
new_car = pd.DataFrame({
    'horsepower': [110],
    'weight': [2600],
    'displacement': [160]
})
# Predict MPG
predicted_mpg = model.predict(new_car)

print("Predicted MPG:", predicted_mpg.iloc[0])


Predicted MPG: 22.794420842174784


In [53]:
# Predictions on training data
y_pred = model.predict(X)

# Calculate metrics
from sklearn.metrics import mean_squared_error, r2_score

print("R2 Score:", r2_score(y, y_pred))
print("RMSE:", np.sqrt(mean_squared_error(y, y_pred)))


R2 Score: -0.35556539944224963
RMSE: 9.075674399611449
