In [4]:
import os

import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split

# Load the dataset.
# The location can be overridden with the AUTO_CSV_PATH environment variable so the
# notebook can run on another machine; the default is the original local path.
AUTO_CSV_PATH = os.environ.get(
    "AUTO_CSV_PATH",
    "/Users/alexanderdelriscomorales/Downloads/AI_ML_Files/Auto.csv",
)

# Parse the '?' placeholder as a missing value while reading, rather than patching
# the frame in place afterwards.
data = pd.read_csv(AUTO_CSV_PATH, na_values='?')

# Convert every column to a numeric dtype; errors='coerce' turns any cell that
# cannot be parsed as a number into NaN.
# NOTE(review): a free-text column (if the CSV has one) becomes entirely NaN here,
# and stays NaN after the mean fill below because its mean is NaN too. Confirm that
# this is acceptable.
data = data.apply(pd.to_numeric, errors='coerce')

# Handle missing values: replace each NaN with the mean of its column.
# NOTE(review): the means are computed over all rows, i.e. before the train/test
# split performed later in this cell, so held-out rows influence the imputed
# values. A missing 'mpg' (the response) would be imputed as well. Confirm that
# this is intended.
data = data.fillna(data.mean())

# Define the list of predictor variables
# NOTE(review): 'origin' may be a category code rather than a quantity; confirm
# that modelling it with a single linear term is intended.
predictors = ['cylinders', 'displacement', 'horsepower', 'weight', 'acceleration', 'year', 'origin']

# Initialize a dictionary to store the results (predictor name -> fit summary)
results = {}

# The response variable is the same for every model, so select it once.
y = data['mpg']

# Split the row positions once (80% train, 20% test).
# The split used to be recomputed inside the loop; with a fixed random_state and
# the same number of rows it produced this same partition on every iteration.
# Doing it once removes the repeated work and makes it explicit that every
# predictor is scored on the same held-out rows.
train_rows, test_rows = train_test_split(
    np.arange(len(data)), test_size=0.2, random_state=42
)
y_train, y_test = y.iloc[train_rows], y.iloc[test_rows]

# Fit one single-variable linear regression per predictor
for predictor in predictors:
    # Keep X two-dimensional (a one-column frame), as scikit-learn expects
    X = data[[predictor]]
    X_train, X_test = X.iloc[train_rows], X.iloc[test_rows]

    # Initialize and train a linear regression model
    model = LinearRegression()
    model.fit(X_train, y_train)

    # Evaluate on the held-out rows with the mean squared error (MSE)
    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)

    # Store the results in the dictionary
    results[predictor] = {
        'Coefficients': model.coef_[0],  # one feature, so exactly one coefficient
        'Intercept': model.intercept_,
        'MSE': mse
    }

# Report each fit: slope, intercept and test-set MSE, followed by a blank line.
# The loop variable names are kept because they remain visible in the kernel
# after the cell finishes.
for predictor, result in results.items():
    report_lines = [
        f"Predictor: {predictor}",
        f"Coefficients: {result['Coefficients']}",
        f"Intercept: {result['Intercept']}",
        f"Mean Squared Error (MSE): {result['MSE']}",
        "",  # trailing empty entry yields the blank separator line
    ]
    print("\n".join(report_lines))


Predictor: cylinders
Coefficients: -3.593887055020614
Intercept: 43.103277678094486
Mean Squared Error (MSE): 28.1139504893218

Predictor: displacement
Coefficients: -0.060923087135028794
Intercept: 35.23650478615161
Mean Squared Error (MSE): 23.701871713473047

Predictor: horsepower
Coefficients: -0.1580642372588644
Intercept: 39.87332992769096
Mean Squared Error (MSE): 24.693968995863855

Predictor: weight
Coefficients: -0.007819784906837317
Intercept: 46.64235444447036
Mean Squared Error (MSE): 23.443428465276174

Predictor: acceleration
Coefficients: 1.0916684574292301
Intercept: 6.3893623243885145
Mean Squared Error (MSE): 45.02835033044602

Predictor: year
Coefficients: 1.2878581416981354
Intercept: -74.69630631241105
Mean Squared Error (MSE): 45.25485348058966

Predictor: origin
Coefficients: 5.2967632910994435
Intercept: 15.16051340899802
Mean Squared Error (MSE): 36.215950984956486

