In [None]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# Local alternative to the remote download below:
#   data = pd.read_csv("carsmall1.csv")
# The CSV (carsmall1.csv) is fetched from Google Drive via its file id.
drive_file_id = "1mdZ2M2hJBHYsUk8dRMyRNj_FHie2pqfV"
url = 'https://drive.google.com/uc?id={}'.format(drive_file_id)
data = pd.read_csv(url)

# Regression target used by every model below: the Horsepower column.
horsepower_column = data['Horsepower']
y = np.array(horsepower_column)

In [12]:
# Single-feature design matrix: MPG as one column, shape (n_samples, 1).
x_a = np.array(data['MPG']).reshape(-1, 1)

print("Model with MPG feature only")
# Fit a fresh linear model for each held-out fraction; the split uses a
# fixed random_state so the partition is the same on every run.
for test_size in (0.1, 0.25, 0.5):
    x_train_a, x_test_a, y_train_a, y_test_a = train_test_split(
        x_a, y, test_size=test_size, random_state=0
    )

    model_a = LinearRegression()
    reg_a = model_a.fit(x_train_a, y_train_a)

    # Fitted weight(s) and intercept.
    w_a = np.transpose(reg_a.coef_)
    b_a = reg_a.intercept_
    print(f"With test_size = {test_size}")
    print(f"w: {w_a}, b: {b_a}")

    predicted_y_train_a = model_a.predict(x_train_a)
    predicted_y_test_a = model_a.predict(x_test_a)

    # Root-mean-square error on the training and held-out partitions.
    resid_train_a = y_train_a - predicted_y_train_a
    resid_test_a = y_test_a - predicted_y_test_a
    rms_train_a = np.sqrt(np.mean(resid_train_a ** 2))
    rms_test_a = np.sqrt(np.mean(resid_test_a ** 2))
    print(f"RMS train: {rms_train_a}")
    print(f"RMS test : {rms_test_a}\n")


With test_size = 0.1
w: [-4.44525956], b: 215.1733890979309
RMS train: 26.588711846995785
RMS test : 29.38683449938521

With test_size = 0.25
w: [-3.93466224], b: 200.93014424178915
RMS train: 22.407410758537424
RMS test : 38.1284538053723

With test_size = 0.5
w: [-4.20630579], b: 205.6808140495868
RMS train: 21.351312350217164
RMS test : 31.94569407869715



In [15]:
# Two-feature design matrix: MPG and its square, one column each.
mpg_b = np.array(data['MPG'])
x_b = np.column_stack((mpg_b, mpg_b ** 2))

print("Model with MPG, MPG**2 features")
# Fit a fresh linear model for each held-out fraction; the split uses a
# fixed random_state so the partition is the same on every run.
for test_size in (0.1, 0.25, 0.5):
    x_train_b, x_test_b, y_train_b, y_test_b = train_test_split(
        x_b, y, test_size=test_size, random_state=0
    )

    model_b = LinearRegression()
    reg_b = model_b.fit(x_train_b, y_train_b)

    # Fitted weights (one per feature column) and intercept.
    w_b = np.transpose(reg_b.coef_)
    b_b = reg_b.intercept_
    print(f"With test_size = {test_size}")
    print(f"w: {w_b}, b: {b_b}")

    predicted_y_train_b = model_b.predict(x_train_b)
    predicted_y_test_b = model_b.predict(x_test_b)

    # Root-mean-square error on the training and held-out partitions.
    resid_train_b = y_train_b - predicted_y_train_b
    resid_test_b = y_test_b - predicted_y_test_b
    rms_train_b = np.sqrt(np.mean(resid_train_b ** 2))
    rms_test_b = np.sqrt(np.mean(resid_test_b ** 2))
    print(f"RMS train: {rms_train_b}")
    print(f"RMS test : {rms_test_b}\n")

With test_size = 0.1
w: [-16.2096824    0.23722435], b: 345.55119345516385
RMS train: 20.551737105507296
RMS test : 28.373002260411248

With test_size = 0.25
w: [-14.20976669   0.20460416], b: 316.6151534574984
RMS train: 16.93065024769068
RMS test : 32.10014758031733

With test_size = 0.5
w: [-13.13693292   0.181579  ], b: 305.0061638128428
RMS train: 16.739986852408776
RMS test : 26.255166945657017



In [18]:
# Three-feature design matrix: MPG, MPG squared, and Weight, one column each.
mpg_c = np.array(data['MPG'])
weight_c = np.array(data['Weight'])
x_c = np.column_stack((mpg_c, mpg_c ** 2, weight_c))

print("Model with MPG, MPG**2, Weight features")
# Fit a fresh linear model for each held-out fraction; the split uses a
# fixed random_state so the partition is the same on every run.
for test_size in (0.1, 0.25, 0.5):
    x_train_c, x_test_c, y_train_c, y_test_c = train_test_split(
        x_c, y, test_size=test_size, random_state=0
    )

    model_c = LinearRegression()
    reg_c = model_c.fit(x_train_c, y_train_c)

    # Fitted weights (one per feature column) and intercept.
    w_c = np.transpose(reg_c.coef_)
    b_c = reg_c.intercept_
    print(f"With test_size = {test_size}")
    print(f"w: {w_c}, b: {b_c}")

    predicted_y_train_c = model_c.predict(x_train_c)
    predicted_y_test_c = model_c.predict(x_test_c)

    # Root-mean-square error on the training and held-out partitions.
    resid_train_c = y_train_c - predicted_y_train_c
    resid_test_c = y_test_c - predicted_y_test_c
    rms_train_c = np.sqrt(np.mean(resid_train_c ** 2))
    rms_test_c = np.sqrt(np.mean(resid_test_c ** 2))
    print(f"RMS train: {rms_train_c}")
    print(f"RMS test : {rms_test_c}\n")


Model with MPG, MPG**2, Weight features
With test_size = 0.1
w: [-8.88387008  0.13846027  0.02805658], b: 149.55013080540806
RMS train: 18.485081598078015
RMS test : 31.37254510860941

With test_size = 0.25
w: [-10.19056717   0.15064916   0.01589543], b: 207.589677996371
RMS train: 16.22211901613297
RMS test : 30.049599984159585

With test_size = 0.5
w: [-1.05593346e+01  1.48360440e-01  1.01917352e-02], b: 234.50106210868518
RMS train: 16.467332060852094
RMS test : 24.98262791975454



\begin{table}
\begin{tabular}{|l|c|c|c|c|c|c|}
\hline
 & \multicolumn{2}{c|}{MPG} & \multicolumn{2}{c|}{MPG, MPG\textsuperscript{2}} & \multicolumn{2}{c|}{MPG, MPG\textsuperscript{2}, Weight} \\ \hline
test\_size & RMS\_train & RMS\_test & RMS\_train & RMS\_test & RMS\_train & RMS\_test \\ \hline
0.1        & 26.59      & 29.39      & 20.55       & 28.37      & 18.49       & 31.37     \\ \hline
0.25       & 22.41      & 38.13      & 16.93       & 32.10      & 16.22       & 30.05     \\ \hline
0.5        & 21.35      & 31.95      & 16.74       & 26.26      & 16.47       & 24.98     \\ \hline
\end{tabular}
\end{table}

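* The RMS train values generally decrease as the test size increases (the one exception is the MPG, MPG², Weight model, which rises slightly from 16.22 to 16.47 between test sizes 0.25 and 0.5). A larger test size leaves fewer training samples, which are easier to fit closely, so the lower training error reflects the smaller training set rather than a genuinely better model.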
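* The RMS test values are higher than the corresponding RMS train values in every case, and they vary across different models and test sizes without a single consistent trend, suggesting a complex relationship between model complexity, training, and testing performance.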
* The model evaluated with MPG, MPG², and Weight has the lowest RMS train at every test size and the lowest RMS test at test sizes 0.25 and 0.5 (most noticeably at 0.5); at a test size of 0.1, however, its RMS test (31.37) is the highest of the three models.