# Linear Regression Validation

This notebook uses linear regression implementations to validate the linear regression model of the `rust-ml` library. We'll use the same dataset to train a linear regression model.

In [None]:
import pandas as pd
import numpy as np
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

In [None]:
import os
df = pd.read_csv('../datasets/advertising.csv')
df.describe()

In [140]:
# Normalize the dataset
df = (df - df.mean(axis=0)) / df.std(axis=0)
df.describe()

Unnamed: 0,TV,Radio,Newspaper,Sales
count,200.0,200.0,200.0,200.0
mean,1.287859e-16,-4.263256e-16,2.309264e-16,-2.842171e-16
std,1.0,1.0,1.0,1.0
min,-1.704546,-1.566936,-1.389161,-2.560707
25%,-0.8464055,-0.8950745,-0.817499,-0.7817154
50%,0.03153601,-0.02451705,-0.2205833,0.1645567
75%,0.8360974,0.8931886,0.6679027,0.7417827
max,1.739664,1.773849,3.831556,2.246355


In [141]:
# Split the dataset into training and testing sets
X = df[["TV", "Radio", "Newspaper"]].values
y = df["Sales"].values
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8)

In [148]:
# Create a linear regression model
import time
model = LinearRegression()
tic = time.time()
model.fit(X_train, y_train)
tok = time.time()
print("Training time:", tok - tic)


Training time: 0.0005171298980712891


In [144]:

mse = model.score(X_test, y_test)

print("Model coefficients:", model.coef_)
print("Model intercept:", model.intercept_)
print("Mean Squared Error:", mse)

Model coefficients: [0.87288215 0.32052642 0.0207583 ]
Model intercept: -0.0067498162449043525
Mean Squared Error: 0.8370477252121606
