# MSE and R-Squared Examples

## Scaling the Data

In [None]:
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score

# Generate sample data: a noisy straight line y = 2x + 5.
# The global NumPy RNG is seeded so the printed metrics are reproducible.
np.random.seed(42)
x = np.arange(10)
y = 2 * x + 5 + np.random.normal(0, 1, 10)  # Gaussian noise, mean 0, sd 1

# Scale the feature (x) by a factor of 10; the target y is left untouched
x_scaled = x * 10

# Fit a degree-1 polynomial (straight line) to each version of the feature
coeffs = np.polyfit(x, y, 1)
y_pred = np.polyval(coeffs, x)

coeffs_scaled = np.polyfit(x_scaled, y, 1)
y_pred_scaled = np.polyval(coeffs_scaled, x_scaled)

# Calculate MSE and R-squared for both models.
# The unscaled score is stored as `mse_original` rather than `mse`: a later
# cell runs `from statsmodels.tools.eval_measures import mse, rmse` and calls
# `mse(...)` as a function, so binding a float to `mse` here would break those
# calls whenever this cell is re-run after that import.
mse_original = mean_squared_error(y, y_pred)
mse_scaled = mean_squared_error(y, y_pred_scaled)

r2 = r2_score(y, y_pred)
r2_scaled = r2_score(y, y_pred_scaled)

print("Original MSE:", mse_original)
print("Scaled MSE:", mse_scaled)
print("\nOriginal R-squared:", r2)
print("Scaled R-squared:", r2_scaled)


Original MSE: 0.4700741203958067
Scaled MSE: 0.4700741203958067

Original R-squared: 0.9858593511600999
Scaled R-squared: 0.9858593511600999


## Different MSE ~ Same R-Squared

In [None]:
import pandas as pd

# Two small target series ('Set A', 'Set B') that share one feature column 'X'
data = {
    'X': [1, 2, 3, 4, 5],
    'Set A': [2, 4, 5, 8, 10],
    'Set B': [7, 9, 13, 18, 22],
}
df = pd.DataFrame(data)

# Set A: straight-line fit, in-sample predictions, then error and fit quality
coeffs1 = np.polyfit(df['X'], df['Set A'], deg=1)
y_pred1 = np.polyval(coeffs1, df['X'])
mse1 = mean_squared_error(df['Set A'], y_pred1)
r21 = r2_score(df['Set A'], y_pred1)

# Set B: same procedure on the second target
coeffs2 = np.polyfit(df['X'], df['Set B'], deg=1)
y_pred2 = np.polyval(coeffs2, df['X'])
mse2 = mean_squared_error(df['Set B'], y_pred2)
r22 = r2_score(df['Set B'], y_pred2)

# Report both MSE values first, then both R-squared values
print("MSE 1:", mse1)
print("MSE 2:", mse2)
print("R-squared 1:", r21)
print("R-squared 2:", r22)

MSE 1: 0.15999999999999986
MSE 2: 0.5399999999999997
R-squared 1: 0.9803921568627452
R-squared 2: 0.9825581395348837


## Advertising

In [None]:
import pandas as pd

# Load only the three ad-spend columns and the sales target from the hosted CSV
df = pd.read_csv(
    'https://raw.githubusercontent.com/gitmystuff/Datasets/main/Advertising.csv',
    usecols=['TV', 'radio', 'newspaper', 'sales'],
)
print(df.shape)
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() emitted a stray "None" line.
df.info()
print(df.describe())
# Last expression of the cell: rendered as a table by the notebook
df.head()

(200, 4)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 4 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   TV         200 non-null    float64
 1   radio      200 non-null    float64
 2   newspaper  200 non-null    float64
 3   sales      200 non-null    float64
dtypes: float64(4)
memory usage: 6.4 KB
None
               TV       radio   newspaper       sales
count  200.000000  200.000000  200.000000  200.000000
mean   147.042500   23.264000   30.554000   14.022500
std     85.854236   14.846809   21.778621    5.217457
min      0.700000    0.000000    0.300000    1.600000
25%     74.375000    9.975000   12.750000   10.375000
50%    149.750000   22.900000   25.750000   12.900000
75%    218.825000   36.525000   45.100000   17.400000
max    296.400000   49.600000  114.000000   27.000000


Unnamed: 0,TV,radio,newspaper,sales
0,230.1,37.8,69.2,22.1
1,44.5,39.3,45.1,10.4
2,17.2,45.9,69.3,9.3
3,151.5,41.3,58.5,18.5
4,180.8,10.8,58.4,12.9


## Analysis with TV, Radio, and Newspaper

In [None]:
# train test split with TV, radio, and newspaper
from sklearn.model_selection import train_test_split

# Target is 'sales'; the predictors are every remaining column of df
y = df['sales']
X = df.drop(columns='sales')

# Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [None]:
# statsmodels coefficients
import statsmodels.api as sm

# Add an explicit 'const' column so the fitted model includes an intercept term
X_train = sm.add_constant(X_train)

# Ordinary least squares of sales on const + TV + radio + newspaper (training rows)
model = (
    sm.OLS(endog=y_train, exog=X_train)
    .fit()
)

# Last expression of the cell: the notebook renders the regression summary tables
model.summary()

0,1,2,3
Dep. Variable:,sales,R-squared:,0.906
Model:,OLS,Adj. R-squared:,0.903
Method:,Least Squares,F-statistic:,434.5
Date:,"Tue, 23 Jul 2024",Prob (F-statistic):,1.88e-69
Time:,16:22:59,Log-Likelihood:,-262.21
No. Observations:,140,AIC:,532.4
Df Residuals:,136,BIC:,544.2
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,2.7089,0.374,7.250,0.000,1.970,3.448
TV,0.0441,0.002,27.219,0.000,0.041,0.047
radio,0.1993,0.010,20.195,0.000,0.180,0.219
newspaper,0.0069,0.007,0.988,0.325,-0.007,0.021

0,1,2,3
Omnibus:,68.437,Durbin-Watson:,2.285
Prob(Omnibus):,0.0,Jarque-Bera (JB):,325.342
Skew:,-1.709,Prob(JB):,2.25e-71
Kurtosis:,9.64,Cond. No.,500.0


In [None]:
from statsmodels.tools.eval_measures import mse, rmse

# Give the test matrix the same 'const' column the model was fitted with
X_test = sm.add_constant(X_test)
predictions = model.predict(exog=X_test)

# Held-out error of the TV + radio + newspaper model
test_mse = mse(y_test, predictions)
test_rmse = rmse(y_test, predictions)
print("MSE:", test_mse)
print("RMSE:", test_rmse)

MSE: 3.796797236715215
RMSE: 1.9485372043446374


## Analysis with TV and Radio

In [None]:
# statsmodels: refit on TV and radio only, then evaluate on the test set
import statsmodels.api as sm

# Remove 'newspaper' from both design matrices (the 'const' column is kept).
# The drop rebinds the names instead of mutating in place, and
# errors='ignore' makes the cell safe to re-run: a second execution finds the
# column already gone and is a no-op rather than raising KeyError.
X_train = X_train.drop(columns=['newspaper'], errors='ignore')
X_test = X_test.drop(columns=['newspaper'], errors='ignore')

# Refit OLS on const + TV + radio and score it on the held-out rows
model2 = sm.OLS(y_train, X_train).fit()
predictions = model2.predict(X_test)
print("MSE:", mse(y_test, predictions))
print("RMSE:", rmse(y_test, predictions))

MSE: 3.669047054530682
RMSE: 1.915475673176426


In [None]:
# Re-display the summary of `model` (fitted on const + TV + radio + newspaper)
# so it can be compared with the `model2` summary shown in the next cell.
model.summary()

0,1,2,3
Dep. Variable:,sales,R-squared:,0.906
Model:,OLS,Adj. R-squared:,0.903
Method:,Least Squares,F-statistic:,434.5
Date:,"Tue, 23 Jul 2024",Prob (F-statistic):,1.88e-69
Time:,16:23:00,Log-Likelihood:,-262.21
No. Observations:,140,AIC:,532.4
Df Residuals:,136,BIC:,544.2
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,2.7089,0.374,7.250,0.000,1.970,3.448
TV,0.0441,0.002,27.219,0.000,0.041,0.047
radio,0.1993,0.010,20.195,0.000,0.180,0.219
newspaper,0.0069,0.007,0.988,0.325,-0.007,0.021

0,1,2,3
Omnibus:,68.437,Durbin-Watson:,2.285
Prob(Omnibus):,0.0,Jarque-Bera (JB):,325.342
Skew:,-1.709,Prob(JB):,2.25e-71
Kurtosis:,9.64,Cond. No.,500.0


In [None]:
# Summary of `model2`, the refit with the 'newspaper' column dropped
# (const + TV + radio only).
model2.summary()

0,1,2,3
Dep. Variable:,sales,R-squared:,0.905
Model:,OLS,Adj. R-squared:,0.903
Method:,Least Squares,F-statistic:,651.3
Date:,"Tue, 23 Jul 2024",Prob (F-statistic):,1.06e-70
Time:,16:23:00,Log-Likelihood:,-262.71
No. Observations:,140,AIC:,531.4
Df Residuals:,137,BIC:,540.2
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
const,2.8376,0.350,8.103,0.000,2.145,3.530
TV,0.0441,0.002,27.234,0.000,0.041,0.047
radio,0.2026,0.009,21.837,0.000,0.184,0.221

0,1,2,3
Omnibus:,72.173,Durbin-Watson:,2.283
Prob(Omnibus):,0.0,Jarque-Bera (JB):,377.67
Skew:,-1.78,Prob(JB):,9.77e-83
Kurtosis:,10.216,Cond. No.,463.0
