In [2]:
import numpy as np  # Importing NumPy for numerical computations
import pandas as pd  # Importing pandas for data manipulation and analysis
import matplotlib.pyplot as plt  # Importing Matplotlib for data visualization
from sklearn.model_selection import train_test_split  # Importing train_test_split to split data into training and testing sets

In [3]:
# Load the advertising dataset into a pandas DataFrame.
# The path uses a forward slash: "\A" is not a valid string escape (newer Python
# versions warn about it) and a backslash separator only resolves on Windows,
# whereas "/" is accepted on every platform.
data = pd.read_csv("Data/Advertising.csv")

In [4]:
# Preview the first five rows of the loaded DataFrame as a quick sanity check.
data.head(n=5)

Unnamed: 0.1,Unnamed: 0,TV,radio,newspaper,sales
0,1,230.1,37.8,69.2,22.1
1,2,44.5,39.3,45.1,10.4
2,3,17.2,45.9,69.3,9.3
3,4,151.5,41.3,58.5,18.5
4,5,180.8,10.8,58.4,12.9


In [5]:
# Names of the predictor (independent-variable) columns used by the full model.
features = ["TV", "radio", "newspaper"]

# Design data: every row, restricted to the predictor columns.
X = data.loc[:, features]

# Response (dependent variable): every row of the "sales" column.
y = data.loc[:, "sales"]

In [None]:
# Preview the first five rows of the predictor columns.
X.head(n=5)

Unnamed: 0,TV,radio,newspaper
0,230.1,37.8,69.2
1,44.5,39.3,45.1
2,17.2,45.9,69.3
3,151.5,41.3,58.5
4,180.8,10.8,58.4


In [None]:
# Preview the first five values of the response series.
y.head(n=5)

0    22.1
1    10.4
2     9.3
3    18.5
4    12.9
Name: sales, dtype: float64

In [None]:
# Split predictors and response into training and test subsets. No split size is
# passed, so the library default applies; the fixed random_state makes the
# shuffle (and therefore the split) reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=1,
)

In [None]:
# Ordinary least squares on all predictors, fitted and scored on the training split.
#
# NOTE: this cell rebinds X_train / X_test / y_train / y_test in place. Re-run the
# train/test split cell before executing it a second time; otherwise a second
# column of ones is stacked on and `.to_numpy()` is called on an ndarray.

# Prepend a column of ones so the first coefficient acts as the intercept
X_train = np.column_stack((np.ones(X_train.shape[0]), X_train))
X_test = np.column_stack((np.ones(X_test.shape[0]), X_test))

# Work with plain NumPy arrays for the targets
y_train = y_train.to_numpy()
y_test = y_test.to_numpy()

# Normal equation (X'X) beta = X'y, solved as a linear system instead of forming
# the explicit inverse of X'X, which costs more and loses numerical accuracy
beta = np.linalg.solve(X_train.T @ X_train, X_train.T @ y_train)

# Fitted values on the training set
y_pred = X_train @ beta

# Residual Sum of Squares (RSS) and Total Sum of Squares (TSS)
RSS = np.sum((y_train - y_pred) ** 2)
TSS = np.sum((y_train - y_train.mean()) ** 2)

# R-squared (R²): share of the total variation captured by the fit
R2 = 1 - RSS / TSS

# Residual Standard Error (RSE), using n - p residual degrees of freedom
n, p = X_train.shape  # n samples, p estimated parameters (intercept included)
RSE = np.sqrt(RSS / (n - p))

# F-statistic comparing the fitted model with the intercept-only model
# (p - 1 slope coefficients in the numerator, n - p residual degrees of freedom)
F_statistic = ((TSS - RSS) / (p - 1)) / (RSS / (n - p))

# Report the fit summary
print(f"Coefficients: {beta}")
print(f"RSS: {RSS:.4f}")
print(f"R²: {R2:.4f}")
print(f"RSE: {RSE:.4f}")
print(f"F-statistic: {F_statistic:.4f}")


Coefficients: [2.87696662 0.04656457 0.17915812 0.00345046]
RSS: 463.01870202437027
R2: 0.890307557755665
RSE: 1.7808315396942125
F-statistic: 394.9980532623856


In [6]:
# Simple linear regression of sales on TV alone, fitted and scored on the
# training split. The cell starts from `data`, so it can be re-run on its own.

# Single predictor (independent variable): the 'TV' column
xt = data["TV"]

# Target (dependent variable): the 'sales' column
y = data["sales"]

# Split into training and test subsets; fixed random_state for reproducibility
xt_train, xt_test, y_train, y_test = train_test_split(xt, y, random_state=1)

# reshape(-1, 1) turns each 1-D vector into a single-column 2-D array so it can
# be stacked next to the intercept column
xt_train = xt_train.to_numpy().reshape(-1, 1)
xt_test = xt_test.to_numpy().reshape(-1, 1)

# Prepend a column of ones so the first coefficient acts as the intercept
# (X_test is built here but not used further in this cell)
X_train = np.column_stack((np.ones(xt_train.shape[0]), xt_train))
X_test = np.column_stack((np.ones(xt_test.shape[0]), xt_test))

# Normal equation (X'X) beta = X'y, solved as a linear system instead of forming
# the explicit inverse of X'X, which costs more and loses numerical accuracy
beta = np.linalg.solve(X_train.T @ X_train, X_train.T @ y_train)

# Fitted values on the training set
y_pred = X_train @ beta

# Residual Sum of Squares (RSS) and Total Sum of Squares (TSS)
RSS = np.sum((y_train - y_pred) ** 2)
TSS = np.sum((y_train - y_train.mean()) ** 2)

# R-squared (R²): share of the total variation captured by the fit
R2 = 1 - RSS / TSS

# Residual Standard Error (RSE), using n - p residual degrees of freedom
n, p = X_train.shape  # n samples, p estimated parameters (intercept included)
RSE = np.sqrt(RSS / (n - p))

# F-statistic comparing the fitted model with the intercept-only model
F_statistic = ((TSS - RSS) / (p - 1)) / (RSS / (n - p))

# Report the fit summary
print(f"Coefficients: {beta}")
print(f"RSS: {RSS:.4f}")
print(f"R²: {R2:.4f}")
print(f"RSE: {RSE:.4f}")
print(f"F-statistic: {F_statistic:.4f}")


Coefficients: [6.91197262 0.04802945]
RSS: 1587.8472
R²: 0.6238
RSE: 3.2755
F-statistic: 245.4367


In [None]:
# Simple linear regression of sales on radio alone, fitted and scored on the
# training split. The cell starts from `data`, so it can be re-run on its own.

# Single predictor (independent variable): the 'radio' column
xt = data["radio"]

# Target (dependent variable): the 'sales' column
y = data["sales"]

# Split into training and test subsets; fixed random_state for reproducibility
xt_train, xt_test, y_train, y_test = train_test_split(xt, y, random_state=1)

# reshape(-1, 1) turns each 1-D vector into a single-column 2-D array so it can
# be stacked next to the intercept column
xt_train = xt_train.to_numpy().reshape(-1, 1)
xt_test = xt_test.to_numpy().reshape(-1, 1)

# Prepend a column of ones so the first coefficient acts as the intercept
# (X_test is built here but not used further in this cell)
X_train = np.column_stack((np.ones(xt_train.shape[0]), xt_train))
X_test = np.column_stack((np.ones(xt_test.shape[0]), xt_test))

# Normal equation (X'X) beta = X'y, solved as a linear system instead of forming
# the explicit inverse of X'X, which costs more and loses numerical accuracy
beta = np.linalg.solve(X_train.T @ X_train, X_train.T @ y_train)

# Fitted sales values on the training set
y_pred = X_train @ beta

# Residual Sum of Squares (RSS) and Total Sum of Squares (TSS)
RSS = np.sum((y_train - y_pred) ** 2)
TSS = np.sum((y_train - y_train.mean()) ** 2)

# R-squared (R²): share of the total variation captured by the fit
R2 = 1 - RSS / TSS

# Residual Standard Error (RSE), using n - p residual degrees of freedom
n, p = X_train.shape  # n samples, p estimated parameters (intercept included)
RSE = np.sqrt(RSS / (n - p))

# F-statistic comparing the fitted model with the intercept-only model
F_statistic = ((TSS - RSS) / (p - 1)) / (RSS / (n - p))

# Display the fit summary
print(f"Coefficients: {beta}")
print(f"RSS: {RSS:.4f}")
print(f"R²: {R2:.4f}")
print(f"RSE: {RSE:.4f}")
print(f"F-statistic: {F_statistic:.4f}")


Coefficients: [9.33859455 0.19338969]
RSS: 2937.6263521070696
R2: 0.3040553058546194
RSE: 4.455202206794451
F-statistic: 64.6605766881287


In [7]:
# Simple linear regression of sales on newspaper alone, fitted and scored on the
# training split. The cell starts from `data`, so it can be re-run on its own.

# Single predictor (independent variable): the 'newspaper' column
xt = data["newspaper"]

# Target (dependent variable): the 'sales' column
y = data["sales"]

# Split into training and test subsets; fixed random_state for reproducibility
xt_train, xt_test, y_train, y_test = train_test_split(xt, y, random_state=1)

# reshape(-1, 1) turns each 1-D vector into a single-column 2-D array so it can
# be stacked next to the intercept column
xt_train = xt_train.to_numpy().reshape(-1, 1)
xt_test = xt_test.to_numpy().reshape(-1, 1)

# Prepend a column of ones so the first coefficient acts as the intercept
# (X_test is built here but not used further in this cell)
X_train = np.column_stack((np.ones(xt_train.shape[0]), xt_train))
X_test = np.column_stack((np.ones(xt_test.shape[0]), xt_test))

# Normal equation (X'X) beta = X'y, solved as a linear system instead of forming
# the explicit inverse of X'X, which costs more and loses numerical accuracy
beta = np.linalg.solve(X_train.T @ X_train, X_train.T @ y_train)

# Fitted values on the training set
y_pred = X_train @ beta

# Residual Sum of Squares (RSS) and Total Sum of Squares (TSS)
RSS = np.sum((y_train - y_pred) ** 2)
TSS = np.sum((y_train - y_train.mean()) ** 2)

# R-squared (R²): share of the total variation captured by the fit
R2 = 1 - RSS / TSS

# Residual Standard Error (RSE), using n - p residual degrees of freedom
n, p = X_train.shape  # n samples, p estimated parameters (intercept included)
RSE = np.sqrt(RSS / (n - p))

# F-statistic comparing the fitted model with the intercept-only model
F_statistic = ((TSS - RSS) / (p - 1)) / (RSS / (n - p))

# Report the fit summary
print(f"Coefficients: {beta}")
print(f"RSS: {RSS:.4f}")
print(f"R²: {R2:.4f}")
print(f"RSE: {RSE:.4f}")
print(f"F-statistic: {F_statistic:.4f}")


Coefficients: [11.76557671  0.06888299]
RSS: 3898.6081
R²: 0.0764
RSE: 5.1324
F-statistic: 12.2411
