# Import the Libraries

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Import the Data

In [3]:
# Read the CSV into a DataFrame, then separate it by position:
# every column except the last becomes the feature matrix X,
# and the last column becomes the target vector y (both as NumPy arrays).
dataset = pd.read_csv('Data.csv')
X, y = dataset.iloc[:, :-1].values, dataset.iloc[:, -1].values

# Split the Dataset Into Training and Testing sets

In [4]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as a test set. The fixed random_state makes the
# shuffle, and therefore the resulting split, reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)

# Multiple Linear Regression Model

## Training the Model

In [5]:
from sklearn.linear_model import LinearRegression

# Fit a multiple linear regression on the training split.
# The fit call is kept as the cell's last expression so the fitted
# estimator is displayed as the cell output.
regressor = LinearRegression()
regressor.fit(X=X_train, y=y_train)

0,1,2
,fit_intercept,True
,copy_X,True
,tol,1e-06
,n_jobs,
,positive,False


## Predicting the Test set Results

In [6]:
# Print array values with two decimals.
np.set_printoptions(precision=2)

# Predict the test rows, then print the predictions (left column)
# side by side with the actual targets (right column).
y_pred_lr = regressor.predict(X_test)
print(np.column_stack((y_pred_lr, y_test)))

[[431.43 431.23]
 [458.56 460.01]
 [462.75 461.14]
 ...
 [469.52 473.26]
 [442.42 438.  ]
 [461.88 463.28]]


## Evaluate the LR Model Performance

In [7]:
# Score the linear regression predictions against the test targets.
from sklearn.metrics import r2_score

r2_score_lr = r2_score(y_true=y_test, y_pred=y_pred_lr)
print(f'R^2 Score for Linear Regression: {r2_score_lr:.4f}')
# The R^2 score is the proportion of variance in the dependent variable that
# the independent variables in the model explain. A higher R^2 score indicates
# a better fit of the model to the data.

R^2 Score for Linear Regression: 0.9325


# Polynomial Linear Regression Model

In [8]:
from sklearn.preprocessing import PolynomialFeatures

# Expand the training features into polynomial terms up to degree 4.
poly_reg = PolynomialFeatures(degree=4)
X_poly = poly_reg.fit_transform(X_train)

# A linear model fitted on the expanded features gives the polynomial regression.
# The fit call is kept last so the fitted estimator is displayed.
poly_regressor = LinearRegression()
poly_regressor.fit(X=X_poly, y=y_train)

0,1,2
,fit_intercept,True
,copy_X,True
,tol,1e-06
,n_jobs,
,positive,False


## Predicting the Test Results

In [9]:
# Expand the test features with the transformer that was fitted on the training
# data. Use transform(), not fit_transform(): a preprocessing step must never be
# (re)fitted on the test split.
X_test_poly = poly_reg.transform(X_test)
y_pred_plr = poly_regressor.predict(X_test_poly)

# Print predictions (left column) next to the actual targets (right column),
# with two decimals.
np.set_printoptions(precision=2)
print(np.column_stack((y_pred_plr, y_test)))


[[434.16 431.23]
 [458.26 460.01]
 [460.72 461.14]
 ...
 [469.49 473.26]
 [438.53 438.  ]
 [461.62 463.28]]


## Evaluate the Polynomial LR Model Performance

In [10]:
# Score the polynomial regression predictions against the test targets.
r2_score_plr = r2_score(y_true=y_test, y_pred=y_pred_plr)
print(f'R^2 Score for Polynomial Linear Regression: {r2_score_plr:.4f}')
# The R^2 score is the proportion of variance in the dependent variable that
# the independent variables in the model explain; higher means a better fit.
# Polynomial regression can capture non-linear relationships, so it may score
# higher than plain linear regression when the underlying relationship is
# non-linear.
# Caveat: a polynomial degree that is too high can overfit the training data.

R^2 Score for Polynomial Linear Regression: 0.9455


# Support Vector Regressor Model

## Preparing Data for Feature Scaling

In [11]:
# Turn the 1-D target into a single-column 2-D array, the shape used for the
# target scaler in the feature-scaling step of the SVR workflow.
y_SVR = y.reshape(-1, 1)

## Split the Dataset with the Reshaped Label for Feature Scaling

In [12]:
# Split again, this time with the column-shaped target. Same test_size and
# random_state as the first split, so the same rows land in each partition.
X_train_SVR, X_test_SVR, y_train_SVR, y_test_SVR = train_test_split(
    X, y_SVR, test_size=0.2, random_state=0
)

## Feature Scaling

In [13]:
from sklearn.preprocessing import StandardScaler

# One scaler per array, so predictions can later be mapped back to the
# original target units with sc_y.inverse_transform.
sc_X, sc_y = StandardScaler(), StandardScaler()

# Fit the scalers on the training split only and replace the training arrays
# with their standardized versions.
# NOTE: this cell overwrites its own inputs. Re-running it without first
# re-running the split refits the scalers on already-scaled data.
X_train_SVR = sc_X.fit_transform(X_train_SVR)
y_train_SVR = sc_y.fit_transform(y_train_SVR)

## Train the SVR Model on the Scaled Training Set

In [14]:
from sklearn.svm import SVR

# Train the SVR model (RBF kernel) on the scaled training set.
# The scaled target is a single column, so ravel() flattens it to 1-D for fit.
# The fit call is kept last so the fitted estimator is displayed.
regressor_SVR = SVR(kernel='rbf')
regressor_SVR.fit(X=X_train_SVR, y=y_train_SVR.ravel())

0,1,2
,kernel,'rbf'
,degree,3
,gamma,'scale'
,coef0,0.0
,tol,0.001
,C,1.0
,epsilon,0.1
,shrinking,True
,cache_size,200
,verbose,False


## Predict the Testing Set Results

In [15]:
# Predict on the test rows of the SVR split, i.e. the partition that belongs
# to the data the scalers and the SVR were fitted on, instead of borrowing
# X_test / y_test from the earlier, separate split.
# Steps: scale the features with the fitted sc_X, predict in scaled target
# space, then map the predictions back to the original units with sc_y.
scaled_pred = regressor_SVR.predict(sc_X.transform(X_test_SVR))
y_pred_SVR = sc_y.inverse_transform(scaled_pred.reshape(-1, 1))

# Print predictions (left column) next to the actual targets (right column),
# with two decimals.
np.set_printoptions(precision=2)
print(np.column_stack((y_pred_SVR.ravel(), y_test_SVR.ravel())))

[[434.05 431.23]
 [457.94 460.01]
 [461.03 461.14]
 ...
 [470.6  473.26]
 [439.42 438.  ]
 [460.92 463.28]]


## Evaluate the SVR Model Performance

In [16]:
# Evaluate the SVR model on the test targets of the SVR split (y_test_SVR),
# the partition that matches the data the SVR was trained on.
# r2_score is already imported by the linear regression evaluation cell, so it
# is not re-imported here.
r2_score_SVR = r2_score(y_test_SVR.ravel(), y_pred_SVR.ravel())
print(f'R^2 Score for SVR: {r2_score_SVR:.4f}')
# The R^2 score is the proportion of variance in the dependent variable that
# the independent variables in the model explain; higher means a better fit.
# SVR can capture non-linear relationships, so it may score higher than linear
# regression when the relationship between the independent and dependent
# variables is non-linear.

R^2 Score for SVR: 0.9481


# Decision Tree Model

## Training the DT Model

In [17]:
from sklearn.tree import DecisionTreeRegressor

# Fit a decision tree regressor on the (unscaled) training split.
# random_state is fixed so the fitted tree is reproducible.
# The fit call is kept last so the fitted estimator is displayed.
regressor_DT = DecisionTreeRegressor(random_state=0)
regressor_DT.fit(X=X_train, y=y_train)

0,1,2
,criterion,'squared_error'
,splitter,'best'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,
,random_state,0
,max_leaf_nodes,
,min_impurity_decrease,0.0


## Predict the Test Set Results

In [18]:
# Print array values with two decimals.
np.set_printoptions(precision=2)

# Predict the test rows, then print the predictions (left column)
# side by side with the actual targets (right column).
y_pred_DT = regressor_DT.predict(X_test)
print(np.column_stack((y_pred_DT, y_test)))

[[431.28 431.23]
 [459.59 460.01]
 [460.06 461.14]
 ...
 [471.46 473.26]
 [437.76 438.  ]
 [462.74 463.28]]


## Evaluating the DT Model Performance

In [19]:
# Score the decision tree predictions against the test targets.
r2_score_DT = r2_score(y_true=y_test, y_pred=y_pred_DT)
print(f'R^2 Score for Decision Tree Regression: {r2_score_DT:.4f}')
# The R^2 score is the proportion of variance in the dependent variable that
# the independent variables in the model explain; higher means a better fit.
# Decision tree regression can capture non-linear relationships, so it may
# score higher than linear regression when the underlying relationship is
# non-linear.
# Caveat: a tree that is too deep or complex can overfit the training data.

R^2 Score for Decision Tree Regression: 0.9229


# Random Forest Model

## Training the RF Model

In [20]:
from sklearn.ensemble import RandomForestRegressor

# Fit a random forest of 10 trees on the (unscaled) training split.
# random_state is fixed so the fitted forest is reproducible.
# The fit call is kept last so the fitted estimator is displayed.
regressor_RF = RandomForestRegressor(n_estimators=10, random_state=0)
regressor_RF.fit(X=X_train, y=y_train)

0,1,2
,n_estimators,10
,criterion,'squared_error'
,max_depth,
,min_samples_split,2
,min_samples_leaf,1
,min_weight_fraction_leaf,0.0
,max_features,1.0
,max_leaf_nodes,
,min_impurity_decrease,0.0
,bootstrap,True


## Predict the Test Set Results

In [21]:
# Print array values with two decimals.
np.set_printoptions(precision=2)

# Predict the test rows, then print the predictions (left column)
# side by side with the actual targets (right column).
y_pred_RF = regressor_RF.predict(X_test)
print(np.column_stack((y_pred_RF, y_test)))

[[434.05 431.23]
 [458.79 460.01]
 [463.02 461.14]
 ...
 [469.48 473.26]
 [439.57 438.  ]
 [460.38 463.28]]


## Evaluating the RF Model Performance

In [22]:
# Score the random forest predictions against the test targets.
r2_score_RF = r2_score(y_true=y_test, y_pred=y_pred_RF)
print(f'R^2 Score for Random Forest Regression: {r2_score_RF:.4f}')
# The R^2 score is the proportion of variance in the dependent variable that
# the independent variables in the model explain; higher means a better fit.
# Random forest regression can capture non-linear relationships, so it may
# score higher than linear regression when the underlying relationship is
# non-linear.
# Caveat: trees that are too deep or complex can overfit the training data.

R^2 Score for Random Forest Regression: 0.9616


# Overall Models Performance Comparison 

In [24]:
# Collect each model's test-set R^2 into one table for side-by-side comparison.
# Rows are listed in the order the models were trained above.
models_performance = pd.DataFrame(
    [
        ('Linear Regression', r2_score_lr),
        ('Polynomial Linear Regression', r2_score_plr),
        ('Support Vector Regression', r2_score_SVR),
        ('Decision Tree Regression', r2_score_DT),
        ('Random Forest Regression', r2_score_RF),
    ],
    columns=['Model', 'R^2 Score'],
)
print(models_performance)

                          Model  R^2 Score
0             Linear Regression   0.932532
1  Polynomial Linear Regression   0.945526
2     Support Vector Regression   0.948078
3      Decision Tree Regression   0.922906
4      Random Forest Regression   0.961591
