<a href="https://colab.research.google.com/github/charana13/Stock_Predictions/blob/main/Build%20Regession%20Models%20using%20Regression%20Techniques.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import numpy as np

# Build a reproducible toy dataset: 100 points with x drawn uniformly from
# [0, 2) and a target that follows y = 4 + 3*x plus standard-normal noise.
np.random.seed(0)
X = np.random.rand(100, 1) * 2
# The noise is drawn after X so the random stream is consumed in the same order.
noise = np.random.randn(100, 1)
y = 3 * X + 4 + noise


In [5]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for evaluation. The fixed random_state makes the
# split identical on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=0,
)


In [6]:
import numpy as np

# Gradient Descent function
def gradient_descent(X, y, learning_rate=0.01, iterations=1000):
    """Fit a linear model by batch gradient descent on the mean squared error.

    Parameters
    ----------
    X : array-like of shape (m, n) or (m,)
        Feature matrix without a bias column. A 1-D input is treated as a
        single feature.
    y : array-like of shape (m, 1) or (m,)
        Target values, one per row of ``X``.
    learning_rate : float, default 0.01
        Step size applied to the gradient on every iteration.
    iterations : int, default 1000
        Number of full-batch updates to perform.

    Returns
    -------
    theta : numpy.ndarray of shape (n + 1, 1)
        Learned coefficients; ``theta[0]`` is the bias term.
    cost_history : list of float
        Mean squared error measured at the start of each iteration, i.e.
        before that iteration's update is applied.

    Raises
    ------
    ValueError
        If ``y`` does not contain exactly one value per row of ``X``.
    """
    X = np.asarray(X, dtype=float)
    if X.ndim == 1:
        X = X.reshape(-1, 1)
    m, n = X.shape

    # Force a column vector: a 1-D y would broadcast against the (m, 1)
    # predictions into an (m, m) error matrix and silently corrupt the fit.
    y = np.asarray(y, dtype=float).reshape(m, 1)

    # Prepend a column of ones so the bias is learned as theta[0].
    X_b = np.concatenate((np.ones((m, 1)), X), axis=1)

    theta = np.zeros((n + 1, 1))  # coefficients, bias first
    cost_history = []

    for _ in range(iterations):
        error = X_b.dot(theta) - y
        # Record the same objective the gradient below differentiates
        # (plain MSE, no extra 1/2 factor).
        cost_history.append(np.sum(error ** 2) / m)
        gradient = 2 / m * X_b.T.dot(error)
        theta -= learning_rate * gradient

    return theta, cost_history

# Fit the bias and slope on the training split with batch gradient descent.
theta, cost_history = gradient_descent(
    X_train,
    y_train,
    learning_rate=0.01,
    iterations=1000,
)

# Score the held-out samples. gradient_descent prepends a column of ones to
# its inputs for the bias term, so the test matrix needs the same column
# before the learned coefficients can be applied.
m, n = X_test.shape
ones = np.ones((m, 1))
X_test_with_bias = np.hstack((ones, X_test))
predictions = np.dot(X_test_with_bias, theta)
print(predictions)


[[ 5.12521968]
 [ 7.39046613]
 [ 7.86460217]
 [ 5.23228956]
 [ 4.50409894]
 [ 8.54176354]
 [ 4.39098774]
 [ 7.87701763]
 [ 5.51592473]
 [ 5.36276719]
 [ 4.87890441]
 [ 5.05631669]
 [ 6.03626146]
 [ 9.78960079]
 [ 9.58791632]
 [ 5.84792807]
 [ 7.02215016]
 [ 4.97567879]
 [ 7.65990177]
 [10.01658246]]


In [7]:
# Linear Regression using Least Squares Method
from sklearn.linear_model import LinearRegression

# Ordinary least squares: build the estimator and fit it on the training split
# in one step (fit returns the estimator itself).
lr = LinearRegression().fit(X_train, y_train)

# Predictions for the held-out inputs are kept in lr_pred for later evaluation.
lr_pred = lr.predict(X_test)

# NOTE(review): this displays the test inputs, not the predictions in lr_pred.
print(X_test)


[[0.28670657]
 [1.04649611]
 [1.20552675]
 [0.32261904]
 [0.07837558]
 [1.43265441]
 [0.04043679]
 [1.20969104]
 [0.41775351]
 [0.36638272]
 [0.20408962]
 [0.26359572]
 [0.5922804 ]
 [1.85119328]
 [1.783546  ]
 [0.52911122]
 [0.92295872]
 [0.23654885]
 [1.1368679 ]
 [1.92732552]]


In [None]:
# Polynomial Regression
from sklearn.preprocessing import PolynomialFeatures

# Expand the single input feature into degree-2 polynomial terms. The
# transformer is fitted on the training split only and then reused unchanged
# on the test split, so both matrices share the same column layout.
poly_features = PolynomialFeatures(degree=2)
X_poly_train = poly_features.fit_transform(X_train)
X_poly_test = poly_features.transform(X_test)

# A plain linear model on the expanded features gives polynomial regression.
poly_lr = LinearRegression().fit(X_poly_train, y_train)

# Predictions for the held-out inputs are kept in poly_pred for later evaluation.
poly_pred = poly_lr.predict(X_poly_test)

# NOTE(review): this displays the expanded test features, not poly_pred.
print(X_poly_test)

[[1.00000000e+00 2.86706575e-01 8.22006600e-02]
 [1.00000000e+00 1.04649611e+00 1.09515410e+00]
 [1.00000000e+00 1.20552675e+00 1.45329475e+00]
 [1.00000000e+00 3.22619036e-01 1.04083042e-01]
 [1.00000000e+00 7.83755845e-02 6.14273225e-03]
 [1.00000000e+00 1.43265441e+00 2.05249865e+00]
 [1.00000000e+00 4.04367949e-02 1.63513438e-03]
 [1.00000000e+00 1.20969104e+00 1.46335241e+00]
 [1.00000000e+00 4.17753512e-01 1.74517997e-01]
 [1.00000000e+00 3.66382724e-01 1.34236300e-01]
 [1.00000000e+00 2.04089621e-01 4.16525736e-02]
 [1.00000000e+00 2.63595725e-01 6.94827061e-02]
 [1.00000000e+00 5.92280395e-01 3.50796066e-01]
 [1.00000000e+00 1.85119328e+00 3.42691655e+00]
 [1.00000000e+00 1.78354600e+00 3.18103634e+00]
 [1.00000000e+00 5.29111224e-01 2.79958688e-01]
 [1.00000000e+00 9.22958725e-01 8.51852807e-01]
 [1.00000000e+00 2.36548852e-01 5.59553593e-02]
 [1.00000000e+00 1.13686790e+00 1.29246862e+00]
 [1.00000000e+00 1.92732552e+00 3.71458366e+00]]


In [8]:
# LASSO Regression
from sklearn.linear_model import Lasso

# L1-regularised linear regression. alpha=0.1 is an arbitrary demonstration
# value; it is the regularisation strength and can be tuned.
lasso = Lasso(alpha=0.1).fit(X_train, y_train)

# Predictions for the held-out inputs are kept in lasso_pred for later evaluation.
lasso_pred = lasso.predict(X_test)

# NOTE(review): these display the raw inputs and targets, not lasso_pred.
print(X_test)
print(X_train, y_train)

[[0.28670657]
 [1.04649611]
 [1.20552675]
 [0.32261904]
 [0.07837558]
 [1.43265441]
 [0.04043679]
 [1.20969104]
 [0.41775351]
 [0.36638272]
 [0.20408962]
 [0.26359572]
 [0.5922804 ]
 [1.85119328]
 [1.783546  ]
 [0.52911122]
 [0.92295872]
 [0.23654885]
 [1.1368679 ]
 [1.92732552]]
[[0.12045094]
 [1.31265918]
 [1.08976637]
 [0.9373024 ]
 [1.34127574]
 [0.6308567 ]
 [0.87517442]
 [0.00939095]
 [0.12829499]
 [0.56561393]
 [0.31793917]
 [0.63596636]
 [0.6371379 ]
 [1.67588981]
 [0.87720303]
 [1.88933783]
 [1.5563135 ]
 [1.30621665]
 [0.2763659 ]
 [1.47852716]
 [1.43037873]
 [0.22075028]
 [1.39526239]
 [0.87406391]
 [0.8473096 ]
 [0.1742586 ]
 [1.66523969]
 [0.7190158 ]
 [1.88749616]
 [1.29178823]
 [1.33482076]
 [0.48885118]
 [1.09762701]
 [0.0375796 ]
 [1.04369664]
 [1.14039354]
 [1.05778984]
 [1.23527099]
 [1.56105835]
 [1.97674768]
 [1.58345008]
 [1.54846738]
 [1.64198646]
 [0.50658321]
 [0.23745544]
 [0.53077898]
 [0.91230066]
 [1.13320291]
 [0.14207212]
 [1.8585924 ]
 [1.7400243 ]
 [0.8

In [9]:
# Ridge Regression
from sklearn.linear_model import Ridge

# L2-regularised linear regression. alpha=1.0 is the regularisation strength
# and can be tuned.
ridge = Ridge(alpha=1.0).fit(X_train, y_train)

# NOTE(review): this displays the training data, not any model output.
print(X_train, y_train)

# Predictions for the held-out inputs are kept in ridge_pred for later evaluation.
ridge_pred = ridge.predict(X_test)
print(X_test)

[[0.12045094]
 [1.31265918]
 [1.08976637]
 [0.9373024 ]
 [1.34127574]
 [0.6308567 ]
 [0.87517442]
 [0.00939095]
 [0.12829499]
 [0.56561393]
 [0.31793917]
 [0.63596636]
 [0.6371379 ]
 [1.67588981]
 [0.87720303]
 [1.88933783]
 [1.5563135 ]
 [1.30621665]
 [0.2763659 ]
 [1.47852716]
 [1.43037873]
 [0.22075028]
 [1.39526239]
 [0.87406391]
 [0.8473096 ]
 [0.1742586 ]
 [1.66523969]
 [0.7190158 ]
 [1.88749616]
 [1.29178823]
 [1.33482076]
 [0.48885118]
 [1.09762701]
 [0.0375796 ]
 [1.04369664]
 [1.14039354]
 [1.05778984]
 [1.23527099]
 [1.56105835]
 [1.97674768]
 [1.58345008]
 [1.54846738]
 [1.64198646]
 [0.50658321]
 [0.23745544]
 [0.53077898]
 [0.91230066]
 [1.13320291]
 [0.14207212]
 [1.8585924 ]
 [1.7400243 ]
 [0.82932388]
 [0.72742154]
 [0.04021509]
 [1.65788006]
 [0.19219682]
 [1.95723668]
 [0.57881219]
 [1.95352218]
 [0.24039312]
 [1.27984204]
 [1.23386799]
 [0.82852599]
 [0.42076512]
 [1.3636406 ]
 [0.73745034]
 [0.93262155]
 [1.13608912]
 [1.15189299]
 [1.95291893]
 [0.18788102]
 [1.22

In [11]:
#optional
from sklearn.model_selection import GridSearchCV

# Hyper-parameter search for the regularisation strength (alpha).
#
# Each search is handed a fresh, anonymous estimator instead of rebinding the
# names `lasso` / `ridge`. GridSearchCV fits clones of the estimator it is
# given, so rebinding would replace the models fitted in the earlier cells
# with unfitted ones and break any later `lasso.predict` / `ridge.predict`.

# LASSO Regression with Cross-Validation
lasso_params = {'alpha': [0.01, 0.1, 1, 10, 100]}  # candidate alpha values
lasso_grid = GridSearchCV(Lasso(), param_grid=lasso_params, cv=5)  # 5-fold cross-validation
lasso_grid.fit(X_train, y_train)
optimal_lasso_alpha = lasso_grid.best_params_['alpha']
print("Optimal LASSO Alpha:", optimal_lasso_alpha)

# Ridge Regression with Cross-Validation
ridge_params = {'alpha': [0.01, 0.1, 1, 10, 100]}  # candidate alpha values
ridge_grid = GridSearchCV(Ridge(), param_grid=ridge_params, cv=5)  # 5-fold cross-validation
ridge_grid.fit(X_train, y_train)
optimal_ridge_alpha = ridge_grid.best_params_['alpha']
print("Optimal Ridge Alpha:", optimal_ridge_alpha)


Optimal LASSO Alpha: 0.1
Optimal Ridge Alpha: 1


In [None]:
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
import math

# Evaluate models with RMSE, MAE and R-squared on the held-out test set.
# The label -> predictions mapping is built explicitly here so the cell runs
# on a fresh kernel: no `models` list is defined anywhere in the notebook, and
# `predictions` holds the gradient-descent output array rather than a
# per-model list.
model_predictions = {
    "Least Squares": lr_pred,
    "Polynomial": poly_pred,
    "LASSO": lasso_pred,
    "Ridge": ridge_pred,
}

for model, y_pred in model_predictions.items():
    mse = mean_squared_error(y_test, y_pred)
    # RMSE is the square root of the mean squared error: it is in the units of
    # y and weights large errors more heavily than small ones.
    rmse = math.sqrt(mse)
    # MAE is the mean of the absolute errors, also in the units of y.
    mae = mean_absolute_error(y_test, y_pred)
    # R-squared is the coefficient of determination.
    r2 = r2_score(y_test, y_pred)
    print(f"{model} - RMSE: {rmse:.2f}, MAE: {mae:.2f}, R-squared: {r2:.2f}")


Least Squares - RMSE: 1.02, MAE: 0.90, R-squared: 0.74
Polynomial - RMSE: 1.01, MAE: 0.87, R-squared: 0.75
LASSO - RMSE: 1.06, MAE: 0.93, R-squared: 0.72
Ridge - RMSE: 1.03, MAE: 0.91, R-squared: 0.74
