In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import root_mean_squared_error, r2_score
from LassoLinearRegression import LassoRegression
from RidgeLinearRegression import RidgeLinearRegression
from LinearRegression import run_linear_regression

In [2]:
def r2_score(y_true, y_pred):
    """Return the coefficient of determination (R^2) of predictions against targets.

    Both inputs are converted to flat float arrays before any arithmetic. This
    keeps a column-shaped target such as ``(n, 1)`` from being broadcast against
    an ``(n,)`` prediction into an ``(n, n)`` grid, and makes pandas objects
    compare by position rather than by index label.

    Parameters
    ----------
    y_true : array-like
        Observed target values.
    y_pred : array-like
        Predicted values; must hold the same number of elements as ``y_true``.

    Returns
    -------
    float
        ``1 - SS_res / SS_tot``. For empty input the result is ``nan``. When
        ``y_true`` is constant (``SS_tot == 0``) the ratio is undefined, so 1.0
        is returned for a perfect fit and 0.0 otherwise.

    Raises
    ------
    ValueError
        If the two inputs do not contain the same number of elements.
    """
    y_true = np.asarray(y_true, dtype=float).ravel()
    y_pred = np.asarray(y_pred, dtype=float).ravel()

    if y_true.size != y_pred.size:
        raise ValueError(
            f"y_true and y_pred must have the same number of elements "
            f"(got {y_true.size} and {y_pred.size})"
        )
    if y_true.size == 0:
        # Nothing to score; avoid the mean-of-empty warning and report "undefined".
        return float("nan")

    ss_total = np.sum((y_true - np.mean(y_true)) ** 2)   # Total sum of squares
    ss_residual = np.sum((y_true - y_pred) ** 2)         # Residual sum of squares

    if ss_total == 0:
        # Constant target: the ratio would be 0/0 or x/0, so return a finite value.
        return 1.0 if ss_residual == 0 else 0.0

    return float(1 - (ss_residual / ss_total))           # R-squared formula

### Non-Polynomial Input Variables


In [3]:
# Read the prepared dataset and force the hourly timestamp column to plain strings.
merged_df = pd.read_csv('final_dataset.csv')
merged_df['Date_hourly'] = merged_df['Date_hourly'].astype(str)

# The target is kept as a one-column DataFrame; predictors are the four weighted columns.
target_columns = ['Pct_Change']
baseline_feature_columns = [
    'weighted_positive_fb',
    'weighted_negative_fb',
    'weighted_neutral_fb',
    'weighted_DocTone',
]
Y = merged_df[target_columns]
X = merged_df[baseline_feature_columns]

# Hold out 40% of the rows, then cut that hold-out in half: 60/20/20 train/val/test.
x_train, x_holdout, y_train, y_holdout = train_test_split(
    X, Y, test_size=0.4, random_state=1616
)
x_val, x_test, y_val, y_test = train_test_split(
    x_holdout, y_holdout, test_size=0.5, random_state=1616
)

In [4]:
# Fit the three regressors on the training split: plain linear, then lasso, then ridge.
lin_reg_model = run_linear_regression(x_train, y_train)

lasso = LassoRegression(alpha=0.1, num_iterations=1000, learning_rate=0.01)
lasso.fit(x_train, np.array(y_train).flatten())

ridge = RidgeLinearRegression(alpha=0.1, learning_rate=0.01, epochs=1000)
ridge.fit(x_train, np.array(y_train).flatten())

# Test-split predictions from each fitted model.
lin_reg_predicted_values = lin_reg_model.predict(x_test.values)
lasso_y_pred = lasso.predict(x_test)
ridge_y_pred = ridge.predict(x_test)

# Plain linear model: use the model's own score/rmse methods on both splits.
y_train_vector = y_train.values.ravel()
y_test_vector = y_test.values.ravel()
train_score = lin_reg_model.score(x_train.values, y_train_vector)
test_score = lin_reg_model.score(x_test.values, y_test_vector)
train_rmse = lin_reg_model.rmse(x_train.values, y_train_vector)
test_rmse = lin_reg_model.rmse(x_test.values, y_test_vector)

# Lasso and ridge: R^2 on the test split through the notebook's r2_score helper.
y_test_flat = np.array(y_test).flatten()
lasso_test_score = r2_score(y_test_flat, np.array(lasso_y_pred).flatten())
ridge_test_score = r2_score(y_test_flat, np.array(ridge_y_pred).flatten())

# Emit the four report lines in the same order as before.
report_lines = [
    f"Training RMSE: {train_rmse} | Non-Polynomic Train Score: {train_score}",
    f"Test RMSE: {test_rmse} | Non-Polynomic Test Score: {test_score}",
    f"Lasso Regression Non-Polynomic Score: {lasso_test_score}",
    f"Ridge Regression Non-Polynomic Score: {ridge_test_score}",
]
print("\n".join(report_lines))


Training RMSE: 1.6497097227158268 | Non-Polynomic Train Score: 0.6479707220472588
Test RMSE: 1.6532337794048626 | Non-Polynomic Test Score: 0.6483578705400168
Lasso Regression Non-Polynomic Score: 0.6086289461659075
Ridge Regression Non-Polynomic Score: 0.6482889332172941


### Polynomial Input Variables


In [5]:
# Polynomial feature creation: one squared term plus the three pairwise
# products of the positive / negative / neutral feedback columns.
positive_fb = merged_df['weighted_positive_fb']
negative_fb = merged_df['weighted_negative_fb']
neutral_fb = merged_df['weighted_neutral_fb']

merged_df['weighted_positive_fb_squared'] = positive_fb ** 2
merged_df['positive_negative_interaction'] = positive_fb * negative_fb
merged_df['positive_neutral_interaction'] = positive_fb * neutral_fb
merged_df['negative_neutral_interaction'] = negative_fb * neutral_fb

# Engineered columns come first, followed by the four original predictors.
engineered_columns = [
    'weighted_positive_fb_squared',
    'positive_negative_interaction',
    'positive_neutral_interaction',
    'negative_neutral_interaction',
]
original_columns = [
    'weighted_positive_fb',
    'weighted_negative_fb',
    'weighted_neutral_fb',
    'weighted_DocTone',
]
Y = merged_df[['Pct_Change']]
X = merged_df[engineered_columns + original_columns]

# Same seeded 60/20/20 split as the baseline: 40% hold-out, halved into val and test.
x_train, x_holdout, y_train, y_holdout = train_test_split(
    X, Y, test_size=0.4, random_state=1616
)
x_val, x_test, y_val, y_test = train_test_split(
    x_holdout, y_holdout, test_size=0.5, random_state=1616
)


In [6]:
# Refit the same three regressors, now on the expanded (polynomial) feature set.
lin_reg_model = run_linear_regression(x_train, y_train)

lasso = LassoRegression(alpha=0.1, num_iterations=1000, learning_rate=0.01)
lasso.fit(x_train, np.array(y_train).flatten())

ridge = RidgeLinearRegression(alpha=0.1, learning_rate=0.01, epochs=1000)
ridge.fit(x_train, np.array(y_train).flatten())

# Predictions on the held-out test split.
lin_reg_predicted_values = lin_reg_model.predict(x_test.values)
lasso_y_pred = lasso.predict(x_test)
ridge_y_pred = ridge.predict(x_test)

# Plain linear model: score and RMSE on train and test via the model's own methods.
train_targets = y_train.values.ravel()
test_targets = y_test.values.ravel()
train_score = lin_reg_model.score(x_train.values, train_targets)
test_score = lin_reg_model.score(x_test.values, test_targets)
train_rmse = lin_reg_model.rmse(x_train.values, train_targets)
test_rmse = lin_reg_model.rmse(x_test.values, test_targets)

# Regularised models: test-split R^2 from the notebook's r2_score helper.
flat_test_targets = np.array(y_test).flatten()
lasso_test_score = r2_score(flat_test_targets, np.array(lasso_y_pred).flatten())
ridge_test_score = r2_score(flat_test_targets, np.array(ridge_y_pred).flatten())

# Print the report one line at a time, in the original order.
for report_line in (
    f"Training RMSE: {train_rmse} | Polynomic Train Score: {train_score}",
    f"Test RMSE: {test_rmse} | Polynomic Test Score: {test_score}",
    f"Lasso Regression Polynomic Score: {lasso_test_score}",
    f"Ridge Regression Polynomic Score: {ridge_test_score}",
):
    print(report_line)


Training RMSE: 1.6060308853333638 | Polynomic Train Score: 0.6663650778748216
Test RMSE: 1.6144774882867075 | Polynomic Test Score: 0.6646515153883017
Lasso Regression Polynomic Score: 0.6138427781610919
Ridge Regression Polynomic Score: 0.6428432056192599


Training RMSE: 1.5907842134771632 | Polynomial Train Score: 0.6832450714320921  
Test RMSE: 1.5962203492327482 | Polynomial Test Score: 0.6809621031247543  
Lasso Regression Polynomial Score: 0.6453719228701352  
Ridge Regression Polynomial Score: 0.6700021317909382


In [None]:
# Polynomial feature creation (this cell recomputes the same four derived
# columns and the same seeded split as the earlier polynomial cell).
derived_columns = {
    'weighted_positive_fb_squared':
        merged_df['weighted_positive_fb'] ** 2,
    'positive_negative_interaction':
        merged_df['weighted_positive_fb'] * merged_df['weighted_negative_fb'],
    'positive_neutral_interaction':
        merged_df['weighted_positive_fb'] * merged_df['weighted_neutral_fb'],
    'negative_neutral_interaction':
        merged_df['weighted_negative_fb'] * merged_df['weighted_neutral_fb'],
}
# Write the derived columns onto the frame in the order they are listed.
for column_name, column_values in derived_columns.items():
    merged_df[column_name] = column_values

Y = merged_df[['Pct_Change']]
X = merged_df[
    list(derived_columns)
    + ['weighted_positive_fb', 'weighted_negative_fb', 'weighted_neutral_fb', 'weighted_DocTone']
]

# 40% hold-out, then split the hold-out evenly into validation and test.
x_train, x_rest, y_train, y_rest = train_test_split(X, Y, test_size=0.4, random_state=1616)
x_val, x_test, y_val, y_test = train_test_split(x_rest, y_rest, test_size=0.5, random_state=1616)


In [None]:
# Polynomial feature creation (same derived columns and seeded split as the
# earlier polynomial cell, written with the Series arithmetic methods).
merged_df['weighted_positive_fb_squared'] = merged_df['weighted_positive_fb'].pow(2)
merged_df['positive_negative_interaction'] = (
    merged_df['weighted_positive_fb'].mul(merged_df['weighted_negative_fb'])
)
merged_df['positive_neutral_interaction'] = (
    merged_df['weighted_positive_fb'].mul(merged_df['weighted_neutral_fb'])
)
merged_df['negative_neutral_interaction'] = (
    merged_df['weighted_negative_fb'].mul(merged_df['weighted_neutral_fb'])
)

# Target as a one-column frame; predictors are the derived columns, then the originals.
polynomial_feature_columns = [
    'weighted_positive_fb_squared',
    'positive_negative_interaction',
    'positive_neutral_interaction',
    'negative_neutral_interaction',
    'weighted_positive_fb',
    'weighted_negative_fb',
    'weighted_neutral_fb',
    'weighted_DocTone',
]
Y = merged_df[['Pct_Change']]
X = merged_df[polynomial_feature_columns]

# First split keeps 60% for training; second halves the remainder into val and test.
first_split = train_test_split(X, Y, test_size=0.4, random_state=1616)
x_train, x_remaining, y_train, y_remaining = first_split
second_split = train_test_split(x_remaining, y_remaining, test_size=0.5, random_state=1616)
x_val, x_test, y_val, y_test = second_split
