In [78]:
import numpy as np
import pandas as pd
from sklearn import preprocessing
from sklearn import metrics
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
import os

In [79]:

# Read the raw price history from 'basic_df.csv' in the current working directory.
csv_file_path = os.path.join(
    os.getcwd(),
    'basic_df.csv',
)
df = pd.read_csv(csv_file_path)
df.shape  # (rows, columns) of the frame as loaded

(939, 8)

In [80]:
# Discard every row that contains at least one missing value (mutates df in place).
df.dropna(axis=0, how='any', inplace=True)

In [81]:
# Rename the date column, parse it to datetime, then flip the row order and
# take an independent deep copy so later in-place edits do not touch a view.
df = (
    df.rename(columns={'Exchange Date': 'Date'})
      .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
      .iloc[::-1]
      .copy(deep=True)
)

df.head()

Unnamed: 0.1,Unnamed: 0,Date,Close,Net,%Chg,Open,Low,High
937,937,2020-09-17,1114.58,-9.52,-0.008469,1124.22,1110.08,1124.34
936,936,2020-09-18,1113.8,-0.78,-0.0007,1115.83,1111.07,1120.56
935,935,2020-09-21,1093.33,-20.47,-0.018379,1112.64,1087.69,1117.14
934,934,2020-09-22,1088.58,-4.75,-0.004345,1094.5,1084.85,1094.66
933,933,2020-09-23,1081.49,-7.09,-0.006513,1087.79,1079.47,1090.47


In [83]:
# Remove the leftover 'Unnamed: 0' column in place.
# NOTE(review): presumably an index column written by an earlier to_csv — confirm.
df.drop('Unnamed: 0', axis=1, inplace=True)

In [84]:
# Promote 'Date' from a regular column to the frame's index (in place).
df.set_index(keys='Date', inplace=True)

In [85]:
# Feature matrix (column order: High, Low, Open, Net) and the 'Close' target,
# both extracted as NumPy arrays.
X = df.loc[:, ['High','Low','Open','Net']].values
y = df.loc[:, 'Close'].values

In [86]:
# Hold out 30% of the rows for evaluation; random_state pins the split.
# NOTE(review): the rows are shuffled before splitting, so the test set is not
# a chronological hold-out — confirm that is intended for this price series.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=1,
)


In [74]:
# Number of held-out rows.
X_test.shape[0]

282

In [87]:
# Fit ordinary least squares on the training split; fit() returns the estimator.
regressor = LinearRegression().fit(X_train, y_train)

# Learned parameters: one weight per feature column, then the bias term.
print(regressor.coef_) # weights of the features
print(regressor.intercept_) # bias

[0.36903278 0.49439502 0.13364029 0.46187092]
4.26540472212173


In [88]:
# Score the held-out rows with the fitted model and echo the predictions.
predicted = regressor.predict(X=X_test)
predicted

array([1050.24771339, 1195.00880138,  934.50999004, 1025.80771811,
       1174.44592103, 1220.80181383, 1017.62727378, 1087.10356287,
       1015.89768797, 1148.29487084, 1327.54916272, 1263.72379277,
       1026.67800425, 1039.72064796, 1360.11147184, 1035.80764843,
       1052.05465129, 1030.64076185,  917.01108082, 1076.56037956,
       1011.91598134, 1310.65548216, 1183.80328475, 1027.96807801,
       1046.1817223 , 1356.94456257, 1004.89135429, 1025.78188617,
       1080.83419352, 1251.79413258,  961.95776251, 1142.16045586,
       1028.14502772, 1035.61948031, 1391.61392576, 1017.77202426,
       1086.2295846 , 1021.4066014 , 1286.44122361, 1362.84700891,
       1280.83633239, 1280.93937404, 1418.64172228, 1053.27053939,
       1045.06558753,  988.39130694, 1286.93278453, 1030.32321409,
       1063.50701276, 1065.66011954, 1234.90174647, 1064.90979151,
       1022.30320087, 1372.06790172, 1420.82681253, 1142.37444973,
       1348.77627889, 1033.64038927, 1073.78604366, 1396.59629

In [89]:
# Put ground truth and model output side by side for a quick visual comparison.
data1 = pd.DataFrame(
    {
        'Actual': y_test.flatten(),
        'Predicted': predicted.flatten(),
    }
)
data1.head()

Unnamed: 0,Actual,Predicted
0,1051.35,1050.247713
1,1196.27,1195.008801
2,933.23,934.50999
3,1025.15,1025.807718
4,1177.03,1174.445921


In [30]:
import math

# Evaluate the squared error once; the RMSE line reuses it instead of recomputing.
mean_sq_err = metrics.mean_squared_error(y_test, predicted)

print('Mean Absolute Error:', metrics.mean_absolute_error(y_test, predicted))
print('Mean Squared Error:', mean_sq_err)
print('Root Mean Squared Error:', math.sqrt(mean_sq_err))

Mean Absolute Error: 1.7977299178777053
Mean Squared Error: 6.14892844097186
Root Mean Squared Error: 2.479703296963542


In [38]:
# Rebind the training arrays to every row of df (this overwrites the arrays
# produced by the earlier train/test split).
X_train = df.loc[:, ['High','Low','Open','Net']].values
y_train = df.loc[:, 'Close'].values

In [65]:
# Take the 5 most recent rows of df; their feature values are reused below as
# stand-in inputs for the 5 dates that follow.
last_data = df.iloc[-5:]

# After df.set_index('Date', ...) the date lives in the index, so
# last_data.iloc[-1]['Date'] raises KeyError. Read it from the index, and only
# fall back to the column when 'Date' has not been moved to the index yet.
if 'Date' in last_data.columns:
    last_date = last_data['Date'].iloc[-1]
else:
    last_date = last_data.index[-1]

# Generate the 5 calendar days that follow the last observed date.
# NOTE(review): freq='D' includes weekends — confirm this lines up with the
# rows of the ground-truth file the predictions are compared against.
future_dates = pd.date_range(start=last_date + pd.DateOffset(days=1), periods=5, freq='D')

# Input features (High, Low, Open, Net) copied from the last 5 observed rows.
# The extra list wrapper makes this a 3-D array of shape (1, 5, 4).
x_predict = np.array([last_data[['High', 'Low', 'Open', 'Net']]])

# Pair each future date with the feature row it borrows from the recent history.
future_df = pd.DataFrame({
    'Date': future_dates,
    'High': x_predict[0, :, 0],  # column 0 of the copied rows
    'Low': x_predict[0, :, 1],   # column 1 of the copied rows
    'Open': x_predict[0, :, 2],  # column 2 of the copied rows
    'Net': x_predict[0, :, 3]    # column 3 of the copied rows
})

# Set the 'Date' column as the index
future_df.set_index('Date', inplace=True)

# Display the DataFrame with future dates and input features
print(future_df)

            High  Low  Open  Net
Date                            
2024-04-23   NaN  NaN   NaN  NaN
2024-04-24   NaN  NaN   NaN  NaN
2024-04-25   NaN  NaN   NaN  NaN
2024-04-26   NaN  NaN   NaN  NaN
2024-04-27   NaN  NaN   NaN  NaN


In [54]:
# Load the ground-truth file 'test.csv' from the current working directory,
# reverse its row order, and keep the 'Close' column as a NumPy array.
csv_file_path = os.path.join(
    os.getcwd(),
    'test.csv',
)
truth_df = pd.read_csv(csv_file_path).iloc[::-1]
y_true = np.array(truth_df['Close'])

In [66]:
# Refit a fresh linear model on whatever X_train / y_train currently hold;
# fit() returns the estimator, so construction and fitting are chained.
regressor = LinearRegression().fit(X_train, y_train)

# Learned parameters: one weight per feature column, then the bias term.
print(regressor.coef_) # weights of the features
print(regressor.intercept_) # bias

[0.4100308  0.49818292 0.08883974 0.44378418]
4.120145206300549


In [None]:
# The model was fitted on a bare NumPy array, so pass predict() an array too.
# Handing it a DataFrame triggers scikit-learn's "X has feature names, but
# LinearRegression was fitted without feature names" warning. Selecting the
# columns explicitly also pins the feature order to the one used for training.
predicted = regressor.predict(future_df[['High', 'Low', 'Open', 'Net']].to_numpy())
print(predicted)

# RMSE of the forecasts against the ground-truth closes.
np.sqrt(metrics.mean_squared_error(y_true,predicted))

In [62]:
# Refit using only the 15 most recent training rows.
X_train_15 = X_train[-15:]
y_train_15 = y_train[-15:]

regressor = LinearRegression()
regressor.fit(X_train_15, y_train_15)


print(regressor.coef_) # weights of the features
print(regressor.intercept_) # bias

# The model was fitted on a NumPy array, so predict on an array as well: this
# avoids scikit-learn's feature-name mismatch warning and pins the column order
# to the one used for training.
predicted = regressor.predict(future_df[['High', 'Low', 'Open', 'Net']].to_numpy())
print(predicted)

# RMSE of the forecasts against the ground-truth closes.
np.sqrt(metrics.mean_squared_error(y_true,predicted))

[-0.1920317  -0.04800175  1.24514231  1.08121988]
-5.199669107476211
[1070.59719675 1074.0429484  1078.5014922  1062.39530303 1068.28915205]




28.197746322204914

In [63]:
# Refit using only the 30 most recent training rows.
X_train_30 = X_train[-30:]
y_train_30 = y_train[-30:]

regressor = LinearRegression()
regressor.fit(X_train_30, y_train_30)


print(regressor.coef_) # weights of the features
print(regressor.intercept_) # bias

# The model was fitted on a NumPy array, so predict on an array as well: this
# avoids scikit-learn's feature-name mismatch warning and pins the column order
# to the one used for training.
predicted = regressor.predict(future_df[['High', 'Low', 'Open', 'Net']].to_numpy())
print(predicted)

# RMSE of the forecasts against the ground-truth closes.
np.sqrt(metrics.mean_squared_error(y_true,predicted))

[0.18531434 0.35084015 0.45635543 0.60106467]
8.92547142816602
[1072.37011779 1074.99887258 1079.13198606 1061.00581785 1068.16097762]




28.409822866403502

In [64]:
# Refit using only the 60 most recent training rows.
X_train_60 = X_train[-60:]
y_train_60 = y_train[-60:]

regressor = LinearRegression()
regressor.fit(X_train_60, y_train_60)


print(regressor.coef_) # weights of the features
print(regressor.intercept_) # bias

# The model was fitted on a NumPy array, so predict on an array as well: this
# avoids scikit-learn's feature-name mismatch warning and pins the column order
# to the one used for training.
predicted = regressor.predict(future_df[['High', 'Low', 'Open', 'Net']].to_numpy())
print(predicted)

# RMSE of the forecasts against the ground-truth closes.
np.sqrt(metrics.mean_squared_error(y_true,predicted))

[0.23709487 0.25067891 0.51241017 0.65818685]
-0.32161291454031016
[1072.46895657 1074.53461047 1079.05565557 1061.49363434 1067.84431157]




28.42314712120198