In [1]:
import pandas as pd
import numpy as np
from sotam import VLSTM

In [2]:
# Load the INTC price history. Every CSV column except the one literally named
# "Unnamed: 0" is kept; 'Date' is parsed to datetimes and becomes the index.
df = pd.read_csv(
    'INTC.csv',
    index_col='Date',
    parse_dates=['Date'],
    usecols=lambda name: name != "Unnamed: 0",
)
df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1980-03-17,0.182651,0.185573,0.182651,0.182651,10924800
1980-03-18,0.182651,0.184112,0.18119,0.18119,17068800
1980-03-19,0.185573,0.188496,0.185573,0.185573,18508800
1980-03-20,0.185573,0.187765,0.184843,0.184843,11174400
1980-03-21,0.18119,0.18119,0.178267,0.178267,12172800


In [3]:
# Show the index range, per-column dtypes / non-null counts and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 11175 entries, 1980-03-17 to 2024-07-15
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Open    11175 non-null  float64
 1   High    11175 non-null  float64
 2   Low     11175 non-null  float64
 3   Close   11175 non-null  float64
 4   Volume  11175 non-null  int64  
dtypes: float64(4), int64(1)
memory usage: 523.8 KB


In [4]:
# Count missing values in each column.
df.isna().sum()

Open      0
High      0
Low       0
Close     0
Volume    0
dtype: int64

In [5]:
# Derive calendar features from the DatetimeIndex and attach them as columns.
stamps = df.index

df['year'] = stamps.year
df['month'] = stamps.month
df['day'] = stamps.day
df['dayofweek'] = stamps.dayofweek
df['weekno'] = stamps.isocalendar().week   # ISO calendar week number
df['isweekend'] = stamps.weekday // 5      # weekday 5 or 6 -> 1, otherwise 0

# Season code per month: 1 = Dec/Jan/Feb, 2 = Mar-May, 3 = Jun-Aug, 4 = Sep-Nov.
season_of_month = {month: (month % 12) // 3 + 1 for month in range(1, 13)}
df['season'] = df['month'].map(season_of_month)

# Keep rows in chronological order.
df.sort_index(inplace=True)

In [6]:
# Preview the frame now that the calendar columns have been added.
df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,year,month,day,dayofweek,weekno,isweekend,season
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1980-03-17,0.182651,0.185573,0.182651,0.182651,10924800,1980,3,17,0,12,0,2
1980-03-18,0.182651,0.184112,0.18119,0.18119,17068800,1980,3,18,1,12,0,2
1980-03-19,0.185573,0.188496,0.185573,0.185573,18508800,1980,3,19,2,12,0,2
1980-03-20,0.185573,0.187765,0.184843,0.184843,11174400,1980,3,20,3,12,0,2
1980-03-21,0.18119,0.18119,0.178267,0.178267,12172800,1980,3,21,4,12,0,2


In [7]:
# Rank columns by the strength of their correlation with the target column.
target = 'Close'
n = 6  # how many of the highest-ranked columns to keep

# Absolute pairwise correlations; only the target's column is needed below.
corr_matrix = df.corr().abs()
target_corr = corr_matrix[target]

# Min-max rescale the target's correlations onto [0, 1].
corr_min = target_corr.min()
corr_max = target_corr.max()
normalized_corr = (target_corr - corr_min) / (corr_max - corr_min)

# Highest scores first; the target itself always ranks at 1.0 and is included.
ranked_corr = normalized_corr.sort_values(ascending=False)
top_features = ranked_corr.head(n).index.to_list()

print(f"Top features correlated with {target}:", top_features)
print("Correlation scores normalized to range [0, 1]:\n ", normalized_corr.loc[top_features])

Top features correlated with Close: ['Close', 'Low', 'High', 'Open', 'year', 'Volume']
Correlation scores normalized to range [0, 1]:
  Close     1.000000
Low       0.999844
High      0.999830
Open      0.999674
year      0.862057
Volume    0.220322
Name: Close, dtype: float64


In [None]:
# Build the sotam VLSTM model with 'Close' as its target; the constructor
# accepts a lot of further options for customization.
vlstm = VLSTM(target='Close')
# Train, passing the full frame and the list of selected feature names.
# NOTE(review): the meaning of the five returned values is inferred from their
# names (training history, held-out targets, predictions, train/test scores) --
# confirm against the sotam documentation.
history, y_test, y_pred, train_score, test_score = vlstm.train(df, top_features)
vlstm.summary()

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:CPU:0',)
Number of devices: 1

Epoch 1/10
[1m148/149[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 57ms/step - loss: 0.0339
Epoch 1: val_loss improved from inf to 0.00683, saving model to best_model.keras
[1m149/149[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 65ms/step - loss: 0.0336 - val_loss: 0.0068
Epoch 2/10
[1m148/149[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 62ms/step - loss: 0.0054
Epoch 2: val_loss did not improve from 0.00683
[1m149/149[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 68ms/step - loss: 0.0054 - val_loss: 0.0119
Epoch 3/10
[1m148/149[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 63ms/step - loss: 0.0059
Epoch 3: val_loss improved from 0.00683 to 0.00560, saving model to best_model.keras
[1m149/149[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 69ms/step - loss: 0.0059 - val_loss: 0.0056
Epoch 4/10
[1m148/149[

None


In [9]:
# Compare the predictions with the test targets; the call prints an
# "Evaluation Metrics" report and its return value is kept for plotting later.
metrics = vlstm.evaluate(y_test, y_pred)

Evaluation Metrics:
Mean Absolute Error (MAE): 0.6406
Mean Absolute Percentage Error (MAPE): 1.5983%
Mean Absolute Deviation (MAD): 0.6406
Root Mean Squared Error (RMSE): 1.0003
Explained Variance Score: 0.9890
Max Error: 9.9224
Mean Squared Error (MSE): 1.0005
Median Absolute Error: 0.4247


In [10]:
# Report the train and test scores returned by vlstm.train(), one per line.
print(f"Train Score: {train_score}", f"Test Score: {test_score}", sep="\n")

Train Score: 99.81
Test Score: 98.9


In [11]:
# Example only (not executed; `new_data` and `features` are placeholders that
# are not defined in this notebook):
#   predictions = vlstm.predict(new_data, features)
# Plot the loss curves from the training history.
vlstm.plot_loss(history)

In [12]:
# Plot the model's predictions against the test targets.
vlstm.prediction_plot(y_test, y_pred)

In [13]:
# Plot the metrics returned by vlstm.evaluate().
vlstm.plot_metrics(metrics)