In [1]:
import pandas as pd
import numpy as np
from sotam import VLSTM

In [2]:
# Load the INTC price history: every CSV column except the one named
# "Unnamed: 0" is kept, and the parsed 'Date' column becomes the index.
def _is_wanted_column(column):
    """Return True for every column name except "Unnamed: 0"."""
    return column != "Unnamed: 0"


df = pd.read_csv(
    'INTC.csv',
    usecols=_is_wanted_column,
    parse_dates=['Date'],
    index_col='Date',
)
df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
1980-03-17,0.182651,0.185573,0.182651,0.182651,10924800
1980-03-18,0.182651,0.184112,0.18119,0.18119,17068800
1980-03-19,0.185573,0.188496,0.185573,0.185573,18508800
1980-03-20,0.185573,0.187765,0.184843,0.184843,11174400
1980-03-21,0.18119,0.18119,0.178267,0.178267,12172800


In [3]:
# Summarise the frame: index range, per-column non-null counts, dtypes and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 11175 entries, 1980-03-17 to 2024-07-15
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   Open    11175 non-null  float64
 1   High    11175 non-null  float64
 2   Low     11175 non-null  float64
 3   Close   11175 non-null  float64
 4   Volume  11175 non-null  int64  
dtypes: float64(4), int64(1)
memory usage: 523.8 KB


In [4]:
# Count missing values in each column.
df.isna().sum()

Open      0
High      0
Low       0
Close     0
Volume    0
dtype: int64

In [5]:
# Derive calendar features from the DatetimeIndex.

# Put the rows in chronological order first. Reassigning instead of using
# inplace=True keeps the data flow explicit.
df = df.sort_index()

# Month -> season code: Dec-Feb = 1, Mar-May = 2, Jun-Aug = 3, Sep-Nov = 4.
SEASON_BY_MONTH = {
    12: 1, 1: 1, 2: 1,
    3: 2, 4: 2, 5: 2,
    6: 3, 7: 3, 8: 3,
    9: 4, 10: 4, 11: 4,
}

df['year'] = df.index.year
df['month'] = df.index.month
df['day'] = df.index.day
df['dayofweek'] = df.index.dayofweek          # Monday = 0 ... Sunday = 6
df['weekno'] = df.index.isocalendar().week    # ISO-8601 week number
df['isweekend'] = df.index.weekday // 5       # weekday 5 or 6 -> 1, otherwise 0
# A dict lookup replaces the nested conditional lambda; every month 1-12 is covered.
df['season'] = df['month'].map(SEASON_BY_MONTH)

In [6]:
# Preview the first rows, now including the derived calendar columns.
df.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume,year,month,day,dayofweek,weekno,isweekend,season
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
1980-03-17,0.182651,0.185573,0.182651,0.182651,10924800,1980,3,17,0,12,0,2
1980-03-18,0.182651,0.184112,0.18119,0.18119,17068800,1980,3,18,1,12,0,2
1980-03-19,0.185573,0.188496,0.185573,0.185573,18508800,1980,3,19,2,12,0,2
1980-03-20,0.185573,0.187765,0.184843,0.184843,11174400,1980,3,20,3,12,0,2
1980-03-21,0.18119,0.18119,0.178267,0.178267,12172800,1980,3,21,4,12,0,2


In [7]:
# Rank every column by the absolute value of its correlation with the target,
# min-max scaled to [0, 1], and keep the n highest-ranked column names.
# The target column itself takes part in the ranking.
target = 'Close'
n = 6

corr_matrix = df.corr().abs()
target_corr = corr_matrix[target]
weakest, strongest = target_corr.min(), target_corr.max()
normalized_corr = (target_corr - weakest) / (strongest - weakest)

ranked_corr = normalized_corr.sort_values(ascending=False)
top_features = ranked_corr.head(n).index.to_list()

print(f"Top features correlated with {target}:", top_features)
print("Correlation scores normalized to range [0, 1]:\n ", normalized_corr[top_features])

Top features correlated with Close: ['Close', 'Low', 'High', 'Open', 'year', 'Volume']
Correlation scores normalized to range [0, 1]:
  Close     1.000000
Low       0.999844
High      0.999830
Open      0.999674
year      0.862057
Volume    0.220322
Name: Close, dtype: float64


In [8]:
# Build the model for the same target column that was used to select
# `top_features`, so the target only has to be changed in one place.
# VLSTM() accepts further options; you can customize a lot here.
vlstm = VLSTM(target=target)

# train() returns the training history, the test targets and predictions,
# and the train/test scores that later cells print and plot.
history, y_test, y_pred, train_score, test_score = vlstm.train(df, top_features)
vlstm.summary()

INFO:tensorflow:Using MirroredStrategy with devices ('/job:localhost/replica:0/task:0/device:CPU:0',)
Number of devices: 1

Epoch 1/10
[1m148/149[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 58ms/step - loss: 0.0297
Epoch 1: val_loss improved from inf to 0.00628, saving model to best_model.keras
[1m149/149[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 66ms/step - loss: 0.0295 - val_loss: 0.0063
Epoch 2/10
[1m148/149[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 56ms/step - loss: 0.0055
Epoch 2: val_loss improved from 0.00628 to 0.00542, saving model to best_model.keras
[1m149/149[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 62ms/step - loss: 0.0055 - val_loss: 0.0054
Epoch 3/10
[1m148/149[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 57ms/step - loss: 0.0053
Epoch 3: val_loss did not improve from 0.00542
[1m149/149[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 62ms/step - loss: 0.0053 - val_loss: 0.0073
Epoch 4/10
[1m148/149[0m

None


In [9]:
# Report the scores returned by vlstm.train() for the train and test splits.
print(f"Train Score: {train_score}")
print(f"Test Score: {test_score}")

Train Score: 99.83
Test Score: 98.8


In [10]:
# Forecast from the full history; the third argument (10) is presumably the
# number of future steps -- confirm against the VLSTM API.
vlstm.forecast(df,top_features,10,noise_factor=0.025) # noise_factor adds noise to simulate realistic variation in the forecast.

array([34.94142129, 33.61534817, 33.58722647, 33.49398348, 33.98114563,
       34.31025563, 33.59427803, 33.98320399, 33.794458  , 34.57163439])

In [11]:
# Plot a forecast with noise_factor=0.025; the third argument (30) is
# presumably the forecast horizon -- confirm against the VLSTM API.
vlstm.plot_forecast(df,top_features,30,noise_factor=0.025)

In [12]:
# Synthetic input for a smoke test of predict(): 31 daily rows of random
# values, seeded so the generated frame is the same on every run.
np.random.seed(42)
n_rows = 31

# The columns are drawn in this exact order so the seeded random stream is
# consumed the same way every time.
data = pd.DataFrame({
    'Date': pd.date_range(start='1990-01-01', periods=n_rows, freq='D'),
    'Close': np.random.uniform(30, 50, n_rows).round(2),
    'High': np.random.uniform(50, 60, n_rows).round(2),
    'Low': np.random.uniform(20, 40, n_rows).round(2),
    'Open': np.random.uniform(30, 45, n_rows).round(2),
    'year': np.random.choice([1990, 1991, 1992], n_rows),
    'Volume': np.random.randint(20000000, 50000000, n_rows),
})

# Single prediction from the synthetic rows (author's note: "31st step predicted").
vlstm.predict(data, top_features)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 235ms/step


array([37.865005], dtype=float32)

In [13]:
# Plot the loss recorded in `history`, which was returned by vlstm.train().
# Usage note: predictions on new data are obtained with vlstm.predict(new_data, features).
vlstm.plot_loss(history)

In [14]:
# Plot the test targets against the model's predictions; both were returned by vlstm.train().
vlstm.prediction_plot(y_test, y_pred)

In [15]:
# Compute evaluation metrics from the test targets and predictions, then plot them.
metrics = vlstm.evaluate(y_test, y_pred)
vlstm.plot_metrics(metrics)