In [100]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler


In [101]:
# Load the raw price history from disk and print a schema summary
# (column names, non-null counts, dtypes) to sanity-check the import.
csv_path = 'tsmc_data.csv'
df = pd.read_csv(csv_path)
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1762 entries, 0 to 1761
Data columns (total 6 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Price   1762 non-null   object
 1   Close   1761 non-null   object
 2   High    1761 non-null   object
 3   Low     1761 non-null   object
 4   Open    1761 non-null   object
 5   Volume  1761 non-null   object
dtypes: object(6)
memory usage: 82.7+ KB


In [102]:
# Remove the first two rows (index labels 0 and 1).
# NOTE(review): these are presumably non-data header rows left over from the
# export (data in the preview starts at index 2) -- confirm against the CSV.
# errors='ignore' keeps this cell re-runnable once the labels are gone.
df = df.drop(index=[0, 1], errors='ignore')

# Every column was read as `object` dtype, so the price/volume values are
# still strings at this point. Cast them to real numbers; anything that
# cannot be parsed becomes NaN and therefore shows up in the count below.
numeric_cols = ['Close', 'High', 'Low', 'Open', 'Volume']
df[numeric_cols] = df[numeric_cols].apply(pd.to_numeric, errors='coerce')

# Per-column count of missing values after cleaning.
missing_val = df.isnull().sum()
missing_val

Price     0
Close     0
High      0
Low       0
Open      0
Volume    0
dtype: int64

In [103]:
# Fill gaps by carrying the last observed value forward, then preview
# the first five rows of the result.
df = df.ffill()
df.head()

Unnamed: 0,Price,Close,High,Low,Open,Volume
2,2018-01-02,34.1712760925293,34.1962662572868,33.67145055282769,33.74642422489328,4984000
3,2018-01-03,34.74608612060547,34.82939199967066,34.52116501382098,34.6461222435216,6963200
4,2018-01-04,34.56280899047852,34.987657721727544,34.52948664902551,34.92101303882153,4876600
5,2018-01-05,35.3708610534668,35.462495918950445,34.812726040493885,34.896028730318704,5330800
6,2018-01-08,35.35420227050781,35.38752461980848,35.14594235406982,35.32088309900127,3538200


In [104]:
# Model inputs (`feature`) and the column to predict (`target`).
target = 'Close'
feature = ['High', 'Low', 'Open', 'Volume']

# X: one row per observation, one column per input feature.
X = df.loc[:, feature].to_numpy()
# y: reshaped into a single column (n_rows, 1), the 2-D layout the scaler
# used later on expects.
y = df[target].to_numpy().reshape(-1, 1)

In [105]:
# Ordered (unshuffled) split: first 70% of rows -> train, next 15% -> validation,
# everything that remains -> test.
n = len(X)
train_size = int(0.7 * n)
val_size = int(0.15 * n)
val_end = train_size + val_size  # first row index of the test partition

# training set
X_train, y_train = X[:train_size], y[:train_size]

# validation set
X_val, y_val = X[train_size:val_end], y[train_size:val_end]

# test set
X_test, y_test = X[val_end:], y[val_end:]

In [106]:
# Min-max scaling. Each scaler is fitted on the training partition only and
# then applied unchanged to the validation and test partitions.

# Inputs
feature_scaler = MinMaxScaler()
feature_scaler.fit(X_train)
X_train_scaled = feature_scaler.transform(X_train)
X_val_scaled, X_test_scaled = (
    feature_scaler.transform(part) for part in (X_val, X_test)
)

# Target (separate scaler from the one used for the inputs)
target_scaler = MinMaxScaler()
target_scaler.fit(y_train)
y_train_scaled = target_scaler.transform(y_train)
y_val_scaled, y_test_scaled = (
    target_scaler.transform(part) for part in (y_val, y_test)
)

In [107]:
# Create sequences
def create_sequence(X, y, look_back=7, foresight=1):
    """Build sliding-window samples from two aligned sequences.

    For every start position ``i`` the sample is the window
    ``X[i : i + look_back]`` and its label is
    ``y[i + look_back + foresight - 1]``, i.e. the ``y`` value ``foresight``
    steps after the last element of the window.

    Parameters
    ----------
    X : sliceable sequence
        Input observations.
    y : indexable sequence
        Targets, indexed in step with ``X``.
    look_back : int, default 7
        Number of consecutive observations per window.
    foresight : int, default 1
        How many steps past the end of the window the label lies.

    Returns
    -------
    (X_seq, y_seq) : tuple of lists
        Windows and their labels. Both lists are empty when ``X`` is too
        short to hold a single window plus its label.
    """
    # The last valid start is len(X) - look_back - foresight (its label is the
    # final element of y), so the exclusive range bound is one past that.
    # The previous bound omitted the "+ 1" and silently dropped that sample.
    n_samples = len(X) - look_back - foresight + 1

    X_seq, y_seq = [], []
    for i in range(n_samples):  # empty range when n_samples <= 0
        X_seq.append(X[i:i + look_back])
        y_seq.append(y[i + look_back + foresight - 1])
    return X_seq, y_seq

In [108]:
# Turn each scaled partition into (window, label) pairs using the default
# look_back / foresight of create_sequence.
X_train_seq, y_train_seq = create_sequence(X_train_scaled, y_train_scaled)
# Validation labels belong in y_val_seq; they were previously unpacked into
# y_test_seq, which left y_val_seq undefined and was overwritten on the next line.
X_val_seq, y_val_seq = create_sequence(X_val_scaled, y_val_scaled)
X_test_seq, y_test_seq = create_sequence(X_test_scaled, y_test_scaled)