In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
import tensorflow as tf
import pickle

In [2]:
# Load the raw price file, give the columns English names, and keep only the
# timestamp, closing price ('종가' -> 'Close') and volume ('거래량' -> 'Volume').
data = (
    pd.read_csv('./000660.csv', parse_dates=['Unnamed: 0'])
    .rename(columns={'Unnamed: 0': 'Time', '종가': 'Close', '거래량': 'Volume'})
    .loc[:, ['Time', 'Close', 'Volume']]
)

In [3]:
# Discard every row that has a missing value in any of the kept columns.
data = data.dropna(axis=0, how='any')

In [4]:
def create_sequence(data, sequence_length=10, predict_size=1):
    """Build sliding-window samples from a frame with 'Close' and 'Volume' columns.

    Each sample is ``sequence_length`` consecutive rows of ``[Close, Volume]``;
    its target is the 'Close' value(s) of the row(s) immediately following the
    window.

    Args:
        data: DataFrame containing at least 'Close' and 'Volume' columns. Rows
            are consumed in positional order.
        sequence_length: Number of consecutive rows in each input window.
        predict_size: Number of future 'Close' values used as the target of
            each window.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays. ``X`` has shape
        ``(n, sequence_length, 2)``. ``y`` has shape ``(n,)`` when
        ``predict_size == 1`` and ``(n, predict_size)`` otherwise, where
        ``n = len(data) - sequence_length - predict_size + 1``. When the frame
        is too short to form a single window both arrays are empty.
    """
    # Pull the columns out of pandas once; per-iteration .iloc lookups are
    # loop-invariant work and dominate the runtime on long series.
    features = data[['Close', 'Volume']].to_numpy()
    closes = data['Close'].to_numpy()
    n_windows = len(data) - sequence_length - predict_size + 1

    X = []
    y = []
    for start in range(n_windows):
        target_start = start + sequence_length
        X.append(features[start:target_start])
        if predict_size == 1:
            # Scalar target keeps the historical (n,) shape for the default.
            y.append(closes[target_start])
        else:
            # Honour predict_size: take every future step, not just the first.
            y.append(closes[target_start:target_start + predict_size])
    return np.array(X), np.array(y)

In [5]:
# Window the cleaned frame: 10 input steps per sample, one-step-ahead target.
X, y = create_sequence(data, sequence_length=10, predict_size=1)

In [6]:
# Sanity check: show the sample and target array shapes, one per line.
print(X.shape, y.shape, sep='\n')

(186830, 10, 2)
(186830,)


In [7]:
# Min-max scaler mapping values into the [0, 1] range.
scaler = MinMaxScaler(feature_range=(0, 1))

In [8]:
# Scale the inputs with ONE scaler fitted on the full Close/Volume history.
# Fitting a fresh scaler on every window would stretch each window to its own
# min/max, which throws away the absolute price/volume level (constant windows
# collapse to zeros) and leaves the inputs on a different scale than the
# globally scaled targets below.
feature_scaler = MinMaxScaler()
feature_scaler.fit(data[['Close', 'Volume']].to_numpy())
# Flatten (samples, steps, features) -> (rows, features) for the scaler, then
# restore the original window layout.
X_scaled = feature_scaler.transform(X.reshape(-1, X.shape[-1])).reshape(X.shape)

# `scaler` stays dedicated to the target so that it can later be used to
# inverse-transform predictions back to prices.
# NOTE(review): `feature_scaler` is needed to scale raw inputs at inference
# time; persist it as well if inference runs outside this notebook.
y_scaled = scaler.fit_transform(y.reshape(-1, 1))

In [9]:
# Convert the scaled arrays into float32 TensorFlow tensors.
X_tensor, y_tensor = (
    tf.convert_to_tensor(scaled, dtype=tf.float32)
    for scaled in (X_scaled, y_scaled)
)

In [10]:
# Sanity check: show the input and target tensor shapes, one per line.
print(X_tensor.shape, y_tensor.shape, sep='\n')

(186830, 10, 2)
(186830, 1)


In [11]:
# Pickle the input and target tensors to their respective files.
for path, tensor in (('./train_X.pkl', X_tensor), ('./train_y.pkl', y_tensor)):
    with open(path, 'wb') as file:
        pickle.dump(tensor, file)

In [12]:
# Persist the scaler; at this point it was most recently fitted on the
# targets (y), so it can undo the target scaling later.
with open('./scaler.pkl', mode='wb') as file:
    pickle.dump(scaler, file)