In [1]:
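### Simulation of an arrival process with arrival rate lambda(t), 0 < t < T
# NOTE(review): the cells visible below build train/test arrays of stock returns; confirm this header still describes the notebook.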
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import kurtosis
from tqdm import tqdm
import pickle
import pandas as pd
import random

In [2]:
# Pickled input; the pivot in a later cell reads its 'date', 'code' and 'close' columns.
file_path = 'HS300_security.pkl'

# NOTE: pickle.load can execute arbitrary code while deserializing,
# so this file must come from a trusted source.
with open(file_path, mode='rb') as file:
    data = pickle.load(file)


In [4]:
# Seeds Python's `random` module; the column draw below is controlled
# separately by the `random_state` passed to `DataFrame.sample`.
random.seed(1)

# How many columns (one per 'code') to keep.
num_columns_to_select = 20

# Wide table of closing prices: one row per 'date', one column per 'code'.
# Any column containing a missing value is dropped before the cast to float.
close_price_pivot = (
    data
    .pivot(index='date', columns='code', values='close')
    .dropna(axis=1)
    .astype(float)
)

# Draw the column subset reproducibly, then narrow the table to those columns.
selected_columns = close_price_pivot.sample(
    n=num_columns_to_select,
    axis=1,
    random_state=42,
)
close_price_pivot = close_price_pivot.loc[:, selected_columns.columns]


In [5]:
# Each row of `previous_close` holds the prior row's closing prices;
# the first row is all-NaN and is removed by the dropna calls below.
previous_close = close_price_pivot.shift(1)
last_close = previous_close.dropna()

# Row-over-row change of the closing price, expressed in percent.
ctc_return = ((close_price_pivot / previous_close - 1) * 100).dropna()

# Divide each previous-close row by its own total so every row sums to 1.
# keepdims=True keeps a (rows, 1) column so the division broadcasts per row.
row_sums = np.sum(last_close.to_numpy(), axis=1, keepdims=True)
last_close_weights = last_close / row_sums

In [6]:
# Fix the seed so the same 1200 row labels are drawn on every run.
random.seed(1)
selected_indices = random.sample(ctc_return.index.tolist(), 1200)

# Training rows: the sampled labels, kept in the order they were drawn.
ctc_return_training = ctc_return.loc[selected_indices]
last_close_weights_training = last_close_weights.loc[selected_indices]

# Testing rows: every label that was not sampled, in the original row order.
ctc_return_testing = ctc_return.drop(index=selected_indices)
last_close_weights_testing = last_close_weights.drop(index=selected_indices)
last_close_testing = last_close.drop(index=selected_indices)

# Convert the split frames to NumPy arrays.
# Each return matrix gains a length-1 middle axis: (rows, columns) -> (rows, 1, columns).
training_array = np.expand_dims(ctc_return_training.to_numpy(), axis=1)
testing_array = np.expand_dims(ctc_return_testing.to_numpy(), axis=1)

# The previous-close prices of the testing rows stay two-dimensional (rows, columns).
last_close_testing_array = last_close_testing.to_numpy()

In [7]:
# Write each array to its own .npy file; the file name carries the column count.
for npy_name, npy_array in (
    (f'stock_data_train_{num_columns_to_select}.npy', training_array),
    (f'stock_data_test_{num_columns_to_select}.npy', testing_array),
    (f'last_close_test_{num_columns_to_select}.npy', last_close_testing_array),
):
    np.save(npy_name, npy_array)