In [None]:
import pandas as pd
import numpy as np
import random
from tqdm import tqdm
from HighFreqFactors import Factors

In [None]:
# Read the 1-minute bar data; the column names are supplied explicitly.
# ("不复权" in the file name means the prices are not adjusted.)
spot_data = pd.read_csv('AAPL_1min_不复权.csv',
                        names=['datetime', 'open', 'high', 'low', 'close', 'volume'])

# Use the parsed timestamp as the index and drop the now-redundant text column
spot_data.index = pd.to_datetime(spot_data['datetime'])
spot_data = spot_data.drop(columns=['datetime'])

# Build the per-day dictionary: 'YYYY-MM-DD' -> that day's bars
datas = {}
times = np.unique(spot_data.index.date)  # distinct calendar dates, sorted
for time in times:
    time_str = time.strftime('%Y-%m-%d')
    # Store an independent copy: later cells add feature columns to each
    # day's frame, and writing into a bare slice of `spot_data` is what
    # triggers pandas' chained-assignment warning.
    datas[time_str] = spot_data.loc[time_str].copy()


In [None]:
# Silence pandas' SettingWithCopyWarning for the whole session
# (the option's default value is 'warn').
pd.set_option('mode.chained_assignment', None)

In [None]:
# Calculate series of features
# Each entry is (Factors function, extra positional arguments). The steps are
# applied to every day's frame in exactly this order.
feature_steps = (
    (Factors.calculate_median_price, ()),
    (Factors.calculate_return, ('median_price',)),
    (Factors.calculate_realized_volatility, ()),
    (Factors.calculate_realized_skewness_expanding, ('realized_skewness',)),
    (Factors.calculate_realized_kurtosis_expanding, ('realized_kurtosis',)),
    (Factors.calculate_momentum, ('momentum',)),
)
for time in tqdm(times, desc='Calculating features'):
    time_str = time.strftime('%Y-%m-%d')
    data = datas[time_str]
    # Return values are ignored, so the Factors functions are presumably
    # expected to add their columns to `data` in place — TODO confirm.
    for calculate, extra_args in feature_steps:
        calculate(data, *extra_args)
    datas[time_str] = data


In [None]:
# Column names of the first day's frame.
# NOTE(review): `cols` is not referenced again in the cells visible here.
first_day_key = times[0].strftime('%Y-%m-%d')
cols = list(datas[first_day_key].columns)

In [None]:
# Feature normalization function
def normalize_expanding(data: pd.DataFrame, factor_col: list):
    """Add an expanding min-max scaled copy of each listed column.

    For every name in ``factor_col`` a new column ``<name>_norm`` is written
    to ``data``. Each value is rescaled with the minimum and maximum of the
    column observed up to and including that row:
    ``2 * (x - min) / (max - min) - 1``.

    Rows where the expanding maximum equals the expanding minimum (always the
    first row) come out as NaN, because the expression is 0 / 0 there.

    Args:
        data: Frame holding the source columns; it is modified in place.
        factor_col: Names of the columns to normalize.

    Returns:
        The same ``data`` object, with the ``*_norm`` columns added.
    """
    for name in factor_col:
        series = data[name]
        running_low = series.expanding().min()
        running_high = series.expanding().max()
        spread = running_high - running_low
        data[name + '_norm'] = 2 * (series - running_low) / spread - 1
    return data

In [None]:
# Normalize the features
# NOTE(review): `factors_need_normalize` is not defined in any cell visible
# here; it must already exist when this cell runs — confirm where it is set.
for time in times:
    time_str = time.strftime('%Y-%m-%d')
    # Replace the day's frame with the result returned by normalize_expanding.
    data = datas[time_str] = normalize_expanding(datas[time_str], factors_need_normalize)

In [None]:
# Add Your Own Factor Names Here
# NOTE(review): 'realized_volatiltiy' looks like a misspelling of
# 'realized_volatility'. It is left unchanged because the column name that
# Factors.calculate_realized_volatility actually writes is not visible here —
# confirm against the frame's columns before running.
facs = ['momentum', 'return', 'realized_volatiltiy', 'realized_skewness', 'realized_kurtosis'] + ['close']

# create datas_to_use: for each day keep only the selected columns and drop
# every row that has a NaN in any of them
datas_to_use = {}
for date, day_frame in datas.items():
    datas_to_use[date] = day_frame[facs].dropna(axis=0, how='any')

In [None]:
# Collect the date keys in the order they are stored in `datas`
dates = list(datas)
num_dates = len(dates)

# Fraction of the dates, taken from the front of the list, used for training
train_ratio = 0.8
num_train = int(num_dates * train_ratio)

# Split the dates: the leading `num_train` entries form the training set
train_dates = dates[:num_train]

In [None]:
# Create environments
def create_env(data_dict, date):
    """Build a TradingEnv from the entry stored under ``date`` in ``data_dict``.

    NOTE(review): ``TradingEnv`` is neither imported nor defined in the cells
    visible here — confirm it is in scope before this is called.

    Args:
        data_dict: Mapping from date key to that date's data.
        date: Key of the entry to wrap.

    Returns:
        The ``TradingEnv`` constructed from ``data_dict[date]``.
    """
    return TradingEnv(data_dict[date])

# Build one environment per training date.
train_envs = [create_env(datas_to_use, date) for date in train_dates]
# Train on a random subset of at most 300 environments. random.sample raises
# ValueError when asked for more items than the population holds, so the
# sample size is capped at the number of environments actually available.
train_envs = random.sample(train_envs, min(300, len(train_envs)))

In [None]:
# Initialize the DQN agent
# Sizes are read from the first training environment: the state size is the
# product of the first two observation-space dimensions, and the action count
# comes from the action space's `n`.
first_env = train_envs[0]
obs_shape = first_env.observation_space.shape
state_dim = obs_shape[0] * obs_shape[1]
action_dim = first_env.action_space.n

# Hyperparameters passed through to DQNAgent unchanged
agent_kwargs = dict(
    replay_buffer_size=10000,
    batch_size=512,
    gamma=0.995,
    lr=0.00001,
    target_update=1000,
)
agent = DQNAgent(state_dim, action_dim, **agent_kwargs)

In [None]:
# Train the agent on the sampled environments and keep whatever train() returns
num_episodes = 1
rewards_log = agent.train(train_envs, num_episodes=num_episodes)