In [8]:
import pandas as pd

# 读取CSV文件
spy_growth = pd.read_csv('/usr3/graduate/xz0224/normal/spyg.csv')
spy_value = pd.read_csv('/usr3/graduate/xz0224/normal/spyv.csv')
sp500 = pd.read_csv('/usr3/graduate/xz0224/normal/spy.csv')
cash = pd.read_csv('/usr3/graduate/xz0224/normal/cash.csv')
qqq_cc = pd.read_csv('/usr3/graduate/xz0224/normal/qqq.csv')
qqq_co = pd.read_csv('/usr3/graduate/xz0224/co/qqq.csv')

# 转换日期列为日期时间格式
spy_growth['date'] = pd.to_datetime(spy_growth['date'])
spy_value['date'] = pd.to_datetime(spy_value['date'])
sp500['date'] = pd.to_datetime(sp500['date'])
cash['date'] = pd.to_datetime(cash['date'])
qqq_cc['date'] = pd.to_datetime(qqq_cc['date'])
qqq_co['date'] = pd.to_datetime(qqq_co['date'])

# 初始化投资金额
initial_investment = 100

# Oracle 对于 Growth 和 Value 的选择
oracle_values_GV = [initial_investment]
oracle_dates_GV = [pd.to_datetime('2000-01-01')]

# 模拟 Oracle 投资过程
current_value = initial_investment
for date in pd.date_range(start='2000-01-01', end='2024-06-30'):
    if date in spy_growth['date'].values and date in spy_value['date'].values:
        growth_rate = spy_growth.loc[spy_growth['date'] == date, 'return'].values[0]
        value_rate = spy_value.loc[spy_value['date'] == date, 'return'].values[0]
        # Oracle 选择收益率更高的投资
        current_value *= (1 + max(growth_rate, value_rate))
        oracle_values_GV.append(current_value)
        oracle_dates_GV.append(date)

print(f"Final Oracle investment amount for Growth vs Value: {current_value}")

# 'Buy and Hold' 策略的投资值
buy_hold_growth_values = [initial_investment]
buy_hold_value_values = [initial_investment]
buy_hold_spy_values = [initial_investment]
buy_hold_qqq_cc_values = [initial_investment]
buy_hold_qqq_co_values = [initial_investment]

# 初始化日期列表
buy_hold_dates = [pd.to_datetime('2000-01-01')]

# 遍历每个日期，计算投资价值
for date in pd.date_range(start='2000-01-01', end='2024-06-30'):
    if date in spy_growth['date'].values and date in spy_value['date'].values and date in sp500['date'].values and date in qqq_cc['date'].values and date in qqq_co['date'].values:
        growth_rate = spy_growth.loc[spy_growth['date'] == date, 'return'].values[0]
        value_rate = spy_value.loc[spy_value['date'] == date, 'return'].values[0]
        spy_rate = sp500.loc[sp500['date'] == date, 'return'].values[0]
        qqq_cc_rate = qqq_cc.loc[qqq_cc['date'] == date, 'return'].values[0]
        qqq_co_rate = qqq_co.loc[qqq_co['date'] == date, 'return'].values[0]

        # 计算 'Buy and Hold' 策略的投资价值
        current_value_growth = buy_hold_growth_values[-1] * (1 + growth_rate)
        current_value_value = buy_hold_value_values[-1] * (1 + value_rate)
        current_value_spy = buy_hold_spy_values[-1] * (1 + spy_rate)
        current_value_qqq_cc = buy_hold_qqq_cc_values[-1] * (1 + qqq_cc_rate)
        current_value_qqq_co = buy_hold_qqq_co_values[-1] * (1 + qqq_co_rate)

        buy_hold_growth_values.append(current_value_growth)
        buy_hold_value_values.append(current_value_value)
        buy_hold_spy_values.append(current_value_spy)
        buy_hold_qqq_cc_values.append(current_value_qqq_cc)
        buy_hold_qqq_co_values.append(current_value_qqq_co)
        buy_hold_dates.append(date)

# Oracle 对于 SPY 和 Cash 的选择
oracle_values_MC = [initial_investment]
oracle_dates_MC = [pd.to_datetime('2000-01-01')]

# 模拟 Oracle 投资过程
current_value = initial_investment
for date in pd.date_range(start='2000-01-01', end='2024-06-30'):
    if date in sp500['date'].values:
        spy_rate = sp500.loc[sp500['date'] == date, 'return'].values[0]
        if spy_rate > 0:
            current_value *= (1 + spy_rate)
        else:
            # 如果SPY的回报率为负，则保持现金不变
            current_value *= 1
        oracle_values_MC.append(current_value)
        oracle_dates_MC.append(date)


Final Oracle investment amount for Growth vs Value: 7720645.775493967


In [9]:
import pandas as pd
import empyrical
import os

# 读取Excel文件中的数据路径
file_paths = {
    'GC-1Y-OC': '/usr3/graduate/xz0224/oc/gc/investment_results_final(2006)(train_1_year_o-c).xlsx',
    'GC-3M-OC': '/usr3/graduate/xz0224/oc/gc/investment_results_final(2006)(train_3_month_o-c).xlsx',
    'GC-6M-OC': '/usr3/graduate/xz0224/oc/gc/investment_results_final(2006)(train_6_month_o-c).xlsx',
    'MC-1Y-OC': '/usr3/graduate/xz0224/oc/mc/investment_results_final(2006)(train_1years_c-c).xlsx',
    'MC-3M-OC': '/usr3/graduate/xz0224/oc/mc/investment_results_final(2006)(train_3months_c-c).xlsx',
    'MC-6M-OC': '/usr3/graduate/xz0224/oc/mc/investment_results_final(2006)(train_6months_c-c).xlsx',
    'VC-1Y-OC': '/usr3/graduate/xz0224/oc/vc/investment_results_final(2006)(train_1_year_c-c).xlsx',
    'VC-3M-OC': '/usr3/graduate/xz0224/oc/vc/investment_results_final(2006)(train_3months_c-c).xlsx',
    'VC-6M-OC': '/usr3/graduate/xz0224/oc/vc/investment_results_final(2006)(train_6months_c-c).xlsx',
#   'QQQ-CO': '/usr3/graduate/xz0224/co/investment_results_final(2006)(train_1years_c-c).xlsx',
    'QQQ-OC': '/usr3/graduate/xz0224/oc/investment_results_final(2006)(train_1years_c-c).xlsx',
    'QQQ-CC': '/usr3/graduate/xz0224/normal/investment_results_final(2006)(train_1years_c-c).xlsx'
}



# Function to read data from Excel or CSV files
def read_data(file_path):
    if file_path.endswith('.xlsx'):
        data = pd.read_excel(file_path)
    elif file_path.endswith('.csv'):
        data = pd.read_csv(file_path)
    return pd.to_datetime(data['Date']), data['Investment_Value']

# Read the data from the files
strategies_data = {name: read_data(path) for name, path in file_paths.items()}

# Add buy_hold_qqq_cc_values and buy_hold_qqq_co_values to strategies_data
strategies_data['Buy_Hold_QQQ_CC'] = (buy_hold_dates, buy_hold_qqq_cc_values)

# Initialize DataFrames to store results
end_year_values_df = pd.DataFrame()
annual_returns_df = pd.DataFrame()
max_drawdowns_df = pd.DataFrame()
annual_volatility_df = pd.DataFrame()
sharpe_ratios_df = pd.DataFrame()

# Function to analyze investment strategy
def analyze_investment_strategy(dates, values):
    df = pd.DataFrame({'date': pd.to_datetime(dates), 'value': values})
    df.set_index('date', inplace=True)
    grouped = df.resample('Y')
    year_end_values = {}
    annual_returns = {}
    annual_max_drawdowns = {}
    annual_volatility = {}
    sharpe_ratios = {}

    for name, group in grouped:
        year = name.year
        year_end_values[year] = group.iloc[-1]['value']
        daily_returns = empyrical.simple_returns(group['value'])
        annual_return = empyrical.annual_return(daily_returns)
        max_drawdown = empyrical.max_drawdown(daily_returns)
        volatility = empyrical.annual_volatility(daily_returns)
        sharpe_ratio = annual_return / volatility if volatility != 0 else 0

        annual_returns[year] = annual_return
        annual_max_drawdowns[year] = max_drawdown
        annual_volatility[year] = volatility
        sharpe_ratios[year] = sharpe_ratio

    return year_end_values, annual_returns, annual_max_drawdowns, annual_volatility, sharpe_ratios

# Analyze each strategy and collect results
for strategy_name, (strategy_dates, strategy_values) in strategies_data.items():
    year_end_values, annual_returns, max_drawdowns, annual_volatility, sharpe_ratios = analyze_investment_strategy(strategy_dates, strategy_values)
    year_end_values_df_ = pd.DataFrame.from_dict(year_end_values, orient='index', columns=[strategy_name])
    annual_returns_df_ = pd.DataFrame.from_dict(annual_returns, orient='index', columns=[strategy_name])
    max_drawdowns_df_ = pd.DataFrame.from_dict(max_drawdowns, orient='index', columns=[strategy_name])
    annual_volatility_df_ = pd.DataFrame.from_dict(annual_volatility, orient='index', columns=[strategy_name])
    sharpe_ratios_df_ = pd.DataFrame.from_dict(sharpe_ratios, orient='index', columns=[strategy_name])

    end_year_values_df = pd.concat([end_year_values_df, year_end_values_df_], axis=1)
    annual_returns_df = pd.concat([annual_returns_df, annual_returns_df_], axis=1)
    max_drawdowns_df = pd.concat([max_drawdowns_df, max_drawdowns_df_], axis=1)
    annual_volatility_df = pd.concat([annual_volatility_df, annual_volatility_df_], axis=1)
    sharpe_ratios_df = pd.concat([sharpe_ratios_df, sharpe_ratios_df_], axis=1)

# Reset index to add 'Year' as the first column
end_year_values_df.reset_index(inplace=True)
annual_returns_df.reset_index(inplace=True)
max_drawdowns_df.reset_index(inplace=True)
annual_volatility_df.reset_index(inplace=True)
sharpe_ratios_df.reset_index(inplace=True)

# Rename the index column to 'Year'
end_year_values_df.rename(columns={'index': 'Year'}, inplace=True)
annual_returns_df.rename(columns={'index': 'Year'}, inplace=True)
max_drawdowns_df.rename(columns={'index': 'Year'}, inplace=True)
annual_volatility_df.rename(columns={'index': 'Year'}, inplace=True)
sharpe_ratios_df.rename(columns={'index': 'Year'}, inplace=True)

# Create the directory if it does not exist
os.makedirs('jul2', exist_ok=True)

# Save DataFrames to .xlsx files
end_year_values_df.to_excel('jul2/end_year_values-1.xlsx', index=False)
annual_returns_df.to_excel('jul2/annual_returns-1.xlsx', index=False)
max_drawdowns_df.to_excel('jul2/max_drawdowns-1.xlsx', index=False)
annual_volatility_df.to_excel('jul2/annual_volatility-1.xlsx', index=False)
sharpe_ratios_df.to_excel('jul2/sharpe_ratios-1.xlsx', index=False)


  grouped = df.resample('Y')
  grouped = df.resample('Y')
  grouped = df.resample('Y')
  grouped = df.resample('Y')
  grouped = df.resample('Y')
  grouped = df.resample('Y')
  grouped = df.resample('Y')
  grouped = df.resample('Y')
  grouped = df.resample('Y')
  grouped = df.resample('Y')
  grouped = df.resample('Y')
  grouped = df.resample('Y')


In [11]:
import pandas as pd
import empyrical
import os
# Add buy_hold_qqq_cc_values and buy_hold_qqq_co_values to strategies_data
strategies_data['BH-QQ'] = (buy_hold_dates, buy_hold_qqq_cc_values)

# 读取Excel文件中的数据路径
file_paths = {
    'GC-1Y': '/usr3/graduate/xz0224/oc/gc/investment_results_final(2006)(train_1_year_o-c).xlsx',
    'GC-6M': '/usr3/graduate/xz0224/oc/gc/investment_results_final(2006)(train_6_month_o-c).xlsx',
    'GC-3M': '/usr3/graduate/xz0224/oc/gc/investment_results_final(2006)(train_3_month_o-c).xlsx',
    'MC-1Y': '/usr3/graduate/xz0224/oc/mc/investment_results_final(2006)(train_1years_c-c).xlsx',
    'MC-6M': '/usr3/graduate/xz0224/oc/mc/investment_results_final(2006)(train_6months_c-c).xlsx',
    'MC-3M': '/usr3/graduate/xz0224/oc/mc/investment_results_final(2006)(train_3months_c-c).xlsx',
    'VC-1Y': '/usr3/graduate/xz0224/oc/vc/investment_results_final(2006)(train_1_year_c-c).xlsx',
    'VC-3M': '/usr3/graduate/xz0224/oc/vc/investment_results_final(2006)(train_3months_c-c).xlsx',
    'VC-6M': '/usr3/graduate/xz0224/oc/vc/investment_results_final(2006)(train_6months_c-c).xlsx',
    'QQQ-CO': '/usr3/graduate/xz0224/co/investment_results_final(2006)(train_1years_c-c).xlsx',
    'QQ': '/usr3/graduate/xz0224/oc/investment_results_final(2006)(train_1years_c-c).xlsx',
    'QQ-CC': '/usr3/graduate/xz0224/normal/investment_results_final(2006)(train_1years_c-c).xlsx'
}

strategies_data = {
    'BH-QQ': (buy_hold_dates, buy_hold_qqq_cc_values)
}

# Function to read data from Excel or CSV files
def read_data(file_path):
    if file_path.endswith('.xlsx'):
        data = pd.read_excel(file_path)
    elif file_path.endswith('.csv'):
        data = pd.read_csv(file_path)
    return pd.to_datetime(data['Date']), data['Investment_Value']

# Read the data from the files and add to strategies_data
for name, path in file_paths.items():
    strategies_data[name] = read_data(path)


# Initialize DataFrames to store results
end_year_values_df = pd.DataFrame()
annual_returns_df = pd.DataFrame()
max_drawdowns_df = pd.DataFrame()
annual_volatility_df = pd.DataFrame()
sharpe_ratios_df = pd.DataFrame()

# Function to analyze investment strategy
def analyze_investment_strategy(dates, values):
    df = pd.DataFrame({'date': pd.to_datetime(dates), 'value': values})
    df.set_index('date', inplace=True)
    grouped = df.resample('Y')
    year_end_values = {}
    annual_returns = {}
    annual_max_drawdowns = {}
    annual_volatility = {}
    sharpe_ratios = {}

    for name, group in grouped:
        year = name.year
        year_end_values[year] = group.iloc[-1]['value']
        daily_returns = empyrical.simple_returns(group['value'])
        annual_return = empyrical.annual_return(daily_returns)
        max_drawdown = empyrical.max_drawdown(daily_returns)
        volatility = empyrical.annual_volatility(daily_returns)
        sharpe_ratio = annual_return / volatility if volatility != 0 else 0

        annual_returns[year] = annual_return
        annual_max_drawdowns[year] = max_drawdown
        annual_volatility[year] = volatility
        sharpe_ratios[year] = sharpe_ratio

    return year_end_values, annual_returns, annual_max_drawdowns, annual_volatility, sharpe_ratios

# Analyze each strategy and collect results
for strategy_name, (strategy_dates, strategy_values) in strategies_data.items():
    year_end_values, annual_returns, max_drawdowns, annual_volatility, sharpe_ratios = analyze_investment_strategy(strategy_dates, strategy_values)
    year_end_values_df_ = pd.DataFrame.from_dict(year_end_values, orient='index', columns=[strategy_name])
    annual_returns_df_ = pd.DataFrame.from_dict(annual_returns, orient='index', columns=[strategy_name])
    max_drawdowns_df_ = pd.DataFrame.from_dict(max_drawdowns, orient='index', columns=[strategy_name])
    annual_volatility_df_ = pd.DataFrame.from_dict(annual_volatility, orient='index', columns=[strategy_name])
    sharpe_ratios_df_ = pd.DataFrame.from_dict(sharpe_ratios, orient='index', columns=[strategy_name])

    end_year_values_df = pd.concat([end_year_values_df, year_end_values_df_], axis=1)
    annual_returns_df = pd.concat([annual_returns_df, annual_returns_df_], axis=1)
    max_drawdowns_df = pd.concat([max_drawdowns_df, max_drawdowns_df_], axis=1)
    annual_volatility_df = pd.concat([annual_volatility_df, annual_volatility_df_], axis=1)
    sharpe_ratios_df = pd.concat([sharpe_ratios_df, sharpe_ratios_df_], axis=1)

# Reset index to add 'Year' as the first column
end_year_values_df.reset_index(inplace=True)
annual_returns_df.reset_index(inplace=True)
max_drawdowns_df.reset_index(inplace=True)
annual_volatility_df.reset_index(inplace=True)
sharpe_ratios_df.reset_index(inplace=True)

# Rename the index column to 'Year'
end_year_values_df.rename(columns={'index': 'Year'}, inplace=True)
annual_returns_df.rename(columns={'index': 'Year'}, inplace=True)
max_drawdowns_df.rename(columns={'index': 'Year'}, inplace=True)
annual_volatility_df.rename(columns={'index': 'Year'}, inplace=True)
sharpe_ratios_df.rename(columns={'index': 'Year'}, inplace=True)

# Create the directory if it does not exist
os.makedirs('jul2', exist_ok=True)

# Save DataFrames to .xlsx files
end_year_values_df.to_excel('jul2/end_year_values-1.xlsx', index=False)
annual_returns_df.to_excel('jul2/annual_returns-1.xlsx', index=False)
max_drawdowns_df.to_excel('jul2/max_drawdowns-1.xlsx', index=False)
annual_volatility_df.to_excel('jul2/annual_volatility-1.xlsx', index=False)
sharpe_ratios_df.to_excel('jul2/sharpe_ratios-1.xlsx', index=False)


  grouped = df.resample('Y')
  grouped = df.resample('Y')
  grouped = df.resample('Y')
  grouped = df.resample('Y')
  grouped = df.resample('Y')
  grouped = df.resample('Y')
  grouped = df.resample('Y')
  grouped = df.resample('Y')
  grouped = df.resample('Y')
  grouped = df.resample('Y')
  grouped = df.resample('Y')
  grouped = df.resample('Y')


In [None]:
import os
import pandas as pd
import numpy as np
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
import openpyxl
from openpyxl import load_workbook
from keras import backend as K
import gc
import tensorflow as tf
import time

os.environ["CUDA_VISIBLE_DEVICES"] = "0"

# 检查 TensorFlow 是否检测到 GPU
physical_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(physical_devices))

# 确保 TensorFlow 使用 GPU
if len(physical_devices) > 0:
    tf.config.experimental.set_memory_growth(physical_devices[0], True)

print("CUDA enabled:", tf.test.is_built_with_cuda())
print("GPU available:", tf.config.list_physical_devices('GPU'))

# 设置文件路径
file_path_qqq = '/usr3/graduate/xz0224/co/qqq.csv'
file_path_cash = '/usr3/graduate/xz0224/co/cash.csv'

# 读取数据
data_qqq = pd.read_csv(file_path_qqq)
data_cash = pd.read_csv(file_path_cash)

# 确保日期列格式为datetime
data_qqq['date'] = pd.to_datetime(data_qqq['date'])
data_cash['date'] = pd.to_datetime(data_cash['date'])

# 创建 return_label 列
data_qqq['return_label'] = data_qqq['return'].apply(lambda x: '+' if x > 0 else '-')
data_cash['return_label'] = data_cash['return'].apply(lambda x: '+' if x > 0 else '-')

# 构建特征
def create_label_sequence(data):
    sequences = []
    for i in range(len(data)):
        if i >= 10:  # 确保有足够的数据来创建10天的序列
            sequence = ''.join(data['return_label'][i-10:i])
            sequences.append(sequence)
        else:
            sequences.append(None)  # 对于序列开始的部分，填充None
    return sequences

data_qqq['feature'] = create_label_sequence(data_qqq)
data_cash['feature'] = create_label_sequence(data_cash)

# 合并特征和目标变量，只保留qqq的特征
combined_data = pd.merge(data_qqq, data_cash, on='date')
combined_data['combined_feature'] = combined_data['feature_x']  # 只使用qqq的特征
combined_data['target'] = combined_data.apply(lambda row: 'qqq' if row['return_x'] > row['return_y'] else 'cash', axis=1)

# 设置时间范围
start_date = pd.to_datetime('2000-01-01')
end_date = pd.to_datetime('2024-06-30')

# 初始化投资金额
initial_investment = 100
current_value = initial_investment

# 加载已有的xlsx文件，如果不存在则创建一个新的
file_path = '/usr3/graduate/xz0224/co/investment_results_final(2006)(train_1years_c-c).xlsx'
if os.path.exists(file_path):
    wb = load_workbook(file_path)
    sheet = wb.active
    # 找到最后一行的日期和投资金额
    last_row = sheet.max_row
    current_date = pd.to_datetime(sheet.cell(row=last_row, column=1).value) + pd.Timedelta(days=1)
    current_value = sheet.cell(row=last_row, column=2).value
else:
    wb = openpyxl.Workbook()
    sheet = wb.active
    sheet.append(['Date', 'Investment_Value', 'Decision'])
    current_date = start_date

# 准备用于绘图的数据
dates = [current_date]
values = [current_value]

# 循环遍历每一天
while current_date <= end_date:
    print(f"Starting iteration for date: {current_date} at time: {time.strftime('%Y-%m-%d %H:%M:%S')}")
    # 设置训练集的时间范围（前五年）
    train_start = current_date - pd.DateOffset(years=1)
    train_end = current_date - pd.Timedelta(days=1)

    # 分割训练集和测试集
    train_set = combined_data[(combined_data['date'] >= train_start) & (combined_data['date'] <= train_end)]
    test_set = combined_data[combined_data['date'] == current_date]
    
    # 检查数据泄漏
    if not test_set.empty:
        assert not any(test_set.index.isin(train_set.index)), "Data leakage detected! Test set data found in the training set."
    
    # 在创建特征后，删除带有 None 值的行
    train_set = train_set.dropna(subset=['combined_feature'])
    test_set = test_set.dropna(subset=['combined_feature'])

    if not test_set.empty:
        # 现在进行 LSTM 模型训练的其他步骤
        X_train = train_set['combined_feature']
        y_train = train_set['target']
        X_test = test_set['combined_feature']
        y_test = test_set['target']

        # 将字符串序列转换为数值
        tokenizer = Tokenizer(char_level=True)
        tokenizer.fit_on_texts(X_train)
        X_train_seq = tokenizer.texts_to_sequences(X_train)
        X_test_seq = tokenizer.texts_to_sequences(X_test)

        # 确保所有序列具有相同的长度
        max_length = max([len(seq) for seq in X_train_seq])
        X_train_padded = pad_sequences(X_train_seq, maxlen=max_length, padding='post')
        X_test_padded = pad_sequences(X_test_seq, maxlen=max_length, padding='post')

        # LSTM 输入形状需要为 (样本数, 时间步长, 特征数)
        X_train_padded = np.expand_dims(X_train_padded, axis=-1)
        X_test_padded = np.expand_dims(X_test_padded, axis=-1)

        # 将目标变量转换为分类编码
        label_encoder = LabelEncoder()
        y_train_encoded = label_encoder.fit_transform(y_train)
        y_test_encoded = label_encoder.transform(y_test)
        y_train_categorical = to_categorical(y_train_encoded)
        y_test_categorical = to_categorical(y_test_encoded)

        # 打印类别映射
        class_mapping = dict(zip(label_encoder.classes_, label_encoder.transform(label_encoder.classes_)))
        print("Class mapping:", class_mapping)

        # 创建 LSTM 模型
        model = Sequential()
        model.add(LSTM(50, input_shape=(max_length, 1)))
        model.add(Dense(2, activation='softmax'))  # 假设有两个输出类别

        # 编译模型
        model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

        # 训练模型
        model.fit(X_train_padded, y_train_categorical, epochs=10, batch_size=32)

        # 进行模型预测
        y_pred = model.predict(X_test_padded)

        # 将预测转换为具体的投资决策（'qqq' 或 'cash'）
        decisions = np.argmax(y_pred, axis=1)

        # 模拟投资过程
        for i, decision in enumerate(decisions):
            # 获取对应日期的收益率
            date = test_set.iloc[i]['date']
            qqq_rate = data_qqq.loc[data_qqq['date'] == date, 'return'].values
            cash_rate = data_cash.loc[data_cash['date'] == date, 'return'].values

            # 检查收益率是否为 None，如果是则设为 0
            qqq_rate = qqq_rate[0] if len(qqq_rate) > 0 else 0
            cash_rate = cash_rate[0] if len(cash_rate) > 0 else 0

            # 根据预测决策更新投资金额
            if decision == 1:  # 'qqq'
                current_value *= (1 + qqq_rate)
            else:  # 'cash'
                current_value *= (1 + cash_rate)

            # 记录日期、金额和决策
            sheet.append([date, current_value, 'qqq' if decision == 1 else 'cash'])
            wb.save(file_path)

        # 清理模型和内存
        del model
        del train_set, test_set, X_train, y_train, X_test, y_test
        del tokenizer, X_train_seq, X_test_seq, X_train_padded, X_test_padded
        del label_encoder, y_train_encoded, y_test_encoded, y_train_categorical, y_test_categorical
        K.clear_session()
        gc.collect()
        print(f"Investment amount: {current_value} Date: {current_date}")

        # 使用 TensorFlow 函数来释放 GPU 内存
    current_date += pd.Timedelta(days=1)

# 保存最终的结果
wb.save(file_path)


In [2]:
import pandas as pd
import empyrical
import os

# 读取Excel文件中的数据路径
file_paths = {
    'MC-CO': '/usr3/graduate/xz0224/co/mc/investment_results_final(2006)(train_1_year_o-c).xlsx',
    'QQQ-CO': '/usr3/graduate/xz0224/co/investment_results_final(2006)(train_1years_c-c).xlsx',
}

# Function to read data from Excel or CSV files
def read_data(file_path):
    if file_path.endswith('.xlsx'):
        data = pd.read_excel(file_path)
    elif file_path.endswith('.csv'):
        data = pd.read_csv(file_path)
    return pd.to_datetime(data['Date']), data['Investment_Value']

# Read the data from the files and add to strategies_data
for name, path in file_paths.items():
    strategies_data[name] = read_data(path)


# Initialize DataFrames to store results
end_year_values_df = pd.DataFrame()
annual_returns_df = pd.DataFrame()
max_drawdowns_df = pd.DataFrame()
annual_volatility_df = pd.DataFrame()
sharpe_ratios_df = pd.DataFrame()

# Function to analyze investment strategy
def analyze_investment_strategy(dates, values):
    df = pd.DataFrame({'date': pd.to_datetime(dates), 'value': values})
    df.set_index('date', inplace=True)
    grouped = df.resample('Y')
    year_end_values = {}
    annual_returns = {}
    annual_max_drawdowns = {}
    annual_volatility = {}
    sharpe_ratios = {}

    for name, group in grouped:
        year = name.year
        year_end_values[year] = group.iloc[-1]['value']
        daily_returns = empyrical.simple_returns(group['value'])
        annual_return = empyrical.annual_return(daily_returns)
        max_drawdown = empyrical.max_drawdown(daily_returns)
        volatility = empyrical.annual_volatility(daily_returns)
        sharpe_ratio = annual_return / volatility if volatility != 0 else 0

        annual_returns[year] = annual_return
        annual_max_drawdowns[year] = max_drawdown
        annual_volatility[year] = volatility
        sharpe_ratios[year] = sharpe_ratio

    return year_end_values, annual_returns, annual_max_drawdowns, annual_volatility, sharpe_ratios

# Analyze each strategy and collect results
for strategy_name, (strategy_dates, strategy_values) in strategies_data.items():
    year_end_values, annual_returns, max_drawdowns, annual_volatility, sharpe_ratios = analyze_investment_strategy(strategy_dates, strategy_values)
    year_end_values_df_ = pd.DataFrame.from_dict(year_end_values, orient='index', columns=[strategy_name])
    annual_returns_df_ = pd.DataFrame.from_dict(annual_returns, orient='index', columns=[strategy_name])
    max_drawdowns_df_ = pd.DataFrame.from_dict(max_drawdowns, orient='index', columns=[strategy_name])
    annual_volatility_df_ = pd.DataFrame.from_dict(annual_volatility, orient='index', columns=[strategy_name])
    sharpe_ratios_df_ = pd.DataFrame.from_dict(sharpe_ratios, orient='index', columns=[strategy_name])

    end_year_values_df = pd.concat([end_year_values_df, year_end_values_df_], axis=1)
    annual_returns_df = pd.concat([annual_returns_df, annual_returns_df_], axis=1)
    max_drawdowns_df = pd.concat([max_drawdowns_df, max_drawdowns_df_], axis=1)
    annual_volatility_df = pd.concat([annual_volatility_df, annual_volatility_df_], axis=1)
    sharpe_ratios_df = pd.concat([sharpe_ratios_df, sharpe_ratios_df_], axis=1)

# Reset index to add 'Year' as the first column
end_year_values_df.reset_index(inplace=True)
annual_returns_df.reset_index(inplace=True)
max_drawdowns_df.reset_index(inplace=True)
annual_volatility_df.reset_index(inplace=True)
sharpe_ratios_df.reset_index(inplace=True)

# Rename the index column to 'Year'
end_year_values_df.rename(columns={'index': 'Year'}, inplace=True)
annual_returns_df.rename(columns={'index': 'Year'}, inplace=True)
max_drawdowns_df.rename(columns={'index': 'Year'}, inplace=True)
annual_volatility_df.rename(columns={'index': 'Year'}, inplace=True)
sharpe_ratios_df.rename(columns={'index': 'Year'}, inplace=True)

# Create the directory if it does not exist
os.makedirs('jul3', exist_ok=True)

# Save DataFrames to .xlsx files
end_year_values_df.to_excel('jul3/end_year_values-1.xlsx', index=False)
annual_returns_df.to_excel('jul3/annual_returns-1.xlsx', index=False)
max_drawdowns_df.to_excel('jul3/max_drawdowns-1.xlsx', index=False)
annual_volatility_df.to_excel('jul3/annual_volatility-1.xlsx', index=False)
sharpe_ratios_df.to_excel('jul3/sharpe_ratios-1.xlsx', index=False)


FileNotFoundError: [Errno 2] No such file or directory: '/usr3/graduate/xz0224/co/mc/investment_results_final(2006)(train_1_year_o-c).xlsx'

In [3]:
import pandas as pd
import empyrical
import os

# 读取Excel文件中的数据路径
file_paths = {
    'MC-CO': '/usr3/graduate/xz0224/co/mc/investment_results_final(2006)(train_1_year_o-c).xlsx',
    'QQQ-CO': '/usr3/graduate/xz0224/co/investment_results_final(2006)(train_1years_c-c).xlsx',
}

# Function to read data from Excel or CSV files
def read_data(file_path):
    if file_path.endswith('.xlsx'):
        data = pd.read_excel(file_path)
    elif file_path.endswith('.csv'):
        data = pd.read_csv(file_path)
    return pd.to_datetime(data['Date']), data['Investment_Value']

# Read the data from the files and add to strategies_data
for name, path in file_paths.items():
    strategies_data[name] = read_data(path)


# Initialize DataFrames to store results
end_year_values_df = pd.DataFrame()
annual_returns_df = pd.DataFrame()
max_drawdowns_df = pd.DataFrame()
annual_volatility_df = pd.DataFrame()
sharpe_ratios_df = pd.DataFrame()

# Function to analyze investment strategy
def analyze_investment_strategy(dates, values):
    df = pd.DataFrame({'date': pd.to_datetime(dates), 'value': values})
    df.set_index('date', inplace=True)
    grouped = df.resample('Y')
    year_end_values = {}
    annual_returns = {}
    annual_max_drawdowns = {}
    annual_volatility = {}
    sharpe_ratios = {}

    for name, group in grouped:
        year = name.year
        year_end_values[year] = group.iloc[-1]['value']
        daily_returns = empyrical.simple_returns(group['value'])
        annual_return = empyrical.annual_return(daily_returns)
        max_drawdown = empyrical.max_drawdown(daily_returns)
        volatility = empyrical.annual_volatility(daily_returns)
        sharpe_ratio = annual_return / volatility if volatility != 0 else 0

        annual_returns[year] = annual_return
        annual_max_drawdowns[year] = max_drawdown
        annual_volatility[year] = volatility
        sharpe_ratios[year] = sharpe_ratio

    return year_end_values, annual_returns, annual_max_drawdowns, annual_volatility, sharpe_ratios

# Analyze each strategy and collect results
for strategy_name, (strategy_dates, strategy_values) in strategies_data.items():
    year_end_values, annual_returns, max_drawdowns, annual_volatility, sharpe_ratios = analyze_investment_strategy(strategy_dates, strategy_values)
    year_end_values_df_ = pd.DataFrame.from_dict(year_end_values, orient='index', columns=[strategy_name])
    annual_returns_df_ = pd.DataFrame.from_dict(annual_returns, orient='index', columns=[strategy_name])
    max_drawdowns_df_ = pd.DataFrame.from_dict(max_drawdowns, orient='index', columns=[strategy_name])
    annual_volatility_df_ = pd.DataFrame.from_dict(annual_volatility, orient='index', columns=[strategy_name])
    sharpe_ratios_df_ = pd.DataFrame.from_dict(sharpe_ratios, orient='index', columns=[strategy_name])

    end_year_values_df = pd.concat([end_year_values_df, year_end_values_df_], axis=1)
    annual_returns_df = pd.concat([annual_returns_df, annual_returns_df_], axis=1)
    max_drawdowns_df = pd.concat([max_drawdowns_df, max_drawdowns_df_], axis=1)
    annual_volatility_df = pd.concat([annual_volatility_df, annual_volatility_df_], axis=1)
    sharpe_ratios_df = pd.concat([sharpe_ratios_df, sharpe_ratios_df_], axis=1)

# Reset index to add 'Year' as the first column
end_year_values_df.reset_index(inplace=True)
annual_returns_df.reset_index(inplace=True)
max_drawdowns_df.reset_index(inplace=True)
annual_volatility_df.reset_index(inplace=True)
sharpe_ratios_df.reset_index(inplace=True)

# Rename the index column to 'Year'
end_year_values_df.rename(columns={'index': 'Year'}, inplace=True)
annual_returns_df.rename(columns={'index': 'Year'}, inplace=True)
max_drawdowns_df.rename(columns={'index': 'Year'}, inplace=True)
annual_volatility_df.rename(columns={'index': 'Year'}, inplace=True)
sharpe_ratios_df.rename(columns={'index': 'Year'}, inplace=True)

# Create the directory if it does not exist
os.makedirs('jul3', exist_ok=True)

# Save DataFrames to .xlsx files
end_year_values_df.to_excel('jul3/end_year_values-1.xlsx', index=False)
annual_returns_df.to_excel('jul3/annual_returns-1.xlsx', index=False)
max_drawdowns_df.to_excel('jul3/max_drawdowns-1.xlsx', index=False)
annual_volatility_df.to_excel('jul3/annual_volatility-1.xlsx', index=False)
sharpe_ratios_df.to_excel('jul3/sharpe_ratios-1.xlsx', index=False)


FileNotFoundError: [Errno 2] No such file or directory: '/usr3/graduate/xz0224/co/mc/investment_results_final(2006)(train_1_year_o-c).xlsx'