# Changing to Parent Directory to Import Needed Libraries

In [2]:
import os

# Change this relative to your own directory structure
PARENT_DIR = '/Users/henrygilbert/GitHub/CS-6362/final_project'
os.chdir(PARENT_DIR)

# Importing Needed Libraries

In [24]:
import torch
import numpy as np
from typing import Tuple, List
import pandas as pd
import importlib
import copy
from torch.utils.data import DataLoader

import utilities
import factor_data_loader
import market_data_loader
from model import CVAE
from matplotlib import pyplot as plt

importlib.reload(utilities)


<module 'utilities' from '/Users/henrygilbert/GitHub/CS-6362/final_project/utilities.py'>

# Loading Data 

In [42]:


mdl = market_data_loader.MarketDataLoader()
fdl = factor_data_loader.FactorDataLoader()

start_ts = pd.Timestamp('2016-01-01')
end_ts = pd.Timestamp('2021-02-01')

monthly_eod_prices = mdl.get_eod_price_data_grouped('SPY', start_ts, end_ts, market_data_loader.GroupPeriod.MONTHLY)
all_eod_prices = np.concatenate([prices for prices in list(monthly_eod_prices.values())])

percent_change = np.diff(all_eod_prices)/all_eod_prices[:-1]
std_change = np.std(percent_change)
mean_change = np.mean(percent_change)

print(f"Mean: {mean_change}, Std: {std_change}")

week_size = 5
weekly_data = [percent_change[i:i+week_size] for i in range(0, len(percent_change), week_size)]

# only condition on previous week, no external factor
weekly_trainng_data = [
    (weekly_data[i-1], weekly_data[i]) 
    for i in range(1, len(weekly_data))
    if len(weekly_data[i]) == 5 and len(weekly_data[i-1]) == 5]

month_batch_size = 4
monthly_batches = [
    weekly_trainng_data[i:i+month_batch_size] 
    for i in range(0, len(weekly_trainng_data), month_batch_size)]


  group.index[0].to_period(group_by.value).to_timestamp(): group['close'].to_numpy()


# Training/Evaluation

In [39]:
batch_size = 4
batch_to_rmse = {}

for i in range(len(monthly_batches)):
    
    cvae = CVAE(5, 5).to(utilities.DEVICE)
    training_batches = copy.deepcopy(monthly_batches)
    test_batch = training_batches.pop(i)
    
    training_weeks = [week for batch in training_batches for week in batch]
    training_data = utilities.ConditionedMarketDataset(training_weeks)
    testing_data = utilities.ConditionedMarketDataset(test_batch)
   
    train_dataset = DataLoader(training_data, batch_size=batch_size, shuffle=True)
    testing_dataset = DataLoader(testing_data, batch_size=batch_size, shuffle=True)
    
    history = utilities.train_model(cvae, train_dataset, testing_dataset, epochs=10)
    utilities.save_loss(history, "loss.png")
    
    predicted_val_prices = []
    predicted_synthetic_prices = []
    actual_val_prices = []
    
    for batch in testing_dataset:
        
        price_batch = batch['price_data']
        print(price_batch)
        continue
        synthetic_price_batches = [torch.FloatTensor(np.array([np.random.randn(len(b)) for b in price_batch])) for _ in range(100)]
        
        conditioned_batch = batch['factor_data']
        price_batch = price_batch.to(utilities.DEVICE)
        
        predicted_prices = cvae(price_batch.float(), conditioned_batch.float())
        predicted_val_prices += (predicted_prices*std_price+mean_price).detach().numpy().tolist()
        actual_val_prices += (price_batch*std_price+mean_price).detach().numpy().tolist()
       
        sample_synthetic_prices = [cvae(synthetic_b.float(), conditioned_batch.float()).detach().numpy() for synthetic_b in synthetic_price_batches]
        mean_synthetic_prices = np.mean(sample_synthetic_prices, axis=0)
        predicted_synthetic_prices += (mean_synthetic_prices*std_price+mean_price).tolist()
    
    batch_to_rmse[i] = utilities.rmse_loss_fn(
        torch.FloatTensor(np.array(predicted_val_prices)), 
        torch.FloatTensor(np.array(actual_val_prices)))
    
    plt.plot(np.array(predicted_synthetic_prices).flatten(), label="synthetic mean over 100 samples")
    plt.plot(np.array(actual_val_prices).flatten(), label="historical realization")
    plt.legend()
    plt.xlabel("week number")
    plt.ylabel("price in USD")
    plt.savefig(f"experiments/experiment_1/graphs/synthetic_data_prediction_batch{i}.png")
    plt.clf()
    
plt.bar(batch_to_rmse.keys(), batch_to_rmse.values())
plt.xlabel("Test Batch Number")
plt.ylabel("RMSE between averaged synthetic and actual prices")
plt.savefig(f"experiments/experiment_1/graphs/rmse_{i}.png")
plt.clf()
    


tensor([[-1.7620, -1.7269, -1.8363, -1.7661, -1.8594],
        [-1.8537, -1.9083, -1.8848, -1.7980, -1.8632],
        [-1.8053, -1.8521, -1.8300, -1.7255, -1.7271],
        [-1.8062, -1.7804, -1.7736, -1.8562, -1.9135]], dtype=torch.float64)


<Figure size 1500x900 with 0 Axes>

tensor([[-1.7446, -1.7625, -1.7645, -1.7015, -1.7573],
        [-1.9133, -1.9169, -1.9715, -1.8861, -1.8148],
        [-1.6060, -1.5884, -1.5736, -1.5700, -1.6196],
        [-1.7373, -1.6843, -1.6945, -1.7339, -1.6261]], dtype=torch.float64)


<Figure size 1500x900 with 0 Axes>

tensor([[-1.4583, -1.4266, -1.4418, -1.4885, -1.4380],
        [-1.5077, -1.4785, -1.4841, -1.4776, -1.4801],
        [-1.5107, -1.5127, -1.5100, -1.4674, -1.4470],
        [-1.5974, -1.5938, -1.5208, -1.5267, -1.5342]], dtype=torch.float64)


<Figure size 1500x900 with 0 Axes>

tensor([[-1.4939, -1.4814, -1.4923, -1.4493, -1.4022],
        [-1.4147, -1.4400, -1.4029, -1.4439, -1.4699],
        [-1.3802, -1.3802, -1.3884, -1.3814, -1.3716],
        [-1.4019, -1.4072, -1.3741, -1.3591, -1.3546]], dtype=torch.float64)


<Figure size 1500x900 with 0 Axes>

tensor([[-1.3718, -1.3514, -1.3605, -1.3508, -1.3363],
        [-1.4348, -1.4755, -1.4298, -1.4735, -1.4721],
        [-1.4708, -1.4538, -1.4500, -1.3920, -1.4361],
        [-1.4882, -1.4590, -1.4654, -1.4051, -1.3732]], dtype=torch.float64)


<Figure size 1500x900 with 0 Axes>

tensor([[-1.3505, -1.3263, -1.3188, -1.3032, -1.3098],
        [-1.5099, -1.5924, -1.5109, -1.4325, -1.3688],
        [-1.3553, -1.3920, -1.4013, -1.4078, -1.3938],
        [-1.4357, -1.4056, -1.3922, -1.3999, -1.3385]], dtype=torch.float64)


<Figure size 1500x900 with 0 Axes>

tensor([[-1.2799, -1.2448, -1.2455, -1.2183, -1.2249],
        [-1.2117, -1.2167, -1.1964, -1.2150, -1.1929],
        [-1.3587, -1.3929, -1.3646, -1.3676, -1.2969],
        [-1.2063, -1.2040, -1.2092, -1.2036, -1.1956]], dtype=torch.float64)


<Figure size 1500x900 with 0 Axes>

tensor([[-1.1637, -1.1537, -1.1791, -1.1825, -1.1918],
        [-1.1997, -1.2312, -1.2169, -1.2117, -1.1716],
        [-1.1746, -1.1716, -1.1839, -1.1610, -1.1653],
        [-1.1510, -1.1766, -1.1673, -1.1562, -1.1635]], dtype=torch.float64)


<Figure size 1500x900 with 0 Axes>

tensor([[-1.1524, -1.1528, -1.1642, -1.2826, -1.2133],
        [-1.2837, -1.2860, -1.2375, -1.2808, -1.2799],
        [-1.1675, -1.1757, -1.1898, -1.1895, -1.1673],
        [-1.2788, -1.2260, -1.1943, -1.2206, -1.2618]], dtype=torch.float64)


<Figure size 1500x900 with 0 Axes>

tensor([[-1.2310, -1.2070, -1.2493, -1.2131, -1.2278],
        [-1.2516, -1.2287, -1.2249, -1.2430, -1.2192],
        [-1.2790, -1.2738, -1.2874, -1.2876, -1.3019],
        [-1.2738, -1.2613, -1.2686, -1.2688, -1.2468]], dtype=torch.float64)


<Figure size 1500x900 with 0 Axes>

tensor([[-1.1701, -1.1771, -1.1544, -1.1648, -1.1277],
        [-1.3338, -1.3634, -1.3831, -1.3895, -1.3005],
        [-1.2654, -1.2124, -1.1997, -1.2108, -1.2081],
        [-1.2620, -1.2720, -1.2850, -1.3003, -1.2989]], dtype=torch.float64)


<Figure size 1500x900 with 0 Axes>

tensor([[-1.1175, -1.1143, -1.0960, -1.1182, -1.1105],
        [-1.1216, -1.1404, -1.1392, -1.1091, -1.0928],
        [-1.0274, -1.0142, -0.9825, -0.9896, -0.9563],
        [-0.9964, -0.9771, -1.0161, -1.0059, -0.9859]], dtype=torch.float64)


<Figure size 1500x900 with 0 Axes>

tensor([[-1.0324, -1.0507, -1.0140, -0.9825, -0.9868],
        [-0.9710, -0.9882, -0.9776, -0.9943, -0.9778],
        [-0.9993, -1.0072, -1.0013, -0.9889, -1.0301],
        [-0.9662, -0.9828, -0.9837, -0.9712, -0.9821]], dtype=torch.float64)


<Figure size 1500x900 with 0 Axes>

tensor([[-0.9583, -0.9604, -0.9572, -0.9545, -0.9187],
        [-0.9907, -0.9583, -0.9142, -0.9191, -0.9268],
        [-0.9273, -0.9275, -0.9212, -0.8911, -0.8702],
        [-0.8433, -0.8200, -0.7932, -0.7966, -0.7898]], dtype=torch.float64)


<Figure size 1500x900 with 0 Axes>

tensor([[-0.7468, -0.7029, -0.7117, -0.7432, -0.7513],
        [-0.7581, -0.7615, -0.7581, -0.7507, -0.7434],
        [-0.7568, -0.6834, -0.7160, -0.7131, -0.7276],
        [-0.7454, -0.7547, -0.7493, -0.7305, -0.7273]], dtype=torch.float64)


<Figure size 1500x900 with 0 Axes>

tensor([[-0.7901, -0.8102, -0.8460, -0.8005, -0.8166],
        [-0.7848, -0.7781, -0.7620, -0.7753, -0.7824],
        [-0.8188, -0.8077, -0.8147, -0.8161, -0.8230],
        [-0.7790, -0.7962, -0.7824, -0.7869, -0.7842]], dtype=torch.float64)


<Figure size 1500x900 with 0 Axes>

tensor([[-0.8249, -0.7839, -0.8000, -0.7418, -0.7110],
        [-0.6807, -0.6898, -0.6990, -0.6723, -0.6755],
        [-0.7110, -0.7060, -0.6848, -0.6850, -0.6902],
        [-0.7128, -0.7092, -0.7201, -0.7085, -0.7056]], dtype=torch.float64)


<Figure size 1500x900 with 0 Axes>

tensor([[-0.5930, -0.5978, -0.6016, -0.5761, -0.5810],
        [-0.6028, -0.5838, -0.5876, -0.6039, -0.5944],
        [-0.6648, -0.6381, -0.6383, -0.6429, -0.6453],
        [-0.7697, -0.7509, -0.7167, -0.6884, -0.6768]], dtype=torch.float64)


<Figure size 1500x900 with 0 Axes>

tensor([[-0.6134, -0.6059, -0.6037, -0.6465, -0.5989],
        [-0.6299, -0.6243, -0.6281, -0.5883, -0.5774],
        [-0.5921, -0.6177, -0.5722, -0.6082, -0.6109],
        [-0.6460, -0.6370, -0.6250, -0.6155, -0.6644]], dtype=torch.float64)


KeyboardInterrupt: 

<Figure size 1500x900 with 0 Axes>