# Training

In [3]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

import time
import sys 

sys.path.insert(0, './app/src')

from Dataset import *
from TCN import *
from Train_Eval import *


In [4]:
history_length = 100
num_inputs = 81
batch_size = 10

## Check Dataset and DataLoaders

In [6]:
# Create a Dataset object
TrainDataset = Dataset('../Data/Input/Basic/input_data.pkl','../Data/Input/Basic/input_target.pkl',
                       startdate='2005-01-01',enddate='2019-01-01', history_length=history_length)
TestDataset = Dataset('../Data/Input/Basic/input_data.pkl','../Data/Input/Basic/input_target.pkl',
                      startdate='2019-01-01',enddate='2022-01-01', history_length=history_length)


In [7]:
# Check lengths of datasets
print(len(TrainDataset),len(TestDataset))

3422 782


In [8]:
# Look at first datapoint in dataset
indata = np.load('../Data/Input/Basic/input_data.pkl', allow_pickle=True) 
intarget = np.load('../Data/Input/Basic/input_target.pkl', allow_pickle=True) 

indata.loc['2019-01-02']

GLD_High      0.346163
GLD_Low      -0.370889
GLD_Return    0.070000
AGG_High      0.037534
AGG_Low      -0.093835
                ...   
WTI_Low      -4.705630
WTI_Return    1.420000
XLP_High      0.316957
XLP_Low      -1.049921
XLP_Return   -0.590000
Name: 2019-01-02 00:00:00, Length: 81, dtype: float64

In [9]:
# Look at first datapoint past 2019
indata.loc['2019-01-02']

GLD_High      0.346163
GLD_Low      -0.370889
GLD_Return    0.070000
AGG_High      0.037534
AGG_Low      -0.093835
                ...   
WTI_Low      -4.705630
WTI_Return    1.420000
XLP_High      0.316957
XLP_Low      -1.049921
XLP_Return   -0.590000
Name: 2019-01-02 00:00:00, Length: 81, dtype: float64

In [10]:
# The first data sample from the trainset should contain the fisrt datapoint as its first entry
TrainDataset[0]

(tensor([[ 0.3978, -0.6551, -0.6500,  ...,  0.7826, -0.5652, -0.3500],
         [ 0.4921, -0.1640, -0.1600,  ...,  0.6556,  0.0000, -0.5200],
         [ 0.9727, -0.1898, -1.2200,  ...,  0.3480, -0.6960,  0.4800],
         ...,
         [ 0.2396, -0.1917,  0.2600,  ...,  0.1701, -0.3827, -0.1700],
         [ 0.0717, -0.4299,  0.3400,  ...,  0.2560, -0.4693, -0.3400],
         [ 0.2878, -0.0959, -0.4300,  ...,  0.2984, -0.1705,  0.0900]]),
 tensor([0.4557]))

In [11]:
# The first data sample from the testset should contain the first point in 2019 as its last entry
TestDataset[0]

(tensor([[ 0.3312, -0.0436, -0.1500,  ...,  0.4865, -0.1310, -0.3000],
         [ 0.4708, -0.0698, -0.0400,  ...,  0.4132, -0.2066, -0.3700],
         [ 0.7525, -0.1151, -1.5100,  ...,  0.3757, -0.4508,  0.0000],
         ...,
         [ 0.0165, -0.2809,  0.4100,  ...,  1.1469, -0.3955,  0.0000],
         [ 0.0082, -0.3464,  0.1600,  ...,  0.0788, -0.9256,  0.4200],
         [ 0.3462, -0.3709,  0.0700,  ...,  0.3170, -1.0499, -0.5900]]),
 tensor([0.9066]))

In [12]:
# Create loader objects
TrainLoader = DataLoader(TrainDataset, batch_size=batch_size, shuffle=True)
TestLoader = DataLoader(TestDataset, batch_size=len(TestDataset), shuffle=True)

In [13]:
# Test Dataloader
for batch_idx, batch_data in enumerate(TrainLoader):
    
    print(batch_idx)
    
    q,t = batch_data
    print(q)
    print(t)
    

0
tensor([[[ 0.1111, -0.1984,  0.4200,  ...,  0.9635, -0.0727, -0.7800],
         [ 0.0703, -1.5543,  1.6100,  ...,  1.3840, -0.2953, -1.4900],
         [ 0.4230, -0.3603, -0.2900,  ...,  0.1837, -0.7349,  0.4400],
         ...,
         [ 0.2617, -0.4963,  0.3800,  ...,  0.0957, -0.6507,  0.4000],
         [ 0.3078, -0.3531, -0.3300,  ...,  0.2286, -0.4953,  0.4600],
         [ 1.9756, -0.2848, -1.4700,  ...,  1.4831, -0.2696, -1.0900]],

        [[ 0.2870, -0.5310,  0.9100,  ...,  0.6761, -0.5409, -0.4400],
         [ 0.6370, -0.3149,  0.2700,  ...,  0.6441, -0.2712, -0.2700],
         [ 0.2153, -0.3803, -0.2600,  ...,  0.1344, -0.8401,  0.8800],
         ...,
         [ 0.2818, -0.2818,  0.8300,  ...,  0.3151, -0.2205, -0.1900],
         [ 0.2924, -0.3941,  0.7800,  ...,  0.4452, -0.1272, -0.9100],
         [ 0.0127, -0.7416,  0.2700,  ...,  0.4144, -0.0956, -0.2500]],

        [[ 0.4589, -0.5965,  0.6500,  ...,  0.0000, -0.4380,  0.0000],
         [ 0.3810, -0.3810,  0.3500,  ..., 

In [14]:
# Create model object
TestTCN = TCN(history_length=history_length, num_inputs=num_inputs, num_channels=[5,10,5,5], kernel_size=3, dropout=0.2)


In [15]:
# Test Model
for batch_idx, batch_data in enumerate(TrainLoader):
    
    print(batch_idx)
    
    q,t = batch_data
    
    means, variances = TestTCN(q)
    
    print(means)
    print(variances)
    
    t = t.squeeze(-1)
    
    print(t)

0
tensor([ 0.0544,  0.0441, -0.0031,  0.0729,  0.0606,  0.0363, -0.0050,  0.0198,
         0.0085,  0.0219], grad_fn=<SelectBackward0>)
tensor([0.4781, 0.4696, 0.4667, 0.4823, 0.4851, 0.4876, 0.4586, 0.4835, 0.4908,
        0.4736], grad_fn=<MulBackward0>)
tensor([ 1.0341, -1.4088, -0.4402, -0.4355, -0.0716,  2.2296, -0.0253,  2.0685,
         0.9864,  1.6478])
1
tensor([ 0.0393,  0.0556,  0.0248,  0.0482,  0.0352, -0.0085,  0.0054,  0.0047,
         0.0555,  0.0279], grad_fn=<SelectBackward0>)
tensor([0.4756, 0.4902, 0.4804, 0.4715, 0.4712, 0.4905, 0.4803, 0.4861, 0.4829,
        0.4710], grad_fn=<MulBackward0>)
tensor([-0.8493, -0.7941, -0.6559,  0.3778,  0.1889,  0.5146,  0.0703,  0.3017,
         0.3528,  0.0572])
2
tensor([0.0556, 0.0423, 0.0724, 0.0168, 0.0237, 0.0691, 0.0387, 0.0301, 0.0950,
        0.0117], grad_fn=<SelectBackward0>)
tensor([0.4657, 0.4924, 0.4881, 0.4832, 0.4641, 0.4869, 0.4911, 0.4949, 0.4794,
        0.4728], grad_fn=<MulBackward0>)
tensor([ 0.5067,  0.0518,

## Loss Functions

To begin with we have two outputs from our network, we want these to correspond roughly to the mean and variance of our predictions. The purpose of this is to provide the user not only with a prediction for a price change but also a confidence in that prediction characterized by the variance value.




In [18]:
loss_fn = nn.GaussianNLLLoss(reduction='mean')

In [19]:
# Test Model
for batch_idx, batch_data in enumerate(TrainLoader):
    
    print(batch_idx)
    
    q,t = batch_data
    
    means, variances = TestTCN(q)
    
    print(means)
    print(variances)
    
    t = t.squeeze(-1)
    
    print(t)

    loss = loss_fn(means, t, variances)
    print(loss)
    print(loss.item())
    print()

0
tensor([0.0471, 0.0685, 0.0118, 0.0217, 0.0452, 0.0247, 0.0566, 0.0362, 0.0569,
        0.0384], grad_fn=<SelectBackward0>)
tensor([0.4704, 0.4854, 0.4719, 0.4622, 0.4871, 0.4935, 0.4843, 0.4804, 0.4705,
        0.4833], grad_fn=<MulBackward0>)
tensor([-0.8405, -0.4444, -1.1914,  2.6605,  0.5663, -0.1217, -0.4132,  0.9983,
         0.3626, -0.5254])
tensor(0.8413, grad_fn=<MeanBackward0>)
0.8413479924201965

1
tensor([-0.0057, -0.0085,  0.0358,  0.0460,  0.0563,  0.0248,  0.0145,  0.0475,
         0.0084,  0.0531], grad_fn=<SelectBackward0>)
tensor([0.4738, 0.4857, 0.4609, 0.4877, 0.4573, 0.4851, 0.4868, 0.4745, 0.4717,
        0.4701], grad_fn=<MulBackward0>)
tensor([-3.8086, -0.1507, -0.0160,  0.7351,  0.3415,  1.7904, -0.5501,  0.5144,
         2.7408, -0.0877])
tensor(2.3846, grad_fn=<MeanBackward0>)
2.3846335411071777

2
tensor([0.0573, 0.0158, 0.0359, 0.0553, 0.0521, 0.0550, 0.0178, 0.0656, 0.0283,
        0.0457], grad_fn=<SelectBackward0>)
tensor([0.4847, 0.4902, 0.4826, 0.48

## Train Test

In [21]:
Dataiter = iter(TrainLoader)
q,t = next(Dataiter)

In [22]:
optimizer = optim.AdamW(TestTCN.parameters(),lr=5e-4, weight_decay=0.01)

log_interval = 50

writer = SummaryWriter('test_output')

writer.add_graph(TestTCN,q)


In [23]:
# Test Model

TestTCN.train()

total_samples = 0
total_loss = 0
start_time = time.time()

for batch_idx, batch_data in enumerate(TrainLoader):
    
    print(batch_idx+1)
    
    total_samples += len(batch_data[0])
    q,t = batch_data
    
    optimizer.zero_grad()
    
    means, variances = TestTCN(q)
    t = t.squeeze(-1)
    
    loss = loss_fn(means, t, variances)
    total_loss += loss.item()
    
    loss.backward()
    optimizer.step()
    
    if (batch_idx > 0 and (batch_idx+1) % log_interval == 0) or (batch_idx+1 == len(TrainLoader)):
        
        avg_loss = total_loss / log_interval
        elapsed = time.time() - start_time
        
        print('| {:5d}/{:5d} batches | {:5d}/{:5d} samples |'
              ' ms/batch {:5.2f} | loss {:5.8f} |'.format(
            batch_idx+1, len(TrainLoader), total_samples, len(TrainLoader.sampler),
            elapsed * 1000 / log_interval, avg_loss))
        
        batch_iter = batch_idx+1
            
        writer.add_scalar('loss', avg_loss, batch_iter)
        writer.add_scalar('lr', optimizer.param_groups[0]['lr'], batch_iter)
        
        start_time = time.time()
        total_loss = 0
        
    print()

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50
|    50/  343 batches |   500/ 3422 samples | ms/batch  4.58 | loss 0.69342834 |

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100
|   100/  343 batches |  1000/ 3422 samples | ms/batch  4.58 | loss 0.80097201 |

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150
|   150/  343 batches |  1500/ 3422 samples | ms/batch  3.28 | loss 0.65452175 |

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173



## Evaluate Test

In [25]:
# Test Evaluate

TestTCN.eval()

with torch.no_grad():

    for batch_data in TestLoader: 
    
        batch_inputs, batch_targets = batch_data
        means, variances = TestTCN(batch_inputs)
        batch_targets = batch_targets.squeeze(-1)
        
        print('means\n',means)
        print('variances\n',variances)
        print('targets\n',batch_targets)
        
        prob_density = (1.0/np.sqrt(2.0*np.pi*variances))*np.exp(-0.5*(means-batch_targets)**2/variances)
        absolute_diff = np.abs(means-batch_targets)
        avrg_mean = np.average(means)
        avrg_abs_mean = np.average(abs(means))
        avrg_variance = np.average(variances)

        
        print('probability density\n',prob_density)
        print('absolute difference\n',absolute_diff)
        print('average mean\n',avrg_mean)
        print('average absolute mean\n',avrg_abs_mean)
        print('average variance\n',avrg_variance)
        
        print('------------------------------------')

means
 tensor([-6.7464e-04,  2.6822e-03,  4.8794e-04, -1.7522e-03, -3.9598e-03,
        -2.4428e-03,  3.7999e-03, -4.4279e-03,  2.2213e-02, -2.5096e-03,
        -7.8684e-04, -3.1355e-02, -4.4016e-03, -1.9903e-03,  3.4501e-04,
        -1.4080e-03,  2.0838e-02, -1.7182e-03, -1.5345e-03,  2.4324e-03,
        -3.9833e-03, -2.2700e-02,  1.7515e-03,  5.6871e-03,  5.9170e-04,
        -5.0857e-02, -1.4509e-03, -4.8348e-03, -2.0543e-03,  1.5530e-03,
        -9.9989e-05, -2.4373e-03,  9.9958e-03, -3.6081e-04,  1.2364e-02,
        -1.9686e-02, -1.9585e-04, -2.4855e-02,  1.0845e-03,  5.8648e-02,
         1.7502e-03, -3.7067e-03, -8.8533e-03,  2.8512e-02, -2.7123e-03,
        -3.6836e-03, -1.0931e-02, -2.5729e-03,  2.6782e-03, -2.6621e-02,
        -2.2417e-03, -2.0540e-03, -6.8228e-04, -4.6417e-03, -2.3708e-03,
         4.3817e-03,  7.6494e-03,  1.4024e-03, -2.2257e-03, -2.2502e-02,
        -1.6039e-03,  1.0005e-03, -2.4467e-03,  1.7399e-03,  1.9335e-03,
         6.5790e-03, -6.7967e-04, -1.0102e-0

In [26]:
# Test Evaluate

def evaluate(model, test_loader):

    model.eval()

    with torch.no_grad():
        
        for batch_data in test_loader: 

            batch_inputs, batch_targets = batch_data
            means, variances = model(batch_inputs)
            batch_targets = batch_targets.squeeze(-1)

            print('means\n',means)
            print('variances\n',variances)
            print('targets\n',batch_targets)

            prob_density = (1.0/np.sqrt(2.0*np.pi*variances))*np.exp(-0.5*(means-batch_targets)**2/variances)
            absolute_diff = np.abs(means-batch_targets)
            avrg_mean = np.average(means)
            avrg_abs_mean = np.average(abs(means))
            avrg_variance = np.average(variances)


            print('probability density\n',prob_density)
            print('absolute difference\n',absolute_diff)
            print('average mean\n',avrg_mean)
            print('average absolute mean\n',avrg_abs_mean)
            print('average variance\n',avrg_variance)

            print('------------------------------------')

In [27]:
evaluate(TestTCN,TestLoader)

means
 tensor([ 4.4756e-03, -2.6621e-02, -1.7182e-03,  3.4048e-03,  3.9394e-03,
         2.3008e-02,  1.3272e-02,  1.1252e-03, -2.2417e-03, -6.1291e-03,
         8.0311e-02,  6.5936e-04, -4.1377e-03, -2.4721e-03,  2.3390e-03,
        -1.4485e-03, -1.5865e-03, -5.9693e-04,  7.0628e-03,  8.4505e-03,
         1.0438e-03, -3.3123e-03,  1.7914e-03,  3.3881e-03, -3.5048e-03,
        -3.7067e-03, -1.6693e-02,  1.1568e-03, -3.5312e-02,  8.8969e-05,
         3.0555e-04, -1.6000e-03, -6.1746e-03, -1.5283e-03, -1.1280e-02,
        -5.0857e-02, -9.1414e-04,  1.9677e-03, -4.0507e-04, -2.6721e-03,
         9.5544e-04, -1.6316e-02, -3.4925e-03,  6.3777e-02,  7.6690e-04,
        -3.1922e-02,  2.0782e-03,  1.0223e-02,  6.2776e-02, -4.4279e-03,
         7.4951e-04, -5.2718e-03, -5.1785e-03,  3.4457e-03, -1.4080e-03,
        -2.3708e-03,  1.4024e-03, -1.0102e-03, -2.3348e-03, -9.6613e-04,
         5.1062e-03,  6.1150e-04, -2.5548e-03, -3.4552e-03, -3.3436e-03,
         2.1491e-02, -3.6937e-03, -1.1190e-0

In [28]:
# Test Evaluate

def evaluate(model, test_loader):

    model.eval()
    
    with torch.no_grad():
                
        for data in test_loader:

            inputs, targets = data
            targets = targets.squeeze(-1)
            
            means, variances = model(inputs)
            
            loss = loss_fn(means, targets, variances)
            prob_density = (1.0/np.sqrt(2.0*np.pi*variances))*np.exp(-0.5*(means-targets)**2/variances)
            absolute_diff = np.abs(means-targets)
        
        return loss, means, variances, prob_density, absolute_diff
            

In [29]:
loss, means, variances, prob_densities, abs_diffs = evaluate(TestTCN,TestLoader)

print(loss)
print(means.shape)
print(variances.shape)
print(prob_densities.shape)
print(abs_diffs.shape)

tensor(0.4564)
torch.Size([782])
torch.Size([782])
torch.Size([782])
torch.Size([782])


In [30]:
print(loss)
print(torch.mean(prob_densities))
print(torch.mean(abs_diffs))
print(torch.mean(means))
print(torch.mean(abs(means)))
print(torch.mean(variances))


tensor(0.4564)
tensor(0.3151)
tensor(0.6658)
tensor(-3.6187e-05)
tensor(0.0080)
tensor(0.9534)


In [31]:
writer.add_scalar('average loss', loss, 1)
writer.add_scalar('average probability density', torch.mean(prob_densities), 1)
writer.add_scalar('average absolute difference', torch.mean(abs_diffs), 1)
writer.add_scalar('average mean', torch.mean(means), 1)
writer.add_scalar('average absolute mean', torch.mean(abs(means)), 1)
writer.add_scalar('average variance', torch.mean(variances), 1)