# **M**achine **L**earning

### Version 1.0

---

### Imports

In [1]:
# Make the parent directory importable so that modules living one level up
# (such as the `tools` package imported in the next cell) can be resolved.
import sys

# Guarded so that re-running this cell does not keep appending duplicate
# "../" entries to sys.path.
if "../" not in sys.path:
    sys.path.append("../")

In [2]:
# Custom Modules
from tools.data_handler import DataHandler
from ml_tools import MLDataWrapper
from neuralnet import NeuralNetwork

In [3]:
import os
import timeit

import torch
from torch import nn
from torch import optim

# Prefer the C accelerator module and fall back to the standard `pickle`
# module when it is not available.
try:
    import _pickle as pickle
except ImportError:
    # Only a missing module should trigger the fallback; a bare `except:`
    # would also swallow KeyboardInterrupt / SystemExit and unrelated errors.
    import pickle

In [5]:
# Run on the first CUDA GPU when one is available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda:0')
else:
    device = torch.device('cpu')

### Data

In [6]:
# Build one dataloader per symbol known to the DataHandler.
# `initialize` is pointed at a serialized DataHandler file; presumably it
# restores the handler's state (e.g. its symbol list) from it — TODO confirm
# in tools/data_handler.py.
data_handler = DataHandler()
data_handler.initialize(path='../tools/serialized_tool_objects/datahandler.p')
# Maps symbol -> object returned by `get_dataloader`; the training cell indexes
# that object with the keys 'train' and 'test'.
data = {}
for symbol in data_handler.symbols:
    # Features and targets stored for this symbol.
    X, y = data_handler.load_from_npz(symbol)
    # NOTE: `ml_data_wrapper` stays defined after the loop and the
    # hyperparameter cell reads `ml_data_wrapper.shape[1]` from it, so this
    # name must not be renamed or made local.
    ml_data_wrapper = MLDataWrapper(X, y)
    data_loader = ml_data_wrapper.get_dataloader(train_batch_size=64) # default train/test size of 0.75 to 0.25
    data[symbol] = data_loader

### Neural Network

In [7]:
# Hyperparameters shared by every per-symbol model trained below
alpha = 0.005 # learning rate, passed as `lr` to the Adam optimizer
num_epochs = 50  # passed to `model.fit` as the number of training epochs
print_every = 30  # passed to `model.fit`; logging interval (presumably in batches — TODO confirm)
dropout_prob = 0.3  # passed to `NeuralNetwork` as `drop_p`


# NOTE(review): `ml_data_wrapper` is the loop variable left over from the data
# loading cell, i.e. the wrapper of the LAST symbol. This assumes every symbol
# has the same number of input features and that at least one symbol was
# loaded (otherwise the name is undefined) — confirm.
num_inputs = ml_data_wrapper.shape[1]
print("The models have {} input nodes".format(num_inputs))

The models have 15 input nodes


In [8]:
# Per-symbol training statistics: symbol -> dict of summary values.
stats = {}
start = timeit.default_timer()

# Train a fresh neural network for every symbol. The symbol is handed to
# `model.fit`; saving the trained model to a folder is not visible in this
# cell and presumably happens inside `fit` — TODO confirm in neuralnet.py.
# The final regressor is meant to load these per-symbol models into a
# dictionary and price an input with the model belonging to its symbol.
for symbol, dataloader in data.items():
    # Register the symbol before training so it shows up in `stats` even if
    # training is interrupted.
    stats[symbol] = {}

    model = NeuralNetwork(num_inputs, drop_p=dropout_prob).to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=alpha)

    # `fit` returns a 4-tuple: improvement flag, first and best test RMSE,
    # and the training duration in seconds.
    improved, first_test_rmse, best_test_rmse, seconds = model.fit(
        dataloader, symbol, optimizer, criterion, num_epochs,
        print_every=print_every,
    )

    # Create some statistics (maybe used later on)
    stats[symbol].update({
        'learned': 'X' if improved else '-',
        'first_test_rmse': first_test_rmse,
        'best_test_rmse': best_test_rmse,
        'seconds_trained': seconds,
        'num_data_train': len(dataloader['train'].dataset),
        'num_data_test': len(dataloader['test'].dataset),
    })

stop = timeit.default_timer()
seconds_total = stop - start
print("TOTAL TIME NEEDED: {:2.0f} MIN.".format(seconds_total / 60))

# Save the statistics. The target directory is created first so that a missing
# `stats/` folder cannot discard the results of the whole training run.
os.makedirs('stats', exist_ok=True)
with open('stats/training_stats', 'wb') as file:
    pickle.dump(stats, file)

----------------------------------------------------------------------------
|START TRAINING FOR SYMBOL: [   A]                                         |
| TRAIN EPOCH | PROCESSED DATA      | TRAIN RMSE | TEST RMSE  | CHECKPOINT |
----------------------------------------------------------------------------
|    1/   2   |      0/  3520 ( 0%) |    33.42   |    31.74   |     X      |
|    1/   2   |   1920/  3520 (55%) |    10.40   |    10.47   |     X      |
|    2/   2   |      0/  3520 ( 0%) |     5.58   |     5.62   |     X      |
|    2/   2   |   1920/  3520 (55%) |     5.51   |     4.80   |     X      |
----------------------------------------------------------------------------
|TIME NEEDED FOR TRAINING:     2 SEC.                                      |
|FINISHED TRAINING. MODEL HAS IMPROVED NOTEWORTHLY.                        |
----------------------------------------------------------------------------

---------------------------------------------------------------------------

KeyboardInterrupt: 