# Stock Trend Predictor
__Juan Javier Arosemena__
## Model Training

In [3]:
import os
from os.path import isfile, join
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import statistics
from statistics import mean, mode, median, stdev
import sagemaker
import boto3
from sagemaker.pytorch import PyTorch

In [9]:
# Prediction horizons (in days), each mapped to the name of the sub-folder
# that holds the dataset for that horizon.
time_words = {
    1: 'day',
    5: 'week',
    10: 'two_weeks',
    20: 'month',
    90: 'four_months',
    270: 'year',
}
# The horizons themselves, in the same order as the mapping above.
time_range = list(time_words)

# Local directories: the data root and the folder with the final datasets.
data_dir = 'data/'
final_data_dir = data_dir + 'final/'

# Key prefix under which the datasets are stored in S3.
prefix = 'stock-trend-predictor'

In [4]:
# IAM execution role that the training jobs and endpoints will run under
role = sagemaker.get_execution_role()

# SageMaker session used for every SDK call in this notebook
sagemaker_session = sagemaker.Session()

# Name of the session's default S3 bucket
bucket = sagemaker_session.default_bucket()

In [10]:
# Upload the local final-dataset directory to S3 under the project prefix.
# The returned value is used later as the base path of each training channel.
data = sagemaker_session.upload_data(
    path=final_data_dir,
    bucket=bucket,
    key_prefix=prefix,
)

In [12]:
# Build one PyTorch estimator per prediction horizon. Every estimator uses the
# same training script and instance type; they only differ in the dataset that
# is passed to fit() later, so the horizon value itself is not needed here.
estimators = []
for _ in time_range:
    estimator = PyTorch(entry_point='train.py',
                        source_dir='source',
                        # The SDK documents framework_version as a string. The
                        # float 1.0 only worked because it is string-formatted
                        # into the image tag; '1.0' formats to the same tag.
                        framework_version='1.0',
                        role=role,
                        train_instance_count=1,
                        train_instance_type='ml.p2.xlarge',
                        sagemaker_session=sagemaker_session)
    estimators.append(estimator)

In [14]:
# Launch one training job per horizon without blocking, so that all the jobs
# train in parallel. Each job reads the train.csv of its own horizon folder.
for t, estimator in zip(time_range, estimators):
    estimator.fit({'training': data+'/'+time_words[t]+'/train.csv'}, wait=False)

# Barrier: the following cells deploy the trained models, which requires the
# training jobs to have finished. Without this wait, a top-to-bottom run of the
# notebook would try to deploy models that do not exist yet.
# logs=False keeps the cell output short (progress only, no container logs).
for estimator in estimators:
    estimator.latest_training_job.wait(logs=False)

2019-10-18 21:41:44 Starting - Starting the training job...
2019-10-18 21:41:45 Starting - Launching requested ML instances...
2019-10-18 21:42:43 Starting - Preparing the instances for training.........
2019-10-18 21:44:01 Downloading - Downloading input data...
2019-10-18 21:44:26 Training - Downloading the training image...
2019-10-18 21:45:09 Training - Training image download completed. Training in progress..[31mbash: cannot set terminal process group (-1): Inappropriate ioctl for device[0m
[31mbash: no job control in this shell[0m
[31m2019-10-18 21:45:09,664 sagemaker-containers INFO     Imported framework sagemaker_pytorch_container.training[0m
[31m2019-10-18 21:45:09,688 sagemaker_pytorch_container.training INFO     Block until all host DNS lookups succeed.[0m
[31m2019-10-18 21:45:11,116 sagemaker_pytorch_container.training INFO     Invoking user training script.[0m
[31m2019-10-18 21:45:11,343 sagemaker-containers INFO     Module train does not provide a setup.py. [

In [17]:
# Request one endpoint per trained model without blocking, so that all the
# endpoints are provisioned in parallel.
predictors = []
for estimator in estimators:
    predictor = estimator.deploy(initial_instance_count=1,
                                 instance_type='ml.m4.xlarge',
                                 wait=False)
    predictors.append(predictor)

# Barrier: predict() can only be called once an endpoint is in service, so wait
# for every endpoint here instead of relying on the reader to pause before
# running the evaluation cell.
# NOTE: `predictor.endpoint` is the SDK v1 attribute name, matching the v1-style
# estimator arguments used in this notebook.
for predictor in predictors:
    sagemaker_session.wait_for_endpoint(predictor.endpoint)

--------------------------------------------------------------------------!

In [20]:
def _score_binary_predictions(y_true, y_pred):
    """Compare binary labels with (already rounded) predictions.

    Both inputs are flattened first, so a column vector of shape (N, 1) is
    compared element by element with N labels.

    Args:
        y_true: array-like with the expected labels.
        y_pred: array-like with the predicted labels.

    Returns:
        Tuple ``(accuracy, false_positives, false_negatives)`` where accuracy
        is a float (``nan`` when there are no samples) and the two counts are
        ints. A value counts as "positive" when it is truthy (non-zero).

    Raises:
        ValueError: if the number of predictions differs from the number of
            labels.
    """
    y_true = np.asarray(y_true).ravel()
    y_pred = np.asarray(y_pred).ravel()
    if y_true.size != y_pred.size:
        raise ValueError(
            'Got %d predictions for %d labels' % (y_pred.size, y_true.size))
    if y_true.size == 0:
        # Nothing to score; avoid dividing by zero.
        return float('nan'), 0, 0

    actual_positive = y_true.astype(bool)
    predicted_positive = y_pred.astype(bool)

    accuracy = int(np.count_nonzero(y_true == y_pred)) / y_true.size
    false_positives = int(np.count_nonzero(~actual_positive & predicted_positive))
    false_negatives = int(np.count_nonzero(actual_positive & ~predicted_positive))
    return accuracy, false_positives, false_negatives


try:
    for t, predictor in zip(time_range, predictors):
        # read in test data, assuming it is stored locally
        test_data = pd.read_csv(join(final_data_dir, time_words[t]+"/test.csv"), header=None, names=None)
        # labels are in the first column, features in the remaining ones
        test_y = test_data.iloc[:,0]
        test_x = test_data.iloc[:,1:]

        # First: query the endpoint and round its output to class labels
        test_y_preds = np.asarray(predictor.predict(test_x)).round()

        # Second: calculate the test accuracy and the error counts
        accuracy, fp, fn = _score_binary_predictions(test_y, test_y_preds)

        print(f'\n\n{t} DAYS PREDICTION MODEL:')
        print('Accuracy: '+str(accuracy))
        print('False positives: '+str(fp))
        print('False negatives: '+str(fn))
finally:
    # Endpoints are billed for as long as they exist, so tear all of them down
    # even when reading the data or scoring a model raised an error.
    for predictor in predictors:
        predictor.delete_endpoint()