# Predicting stock prices with AutoGluon

In [48]:
# Parameters
# Default run configuration; set_params() in the widget cell can overwrite these globals.
ticker: str = "AAPL"        # symbol handed to gather_data()
forecast_months: int = 10   # length of the test window that ends at the limit date
lookback_months: int = 30   # length of the training window that precedes the test window


In [3]:
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgets
from ipywidgets import IntSlider, FloatSlider, Checkbox, Dropdown

# Widget style shared by both sliders below.
style = dict(description_width="initial")

@interact_manual(
    p_forecast_months=IntSlider(value=10, min=1, max=100, style=style),
    p_lookback_months=IntSlider(value=30, min=3, max=300, style=style),
    p_ticker=Dropdown(options=['AAPL', 'GLD']),
)
def set_params(p_forecast_months, p_lookback_months, p_ticker):
    """Copy the widget selections into the notebook-level parameters.

    Invoked through ipywidgets' ``interact_manual`` with the current values of
    the two sliders and the dropdown. Overwrites the globals ``ticker``,
    ``forecast_months`` and ``lookback_months``; returns nothing.
    """
    global ticker, forecast_months, lookback_months
    ticker, forecast_months, lookback_months = (
        p_ticker,
        p_forecast_months,
        p_lookback_months,
    )

interactive(children=(IntSlider(value=10, description='p_forecast_months', min=1, style=SliderStyle(descriptio…

In [53]:
import time
import datetime
from dateutil.relativedelta import relativedelta
import pandas as pd
import os
import boto3

def get_data_df(ticker, period1, period2, suffix='', interval='1d'):
    """Download price history for ``ticker`` from Yahoo Finance as a DataFrame.

    Parameters
    ----------
    ticker : str
        Symbol to download. It is percent-encoded before being placed in the
        URL path, so symbols containing reserved characters (e.g. ``^GSPC``)
        still produce a well-formed URL.
    period1, period2 : datetime.date or datetime.datetime
        Start and end of the requested window. Each one is converted to a Unix
        timestamp with ``time.mktime``, i.e. interpreted in the local time zone
        of the machine running the notebook.
    suffix : str, optional
        Unused; kept only so existing callers that pass it keep working.
    interval : str, optional
        Sampling interval forwarded to the endpoint (default ``'1d'``).

    Returns
    -------
    pandas.DataFrame
        The CSV returned by the endpoint, with the ``'Adj Close'`` column
        renamed to ``'Adj_Close'``.

    Raises
    ------
    Whatever ``pandas.read_csv`` raises when the URL cannot be fetched or parsed.
    """
    # Local import: keeps this cell self-contained without touching the import cell.
    from urllib.parse import quote, urlencode

    query = urlencode({
        'period1': int(time.mktime(period1.timetuple())),
        'period2': int(time.mktime(period2.timetuple())),
        'interval': interval,
        'events': 'history',
        'includeAdjustedClose': 'true',
    })
    # safe='' so that every reserved character in the symbol is escaped.
    symbol = quote(str(ticker), safe='')
    url = f'https://query1.finance.yahoo.com/v7/finance/download/{symbol}?{query}'

    df = pd.read_csv(url)
    # Normalise the column name so it has no embedded space.
    return df.rename(columns={'Adj Close': 'Adj_Close'})

def save_and_upload(df_train, df_test, bucket='edgarin-prj-stock-prediction-uw2'):
    """Write the train/test frames to CSV and upload both files to S3.

    The files are written to the current working directory, uploaded under the
    ``data/`` prefix of ``bucket``, and then deleted locally. The local copies
    are removed even when writing or uploading fails, so a failed run does not
    leave stray CSV files behind.

    NOTE: the file-name prefix is built from the notebook-level globals
    ``ticker``, ``forecast_months`` and ``lookback_months`` (not from
    arguments), plus a microsecond-resolution timestamp.

    Parameters
    ----------
    df_train, df_test : pandas.DataFrame
        Frames to persist; written without their index.
    bucket : str, optional
        Destination S3 bucket. Defaults to the project bucket.

    Returns
    -------
    str
        The shared file-name prefix, ``<prefix>_train.csv`` / ``<prefix>_test.csv``.

    Raises
    ------
    Whatever ``DataFrame.to_csv`` or the boto3 client raises; local files are
    cleaned up before the exception propagates.
    """
    str_time = datetime.datetime.now().strftime('%Y-%m-%d-%H-%M-%S-%f')
    filename_prefix = f'{ticker}-f{forecast_months}-b{lookback_months}-{str_time}'

    train_filename = f'{filename_prefix}_train.csv'
    test_filename = f'{filename_prefix}_test.csv'
    train_local_path = f'./{train_filename}'
    test_local_path = f'./{test_filename}'

    try:
        # Write both files first so nothing is uploaded if either write fails.
        df_train.to_csv(train_local_path, index=False)
        df_test.to_csv(test_local_path, index=False)

        s3 = boto3.client('s3')
        s3.upload_file(train_local_path, bucket, f'data/{train_filename}')
        s3.upload_file(test_local_path, bucket, f'data/{test_filename}')
    finally:
        # A file may be missing if the failure happened before it was written.
        for local_path in (train_local_path, test_local_path):
            if os.path.exists(local_path):
                os.remove(local_path)

    return filename_prefix

def gather_data(ticker, forecast_months, lookback_months, limit_date=None):
    """Download train/test price history for ``ticker`` and upload it to S3.

    Three milestones define the two windows::

        period1 ---- training window ---- period2 ---- test window ---- period3

    ``period3`` is ``limit_date`` (today when omitted), ``period2`` lies
    ``forecast_months`` before it, and ``period1`` lies ``lookback_months``
    before ``period2``.

    NOTE: ``save_and_upload`` names the uploaded files from the notebook-level
    ``ticker`` / ``forecast_months`` / ``lookback_months`` globals rather than
    from these arguments, so keep the two in sync when calling this directly.

    Parameters
    ----------
    ticker : str
        Symbol to download.
    forecast_months : int
        Length of the test window in months; must be positive.
    lookback_months : int
        Length of the training window in months; must be positive.
    limit_date : datetime.date or datetime.datetime, optional
        End of the test window, e.g. ``datetime.datetime(2020, 12, 1, 23, 59)``.
        Defaults to ``datetime.date.today()``.

    Returns
    -------
    str
        The file-name prefix returned by ``save_and_upload``.

    Raises
    ------
    ValueError
        If ``forecast_months`` or ``lookback_months`` is not positive.
    """
    # Reject empty or inverted windows before any network request is made.
    if forecast_months <= 0 or lookback_months <= 0:
        raise ValueError(
            'forecast_months and lookback_months must be positive, got '
            f'forecast_months={forecast_months!r}, lookback_months={lookback_months!r}'
        )

    period3 = limit_date if limit_date is not None else datetime.date.today()
    # relativedelta steps by calendar months rather than fixed 30-day blocks.
    period2 = period3 - relativedelta(months=forecast_months)
    period1 = period2 - relativedelta(months=lookback_months)

    df_train = get_data_df(ticker, period1, period2)
    df_test = get_data_df(ticker, period2, period3)

    return save_and_upload(df_train, df_test)



In [55]:
# Fetch, split and upload the data; the returned file-name prefix is the cell output.
gather_data(
    ticker=ticker,
    forecast_months=forecast_months,
    lookback_months=lookback_months,
)

'AAPL-f10-b30-2022-02-20-04-11-13-688429'

In [55]:
# NOTE(review): no visible cell assigns a notebook-level `df_train` (inside
# gather_data it is only a local) — presumably left in the kernel by an earlier
# experiment; confirm this cell still works after Restart & Run All.
df_train

Unnamed: 0,Date,Open,High,Low,Close,target,Volume
0,2018-10-18,54.465000,54.935001,53.250000,54.005001,52.178070,130325200
1,2018-10-19,54.514999,55.314999,54.357498,54.827499,52.972733,132314800
2,2018-10-22,54.947498,55.840000,54.735001,55.162498,53.296410,115168400
3,2018-10-23,53.957500,55.812500,53.674999,55.682499,53.798817,155071200
4,2018-10-24,55.650002,56.057499,53.634998,53.772499,51.953430,163702000
...,...,...,...,...,...,...,...
622,2021-04-12,132.520004,132.850006,130.630005,131.240005,130.464600,91420000
623,2021-04-13,132.440002,134.660004,131.929993,134.429993,133.635727,91266500
624,2021-04-14,134.940002,135.000000,131.660004,132.029999,131.249924,87222800
625,2021-04-15,133.820007,135.000000,133.639999,134.500000,133.705307,89347100


In [22]:
# NOTE(review): no visible cell assigns a notebook-level `df_test` (inside
# gather_data it is only a local) — presumably left in the kernel by an earlier
# experiment; confirm this cell still works after Restart & Run All.
df_test

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2021-04-19,133.509995,135.470001,133.339996,134.839996,134.043320,94264200
1,2021-04-20,135.020004,135.529999,131.809998,133.110001,132.323547,94812300
2,2021-04-21,132.360001,133.750000,131.300003,133.500000,132.711227,68847100
3,2021-04-22,133.039993,134.149994,131.410004,131.940002,131.160461,84566500
4,2021-04-23,132.160004,135.119995,132.160004,134.320007,133.526382,78657500
...,...,...,...,...,...,...,...
207,2022-02-10,174.139999,175.479996,171.550003,172.119995,172.119995,90865900
208,2022-02-11,172.330002,173.080002,168.039993,168.639999,168.639999,98566000
209,2022-02-14,167.369995,169.580002,166.559998,168.880005,168.880005,86062800
210,2022-02-15,170.970001,172.949997,170.250000,172.789993,172.789993,64286300


In [56]:
import boto3
# Notebook-level S3 resource handle used by get_s3_file_content().
s3 = boto3.resource('s3')

def get_s3_file_content(key):
    """Return the object stored under ``key`` in the project bucket as text.

    The object body is read in full and decoded as UTF-8.
    """
    response = s3.Object('edgarin-prj-stock-prediction-uw2', key).get()
    raw_bytes = response['Body'].read()
    return raw_bytes.decode('utf-8')

def read_results(training_job_name):
    """Fetch the four result artifacts of a training job from S3.

    Every artifact lives under ``results/<training_job_name>/`` and shares the
    ``results`` file-name stem. Each one is returned as raw text.

    Returns
    -------
    tuple of str
        ``(predictions, fit_summary, leaderboard, model_performance)``.
    """
    base_key = f'results/{training_job_name}/results'
    # Order matters: it is the order of the returned tuple.
    artifact_suffixes = (
        '_test_predictions.csv',
        '_fit_summary.txt',
        '_leaderboard.csv',
        '_model_performance.txt',
    )
    return tuple(
        get_s3_file_content(f'{base_key}{suffix}') for suffix in artifact_suffixes
    )


In [59]:
# Results are read for one specific, already finished job (prefix as returned by gather_data).
training_job_name = 'AAPL-f10-b30-2022-02-20-04-11-13-688429'
(
    predictions,
    fit_summary,
    leaderboard,
    model_performance,
) = read_results(training_job_name)
print(predictions)

True,Predicted
132.323532,132.40031
132.711227,132.41559
131.160446,131.15169
133.526398,133.21661
133.924042,133.84695
133.59596299999998,133.7063
132.79075600000002,132.81955
132.69134499999998,132.89024
130.683289,130.65808
131.756897,131.63023
127.094612,127.39765
127.34314,127.60857
128.973465,128.84077
129.660553,129.6167
126.314705,126.09537
125.378685,125.07718
122.251923,122.07181
124.442642,124.345436
126.912178,126.52559
125.73716,125.36084
124.323143,124.3097
124.163834,123.72084
126.772766,126.403595
124.900703,124.727356
126.563652,126.12645
126.364502,125.9727
126.314705,125.88548
124.751335,124.64348
124.08416,124.03655
123.755554,123.61707
124.532257,124.26208
123.018684,122.94005
125.358765,125.094025
125.368721,125.09769
126.20517,125.81853
126.593529,126.19222
125.577835,125.371056
126.812599,126.46367
129.929382,129.52779
129.092926,129.07797
129.600784,129.37845
131.233856,130.87552
129.90948500000002,129.89873
131.741699,131.01955
133.414612,132.74883
133.135803,

'1'