In [182]:
import sys
import numpy as np
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA


In [3]:
# Read the raw problem input: one header line followed by one line per stock.

# On HackerRank the input arrives on stdin:
# data = sys.stdin.readlines()
# data = [line.rstrip() for line in data]

# Locally (VS Code) the same input is read from a text file:
file_name = "stocks.txt"
with open(file_name, 'r') as file:
    # splitlines() copes with both "\n" and "\r\n" line endings and, unlike
    # split("\n"), does not leave a trailing '' entry when the file ends with
    # a newline.
    data = file.read().splitlines()

print(data)

['90 2 400', 'iStreet 10 4.54 5.53 6.56 5.54 7.60', 'HR 0 30.54 27.53 24.42 20.11 17.50']


In [116]:
# The first input line carries three integers: money, number of stocks, days left.
header_fields = data[0].split()
money = m = int(header_fields[0])
n_stocks = k = int(header_fields[1])
days_left = d = int(header_fields[2])

# The next k lines describe one stock each; keep them as lists of string tokens.
stocks_info = [data[line_no].split() for line_no in range(1, k + 1)]
stocks_info

[['iStreet', '10', '4.54', '5.53', '6.56', '5.54', '7.60'],
 ['HR', '0', '30.54', '27.53', '24.42', '20.11', '17.50']]

In [162]:
# 0. Store everything in a pd.DataFrame
# Each stock row holds a name, the amount owned and five prices; price_4 is the
# first price on the input line and price_0 the last one.
colnames = ['stock_name', 'owned'] + ['price_' + str(i) for i in range(4, -1, -1)]
numeric_cols = colnames[1:]

# The parsed tokens are strings. Convert with astype() so the columns really end
# up with a float dtype: writing converted values back through .iloc can leave
# the columns as object dtype.
stocks_df = (
    pd.DataFrame(stocks_info, columns=colnames)
    .astype({col: float for col in numeric_cols})
)

# 1. Let's obtain the day-to-day difference
# Relative change between consecutive prices, computed along each row.
# The first price has no predecessor, so its NaN is replaced by 0.
diff_cols = ['diff_4', 'diff_3', 'diff_2', 'diff_1', 'diff_0']
stocks_df[diff_cols] = (
    stocks_df.loc[:, 'price_4':'price_0']
    .pct_change(axis=1)
    .fillna(0)
)

stocks_df

Unnamed: 0,stock_name,owned,price_4,price_3,price_2,price_1,price_0,diff_4,diff_3,diff_2,diff_1,diff_0
0,iStreet,10,4.54,5.53,6.56,5.54,7.6,0.0,0.218062,0.186257,-0.155488,0.371841
1,HR,0,30.54,27.53,24.42,20.11,17.5,0.0,-0.098559,-0.112968,-0.176495,-0.129786


In [180]:
# 2. Split train-test data #####
# This is time-series data, so any split has to respect the temporal order.

# All four columns diff_3 .. diff_0 are selected for every stock; nothing is
# held out for testing here.
train_data = stocks_df.loc[:, slice('diff_3', 'diff_0')]
train_data

Unnamed: 0,diff_3,diff_2,diff_1,diff_0
0,0.218062,0.186257,-0.155488,0.371841
1,-0.098559,-0.112968,-0.176495,-0.129786


In [210]:
# Scratch check on the first stock (row 0) only: take its diff_3 .. diff_0
# values as a float array and build (without fitting) an ARIMA(1, 0, 1) model.
first_stock_changes = stocks_df.loc[0, 'diff_3':'diff_0']
x = np.asarray(first_stock_changes, dtype=float)
print(x)
model = ARIMA(x, order=(1, 0, 1))


[ 0.21806167  0.18625678 -0.1554878   0.37184116]


In [220]:
# 3. Fit ARIMA ####
# and
# 4. Make predictions for every day
# Every stock can follow a different pattern, so one model is fitted per stock
# and the fitted results are stored in a dictionary keyed by stock name ####

# Input: stocks_df, with the relative price changes in columns diff_3 .. diff_0.
# Result: stocks_df gains the columns day_1 .. day_<span>, holding the forecasts
# of that same series (relative changes, not prices).

span = 5  # number of future steps to forecast for each stock
arima_models = {}
next_days = ['day_' + str(i) for i in range(1, span + 1)]

forecasts = []
# Iterate over the index labels themselves, because .loc below is label-based.
for row_index in stocks_df.index:
    # diff_4 is not used: it only holds the 0 that was filled in for the first price.
    train_data = stocks_df.loc[row_index, 'diff_3':'diff_0']
    train_data = np.asarray(train_data).astype(float)

    # NOTE(review): only four observations per stock are fed to the model, which
    # is very little for an ARIMA(1, 0, 1) fit - treat the forecasts with caution.
    model = ARIMA(train_data, order=(1, 0, 1))
    model_fit = model.fit()

    arima_models[stocks_df.loc[row_index, 'stock_name']] = model_fit
    forecasts.append(model_fit.forecast(steps=span))

# Write all forecast columns in one assignment instead of enlarging the frame
# one row at a time inside the loop.
forecast_df = pd.DataFrame(forecasts, index=stocks_df.index, columns=next_days)
stocks_df[next_days] = forecast_df
stocks_df



Unnamed: 0,stock_name,owned,price_4,price_3,price_2,price_1,price_0,diff_4,diff_3,diff_2,...,next_day_diff,day1,day2,day3,day4,day_1,day_2,day_3,day_4,day_5
0,iStreet,10,4.54,5.53,6.56,5.54,7.6,0.0,0.218062,0.186257,...,0.0,-0.059856,0.209716,0.049414,0.144738,-0.059856,0.209716,0.049414,0.144738,0.088054
1,HR,0,30.54,27.53,24.42,20.11,17.5,0.0,-0.098559,-0.112968,...,0.0,-0.124129,-0.130065,-0.131348,-0.131625,-0.124129,-0.130065,-0.131348,-0.131625,-0.131685


In [219]:
# Display the current contents of stocks_df.
# NOTE(review): this cell's execution count (219) is lower than that of the
# forecasting cell (220), so the saved output may predate the latest forecasts;
# re-run the notebook top to bottom to refresh it.
stocks_df

Unnamed: 0,stock_name,owned,price_4,price_3,price_2,price_1,price_0,diff_4,diff_3,diff_2,diff_1,diff_0,next_day_diff,day1,day2,day3,day4
0,iStreet,10,4.54,5.53,6.56,5.54,7.6,0.0,0.218062,0.186257,-0.155488,0.371841,0.0,-0.059856,0.209716,0.049414,0.144738
1,HR,0,30.54,27.53,24.42,20.11,17.5,0.0,-0.098559,-0.112968,-0.176495,-0.129786,0.0,-0.124129,-0.130065,-0.131348,-0.131625
