# In [1]:
from alpha_vantage.timeseries import TimeSeries
import pandas as pd


#Data Preprocessing

import os

# The Alpha Vantage key can be supplied through the ALPHAVANTAGE_API_KEY
# environment variable; the literal is kept only as a backward-compatible
# default so the script still runs unchanged.
# NOTE(review): the default below is a credential committed to source -
# consider rotating it and removing the fallback.
api_key = os.environ.get('ALPHAVANTAGE_API_KEY') or '0IN175LIOUOBEICW'
ts = TimeSeries(key=api_key, output_format='pandas')

# Fetch the full daily-adjusted history as a DataFrame, plus the call's metadata
data, meta_data = ts.get_daily_adjusted(symbol='BOX', outputsize='full')

# Replace the numbered API column names with readable ones
data = data.rename(columns = {'1. open':'Open','2. high':'High', '3. low':'Low',
                 '4. close':'Close', '5. adjusted close':'Price', '6. volume':'Volume',
                 '7. dividend amount':'Dividends', '8. split coefficient':'Stock Splits'})

# Newest row first: row 0 is the most recent trading day
data = data.sort_values(by='date', ascending=False)

#Create the additional features
# Because rows are newest-first, shift(-1) pulls in the next (older) row and
# shift(1) / shift(7) pull in rows that are 1 / 7 positions newer.
data['Yesterday_Price'] = data['Price'].shift(-1)
data['Tomorrow_Price'] = data['Price'].shift(1)
data['Next_Week_Price'] = data['Price'].shift(7)

# Column order matters: later stages slice features/targets by position,
# with the three price targets kept as the last three columns.
data = data[['Open', 'High', 'Low', 'Volume', 'Stock Splits','Dividends', 'Yesterday_Price', 'Price','Tomorrow_Price','Next_Week_Price']]
today = data.head(1)
today_date = data.index[0]

from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer

# The shifts leave NaNs at the edges of the frame (e.g. Tomorrow_Price of the
# newest row); the imputer fills every missing value so the models can train.
imp = IterativeImputer(max_iter=10, verbose =0)
data2 = data.iloc[:,:]
imp.fit(data2)
df = imp.transform(data2)
# transform() returns a bare array, so restore the column labels. The date
# index is not passed here, so df gets a default integer index.
df = pd.DataFrame(df, columns=data2.columns)


# Initialize dictionary of values

from collections import defaultdict

# NOTE(review): `dd` is not read by any code visible here; it is kept only in
# case something outside this section references it.
dd = defaultdict(lambda:dd)

# One bookkeeping entry per prediction level (1, 2, 3). Every value starts as
# a placeholder string and is overwritten by the later stages (split, scale,
# train, predict). The target placeholders use the key "target" so they are
# the same slots those stages write to, instead of leaving stale
# 'train_target' / 'test_target' entries behind.
# The inner literal is evaluated once per level, so the levels do not share
# any nested dictionaries.
levels = {
    level: {
        "Raw Data": {"features": 'X', "target": 'Y'},
        "Train Data": {"features": 'X_train', "target": 'Y_train'},
        "Test Data": {"features": 'X_test', "target": 'Y_test'},
        "Scalar": 'scaler',
        "Model": 'mlp',
        "Results": {'train_acc': 'acc', 'test_acc': 'acc'},
        'Predictions': 'y_hat',
    }
    for level in (1, 2, 3)
}


#Split Data

from sklearn.model_selection import train_test_split

# For level n the target is the column at position n - 4 counted from the end
# of df, and the features are every column to its left.
# NOTE(review): with the column order built during preprocessing, level 3's
# features include Tomorrow_Price - confirm this is intended.
for level, bucket in levels.items():
    target_pos = int(level) - 4
    X = df.iloc[:, :target_pos].astype(float)
    y = df.iloc[:, target_pos].astype(float)

    # Random 80/20 split; no seed is set, so the split differs between runs
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, shuffle=True
    )

    # Store the raw, training and testing sets for this level
    bucket['Raw Data'].update(features=X, target=y)
    bucket['Train Data'].update(features=X_train, target=y_train)
    bucket['Test Data'].update(features=X_test, target=y_test)
   
 #Scale Data

from sklearn.preprocessing import StandardScaler

# Standardize each level's features. The scaler is fitted on the training
# features only and then applied to both the training and the testing features.
for level in levels:
    train_block = levels[level]['Train Data']
    test_block = levels[level]['Test Data']

    scaler = StandardScaler()
    fit_scaler = scaler.fit(train_block['features'])

    # Overwrite the stored features with their scaled versions and keep the
    # fitted scaler so the prediction stage can apply the same scaling
    train_block['features'] = fit_scaler.transform(train_block['features'])
    levels[level]['Scalar'] = fit_scaler
    test_block['features'] = fit_scaler.transform(test_block['features'])
 


 # Build Models

from sklearn.neural_network import MLPRegressor
from sklearn.model_selection import GridSearchCV

# Give every level its own untrained regressor
for level in (1, 2, 3):
    levels[level]['Model'] = MLPRegressor(max_iter = 300)

# Hyper-parameter grid searched for every level
parameter_space = {
    'hidden_layer_sizes':[(100,),(100,50,25),(100,50,25,25)],
    'alpha':[0.0001, 0.001, 0.01, 0.1, 1, 10],
    'learning_rate_init':[0.001, 0.01, 0.1, 1],
}

for level, bucket in levels.items():
    # Scaled features and targets produced by the earlier stages
    train_features = bucket['Train Data']['features']
    train_labels = bucket['Train Data']['target']
    test_features = bucket['Test Data']['features']
    test_labels = bucket['Test Data']['target']

    # 5-fold grid search over parameter_space, using all available cores
    mlp = GridSearchCV(bucket['Model'], parameter_space, n_jobs=-1, cv=5)
    mlp_trained = mlp.fit(train_features, train_labels)

    # Score the fitted search on both sets. For a regressor the default
    # scikit-learn score is R^2, despite the "*_acc" key names.
    train_acc = mlp_trained.score(train_features, train_labels)
    test_acc = mlp_trained.score(test_features, test_labels)

    # Replace the placeholder model with the fitted search and record metrics
    bucket['Model'] = mlp_trained
    bucket['Results']['train_acc'] = train_acc
    bucket['Results']['test_acc'] = test_acc

    
# Make Predictions

# Collect the train/test scores of every level into one table
# (rows: train_acc, test_acc; columns: Today, Tomorrow, Next Week)
result = {
    "Today" : levels[1]['Results'],
    "Tomorrow" : levels[2]['Results'],
    "Next Week" : levels[3]['Results'],
}
results = pd.DataFrame(result)

for l in levels:
    print("Level : " , l)
    #bring in each model and adjusted dataset
    mlp = levels[l]['Model']
    features = levels[l]['Raw Data']['features']
    scalar = levels[l]['Scalar']
    #scale the data to align with model scaling and select first row as today
    features_scaled = pd.DataFrame(scalar.transform(features), columns = features.columns)
    today = features_scaled.head(1)
    #make prediction and return it
    # The models were fitted on the plain arrays produced by the scaler, so
    # predict on a plain array too (avoids a feature-name mismatch warning).
    prediction = mlp.predict(today.to_numpy())
    levels[l]['Predictions'] = prediction.astype(float)

# One-row table holding the prediction of every level
predictions = {
    "Today" : levels[1]['Predictions'],
    "Tomorrow" : levels[2]['Predictions'],
    "Next Week" : levels[3]['Predictions'],
}
predictions = pd.DataFrame(predictions)

# DataFrame.append was removed in pandas 2.0; pd.concat stacks the prediction
# row on top of the two score rows in the same way.
summary = pd.concat([predictions, results], ignore_index=True)
summary.index = ['Prediction', 'train_acc', 'test_acc']
summary