In [1]:
import pandas as pd
import numpy as np
from statsmodels.tsa.arima_model import ARIMA

# Alibaba Data
### Write more

In [2]:
alibaba_data=pd.read_pickle('Alibaba_Azure.pkl')
alibaba_data=alibaba_data[0:8751]
endvalue=alibaba_data.shape[0]
alibaba_data.shape

(8751, 1)

# Azure Data

In [3]:
azure_data=pd.read_pickle('Azure_Alibaba.pkl')
azure_data.shape

(8571, 1)

In [4]:
azure_data=np.array(azure_data,dtype=float)
azure_data=np.ravel(azure_data)
azure_data.shape

(8571,)

In [5]:
alibaba_data=np.array(alibaba_data,dtype=float)
alibaba_data=np.ravel(alibaba_data)
alibaba_data.shape

(8751,)

## Removing NaN values

In [6]:
azure_data=np.nan_to_num(azure_data)
alibaba_data=np.nan_to_num(alibaba_data)

## Combining the data into one

In [7]:
combined=np.append(alibaba_data,azure_data)

## Generating Training and Test data
### Train and Test data is generated according to the shape of the traces

In [8]:
train= combined[0:endvalue]

## Function to get valid list

In [9]:
def try_forecast_model(p,q,d):
    temp=[]
    model=ARIMA(train,order=(p,q,d))
    try:
        model_fit=model.fit(disp=0)
    except:
        temp.append(p)
        temp.append(q)
        temp.append(d)
        invalid_list.append(temp)

## Declaring 'p','q','d' values for the model
### These values are being declared to do a sort of grid search

In [10]:
AR_parameter=[1,2,3,4,5,6,7,8,9,10,11,12]
MA_parameter=[1,2]
difference=[1,2,3,4,5]

In [11]:
total_list=[]
invalid_list=[]
for p in AR_parameter:
    for q in MA_parameter:
        for d in difference:
            initial_temp=[]
            initial_temp.append(p)
            initial_temp.append(q)
            initial_temp.append(d)
            total_list.append(initial_temp)
            try_forecast_model(p,q,d)



## Finding a valid list of hyperparameters

In [12]:
valid_list = []
for list in total_list:
    if list not in invalid_list:
        valid_list.append(list) 

## Forecasting Model

In [13]:
rmse_accuracy=[]
predicted=[]
mape=[]
def forecast_model(p,q,d):
    temp=[]
    model=ARIMA(train,order=(p,q,d))
    model_fit=model.fit(disp=0)
    end_values=[10,15,50,100]
    for value in end_values:
        forecast=model_fit.predict(start=endvalue,end=endvalue+value)
        predicted.append(forecast)
        actual_values=combined[endvalue:(endvalue+value+1)]
        accuracy=np.mean((forecast - actual_values)**2)**.5
        rmse_accuracy.append(accuracy)
        acc = np.mean(np.abs(forecast - actual_values)/np.abs(actual_values))
        mape.append(acc)

### Running the model

In [14]:
for val in valid_list:
    forecast_model(val[0],val[1],val[2])



In [15]:
rmse_accuracy

[0.004717542492775101,
 0.003996631655435098,
 0.0027380915253024402,
 0.003172196301251046,
 0.004543557050719765,
 0.0038550174605030073,
 0.0026735979336141484,
 0.0031442992291838825,
 0.0046936282924203866,
 0.003977206884372773,
 0.002729200972618824,
 0.0031683263780215407,
 0.004863411290432565,
 0.004115148146702495,
 0.0027926367865067497,
 0.003196118023848854,
 0.004687619548166118,
 0.003972309698671974,
 0.0027269578005206783,
 0.0031673506506058936,
 0.004734944158730235,
 0.004010755062378641,
 0.0027445654402624376,
 0.003175022154742402,
 0.00459724899380854,
 0.0038989110912986714,
 0.002693434423300642,
 0.0031528336268064495,
 0.004787565679322591,
 0.004053324455086346,
 0.0027639379246748247,
 0.003183493263573654,
 0.005140580570076079,
 0.0043405962744053435,
 0.0028977268627223907,
 0.0032430124218334098,
 0.004884452673425701,
 0.004132159459516994,
 0.002800485609761698,
 0.00319958408907981,
 0.004698685351488342,
 0.003981325545078555,
 0.00273109009546983

In [16]:
mape

[1.0966727115165311,
 1.0666883010701227,
 1.0212481781797997,
 1.0110356334547645,
 0.8416601551132178,
 0.8868405739600301,
 0.9647584302148201,
 0.9825186627727391,
 1.1148276216282103,
 1.079782585606343,
 1.0253582549292508,
 1.0131130506101644,
 1.0494353489978754,
 1.0320125430552896,
 1.0103754021040605,
 1.0055483846937285,
 1.0419012058066535,
 1.0303058911316643,
 1.0098355322974242,
 1.005274329574825,
 0.9920045610167768,
 0.994731511338168,
 0.9986777116396016,
 0.9996426196172687,
 1.0716094801101654,
 1.0531761625634002,
 1.0170743780965728,
 1.0089314260883786,
 1.006048079269531,
 1.0078716571086506,
 1.0029408258180523,
 1.0018049011562584,
 1.1688819872885297,
 1.1165388542528838,
 1.0368895736485977,
 1.0189362505176502,
 1.1049334237644086,
 1.06365622791479,
 1.0202437738445411,
 1.0105316829441622,
 0.9753021688448809,
 0.9831672578858534,
 0.9950505214697447,
 0.9978119164519476,
 1.0174330558130686,
 1.0096390751457354,
 1.0033559573414237,
 1.0020047568113821

In [18]:
maximum=np.amax(rmse_accuracy)

In [25]:
ind=0
for i in range(0,len(rmse_accuracy)):
    if rmse_accuracy[i] == maximum:
        ind=i
        break

236


In [29]:
valid_list[59]

[12, 1, 5]

In [30]:
mape[ind]

1.7829655351483267

In [37]:
np.save("AlibabaAzure_predicted",predicted[236])

In [38]:
predicted[236]

array([ 0.02254682, -0.00103423,  0.00112826, -0.00068199,  0.00088604,
        0.00146956, -0.00282301,  0.00305271, -0.00237031,  0.0020025 ,
        0.00030904])