In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for folder, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        print(os.path.join(folder, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/hacklive-4/SampleSubmission.csv
/kaggle/input/hacklive-4/Train.csv
/kaggle/input/hacklive-4/Test.csv


In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.metrics import mean_squared_error
import statsmodels.api as sm
import statsmodels
from statsmodels.graphics.tsaplots import plot_pacf, plot_acf

In [3]:
# Load the three competition files; the 'ID' column becomes the row index of each frame.
train, test, sub = (
    pd.read_csv(csv_path, index_col='ID')
    for csv_path in (
        '../input/hacklive-4/Train.csv',
        '../input/hacklive-4/Test.csv',
        '../input/hacklive-4/SampleSubmission.csv',
    )
)

In [4]:
import warnings
# Warning filters are matched front-to-back and each filterwarnings() call inserts its
# rule at the FRONT of the list, so the rule registered last wins.
# statsmodels' ConvergenceWarning and HessianInversionWarning derive from UserWarning
# (through ModelWarning); registering the broad "ignore UserWarning" rule last would
# therefore shadow the "error" rules and those warnings would never be raised.
# Register the broad rule first so the specific "error" rules take precedence.
warnings.filterwarnings('ignore',category=UserWarning)
# Escalate fitting problems to exceptions so that train_arima can reject the order.
warnings.filterwarnings('error',category=statsmodels.tools.sm_exceptions.HessianInversionWarning)
warnings.filterwarnings('error',category=statsmodels.tools.sm_exceptions.ConvergenceWarning)
warnings.filterwarnings('error',category=RuntimeWarning)

In [5]:
def train_arima(ts,exog,order,verbose=1):
    """
    Fit an ARIMA model and return the fitted results, or None when the fit fails.

    ts - time series to train on
    exog - exogenous variable of the same length as ts
    order - tuple of size 3 with the orders of the AR, I and MA parts of the model
    verbose - 1 or 0, whether to print a message when the fit fails

    returns the fitted model, which can forecast values, or None if fitting raised
    """
    model = statsmodels.tsa.arima.model.ARIMA(endog=ts, exog=exog, order=order)
    try:
        return model.fit()
    except Exception:
        # Exception (rather than a bare except) still covers numerical errors and any
        # warning that the notebook's warning filters escalate to an error, but lets
        # KeyboardInterrupt / SystemExit propagate so a long grid search can be stopped.
        if verbose:
            print('     The order is not valid for fitting the model')
        return None

In [6]:
def split_time_series(ts,train_size):
    """
    Cut a series into a leading training part and a trailing validation part.

    ts - series to split (numpy array); read through .shape[0] and slicing
    train_size - fraction (0 to 1) of the observations kept for training

    returns the tuple (train_ts, val_ts)
    """
    # Truncate towards zero so the training part never exceeds the requested fraction.
    cut = int(train_size*ts.shape[0])
    head = ts[:cut]
    tail = ts[cut:]
    return (head,tail)

In [7]:
def get_forecast_error(ts,ts_pred):
    """
    Root mean squared error between observed and forecast values.

    ts - observed values
    ts_pred - forecast values, passed to mean_squared_error together with ts

    returns the RMSE rounded to 6 decimal places
    """
    mse = mean_squared_error(ts,ts_pred)
    rmse = np.sqrt(mse)
    return np.round(rmse,6)

In [8]:
def validate_arima_model(ts,exog,param_grid,train_size=0.9,verbose=1):
    """
    Grid-search ARIMA orders on a hold-out split and report the best one.

    ts - time series available for training and validation (numpy array)
    exog - exogenous variable of the same length as ts
    param_grid - dictionary of candidate orders with keys 'p', 'q' and 'r'.
                 Ex: {'p':[1,2,3],'q':[0,1,2],'r':[2,3,4]}
                 Every combination is passed to train_arima as order=(p,q,r).
    train_size - fraction (0 to 1) of ts used for fitting; the remainder is forecast
                 and scored with get_forecast_error
    verbose - 1 or 0 whether to print progress while running

    returns (best_order, best_error, orders_errors)
        best_order - the (p,q,r) with the lowest validation error, or None if no
                     candidate could be fitted
        best_error - that lowest error (np.inf if no candidate could be fitted)
        orders_errors - list of ((p,q,r), error) for every candidate that was fitted
    """
    num_models = len(param_grid['p'])*len(param_grid['q'])*len(param_grid['r'])
    if verbose:
        print('Total Number of models to be trained =',num_models)
        print('')

    train_ts,val_ts = split_time_series(ts,train_size)
    train_exog,val_exog = split_time_series(exog,train_size)
    forecast_steps = val_ts.shape[0]
    orders_errors = []
    # Initialise both "best" trackers so the function still returns cleanly when every
    # candidate fails to fit (previously best_order was left unbound in that case).
    best_order = None
    best_error = np.inf
    for p in param_grid['p']:
        for q in param_grid['q']:
            for r in param_grid['r']:
                order = (p,q,r)
                if verbose:
                    print(f'The model order = ({p},{q},{r}):')
                model_fit = train_arima(train_ts,exog=train_exog,order=order,verbose=0)
                # train_arima signals failure with None; test for that explicitly
                # instead of relying on the truthiness of the results object.
                if model_fit is not None:
                    val_forecast = model_fit.forecast(forecast_steps,exog=val_exog)
                    error = get_forecast_error(val_ts,val_forecast)
                    if verbose:
                        print('    The error obtained = {:.4f}'.format(error))
                    if error<best_error:
                        best_error = error
                        best_order = order

                    orders_errors.append((order,error))

                if verbose:
                    print('')
    if verbose:
        print('Best order =',best_order)
        print('Best error = {:.4f}'.format(best_error))
    return best_order,best_error,orders_errors

In [None]:
def Predict_stockprice(n_stocks=103,param_grid=None,train_size=0.8):
    """
    Forecast the test-period closing price of every stock with a per-stock ARIMA model.

    For each stock label, the 'Close' series and the 'holiday' exogenous variable are read
    from the global `train` frame, an order is chosen with validate_arima_model, that
    order is refitted on the full series, and the fitted model forecasts one value per
    row of that stock in the global `test` frame.

    n_stocks - stock labels 0 .. n_stocks-1 are processed (default 103)
    param_grid - candidate orders passed to validate_arima_model
                 (default {'p':[1],'q':[2],'r':[2]})
    train_size - fraction of each series used for fitting during validation (default 0.8)

    returns (pred_close_price, best_orders, least_errors)
        pred_close_price - 1-D numpy array of forecasts, concatenated in stock-label order
        best_orders - dict: stock label -> order used for the final fit
        least_errors - dict: stock label -> validation error of that order

    raises RuntimeError if none of the validated orders can be fitted on the full series
    of a stock
    """
    if param_grid is None:
        param_grid = {'p':[1],'q':[2],'r':[2]}
    best_orders = {}
    least_errors = {}
    # Collect per-stock forecasts and concatenate once at the end instead of growing an
    # array with np.append on every iteration.
    forecasts_per_stock = []
    for st in range(n_stocks):
        print('Stock label =',st)
        stock_train = train[train['stock']==st]
        ts = stock_train['Close'].values
        exog = stock_train['holiday'].values
        best_order,best_error,orders_errors = validate_arima_model(ts,exog,param_grid,train_size=train_size,verbose=0)
        best_orders[st] = best_order
        least_errors[st] = best_error
        model_fit = None
        if best_order is not None:
            model_fit = train_arima(ts,exog,best_order,verbose=0)
        if model_fit is None:
            # The best validated order failed on the full series: try the remaining
            # validated orders from lowest to highest validation error.
            ranked = sorted(orders_errors,key=lambda x:x[1])
            for order,error in ranked:
                if order==best_order:
                    continue
                model_fit = train_arima(ts,exog,order,verbose=0)
                if model_fit is not None:
                    best_orders[st] = order
                    least_errors[st] = error
                    break
        if model_fit is None:
            # Fail with an explicit message instead of indexing past the end of
            # orders_errors when every candidate has been exhausted.
            raise RuntimeError(
                'No ARIMA order could be fitted on the full series of stock {}'.format(st))
        print('     Best order =',best_orders[st])
        print('     Least error =',least_errors[st])
        test_exog = test[test['stock']==st]['holiday'].values
        steps = test_exog.shape[0]
        forecasts = model_fit.forecast(steps,exog=test_exog)
        forecasts_per_stock.append(np.ravel(forecasts))
        print('')

    # Starting from an empty float array keeps the result dtype and the empty-input
    # result the same as the original np.append accumulation.
    pred_close_price = np.concatenate([np.array([])]+forecasts_per_stock)
    return pred_close_price,best_orders,least_errors

# Run the whole pipeline; `output` is the tuple (pred_close_price, best_orders, least_errors).
output = Predict_stockprice()

Stock label = 0
     Best order = (1, 2, 2)
     Least error = 7.167805

Stock label = 1
     Best order = (1, 2, 2)
     Least error = 0.324255

Stock label = 2
     Best order = (1, 2, 2)
     Least error = 1.245404

Stock label = 3
     Best order = (1, 2, 2)
     Least error = 2.502716

Stock label = 4
     Best order = (1, 2, 2)
     Least error = 7.284624

Stock label = 5
     Best order = (1, 2, 2)
     Least error = 31.539491

Stock label = 6
     Best order = (1, 2, 2)
     Least error = 2.458542

Stock label = 7
     Best order = (1, 2, 2)
     Least error = 3.91475

Stock label = 8
     Best order = (1, 2, 2)
     Least error = 0.971441

Stock label = 9
     Best order = (1, 2, 2)
     Least error = 4.97305

Stock label = 10
     Best order = (1, 2, 2)
     Least error = 0.783074

Stock label = 11
     Best order = (1, 2, 2)
     Least error = 1.614728

Stock label = 12
     Best order = (1, 2, 2)
     Least error = 13.668576

Stock label = 13
     Best order = (1, 2, 2)
   

In [None]:
# output[0] is the forecast array built in stock-label order by Predict_stockprice.
# NOTE(review): the assignment assumes the rows of `sub` follow that same order
# (by stock label, then by row order within each stock in `test`) - TODO confirm.
sub['Close'] = output[0]

In [None]:
# Write the submission frame (including its 'ID' index) to the Kaggle working directory.
sub.to_csv('/kaggle/working/Submission_file12.csv')

In [None]:
# Preview the first rows of the submission frame as a sanity check.
sub.head()