In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import yfinance as yf
import numpy as np
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.stats.diagnostic import acorr_ljungbox
import seaborn as sns

In [None]:
# ticker = ['TECHM.NS', 'M&M.NS', 'WIPRO.NS', 'SBIN.NS', 'BAJFINANCE.NS',
#     'INFY.NS', 'KOTAKBANK.NS', 'TRENT.NS', 'BAJAJFINSV.NS', 'SHRIRAMFIN.NS',
#     'ADANIPORTS.NS', 'TATAMOTORS.NS']

In [None]:
# NSE symbols (Yahoo Finance ".NS" suffix) processed by the ARIMA loop, in run order.
ticker = [
    'TECHM.NS', 'M&M.NS', 'WIPRO.NS', 'SBIN.NS', 'BAJFINANCE.NS',
    'INFY.NS', 'KOTAKBANK.NS', 'TRENT.NS', 'BAJAJFINSV.NS', 'SHRIRAMFIN.NS',
    'ADANIPORTS.NS', 'TATAMOTORS.NS', 'AXISBANK.NS', 'COALINDIA.NS', 'HCLTECH.NS',
    'BRITANNIA.NS', 'ICICIBANK.NS', 'LT.NS', 'CIPLA.NS', 'NTPC.NS',
    'BHARTIARTL.NS', 'NESTLEIND.NS', 'TATACONSUM.NS', 'HEROMOTOCO.NS', 'APOLLOHOSP.NS',
    'BEL.NS', 'HDFCBANK.NS', 'HINDALCO.NS', 'TCS.NS', 'SBILIFE.NS',
    'POWERGRID.NS', 'ULTRACEMCO.NS', 'ADANIENT.NS', 'HDFCLIFE.NS', 'GRASIM.NS',
    'ITC.NS', 'DRREDDY.NS', 'RELIANCE.NS', 'ONGC.NS', 'JSWSTEEL.NS',
    'EICHERMOT.NS', 'TITAN.NS', 'SUNPHARMA.NS', 'INDUSINDBK.NS', 'BPCL.NS',
    'MARUTI.NS', 'TATASTEEL.NS', 'BAJAJ-AUTO.NS', 'ASIANPAINT.NS', 'HINDUNILVR.NS',
]

In [None]:
# Training window handed to yf.download(start=..., end=...) for every ticker.
start, end = '2019-01-01', '2023-12-31'

In [None]:
# Hold-out window: actual closes downloaded from this range are plotted
# against the forecast for each ticker.
test_start, test_end = '2024-01-01', '2024-08-01'

In [None]:
# Forecast horizon: number of steps requested from the fitted model
# (`forecast(steps=forecast_days)`), and the number of hold-out closes
# taken from the test window for comparison.
forecast_days: int = 20

In [None]:
# Accumulator for the per-company Ljung-Box rows; starts empty and is
# extended by the ARIMA loop, then written to CSV at the end of the notebook.
all_ljung_box_results: pd.DataFrame = pd.DataFrame()

In [None]:
# Per-company ARIMA pipeline.
#
# For each symbol in `ticker` this cell:
#   1. downloads closes for [start, end) and takes first differences,
#   2. saves ACF / PACF plots of the differenced series,
#   3. fits ARIMA(5, 0, 2) to the differenced series,
#   4. saves the residual ACF plot and the statsmodels diagnostics figure,
#   5. records a Ljung-Box test (lag 10) on the residuals,
#   6. forecasts `forecast_days` differences, integrates them back to price
#      level, and saves a plot + CSV against the closes of the test window.
#
# Reads the notebook-level names `ticker`, `start`, `end`, `test_start`,
# `test_end`, `forecast_days`, and extends `all_ljung_box_results`.

output_root = '/content/drive/MyDrive/Ph.D/Datasets/ARIMA'


def _close_series(frame):
    """Return the 'Close' column of a downloaded frame as a 1-D Series.

    Some yfinance releases return MultiIndex columns, in which case
    ``frame['Close']`` is a one-column DataFrame rather than a Series.
    """
    close = frame['Close']
    if isinstance(close, pd.DataFrame):
        close = close.iloc[:, 0]
    return close


def _save_correlogram(plot_func, series, title, ylabel, path, lags=None):
    """Draw `plot_func` (plot_acf / plot_pacf) on a 10x6 figure, save it, close it.

    The axes are passed explicitly: without ``ax=`` statsmodels creates a
    figure of its own, which left the pre-sized figure empty and never closed.
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    plot_func(series, lags=lags, ax=ax)
    ax.set_title(title)
    ax.set_xlabel('Lag')
    ax.set_ylabel(ylabel)
    fig.savefig(path, format='png', dpi=300)
    plt.close(fig)


# Collected per-company Ljung-Box rows; concatenated once after the loop.
ljung_box_frames = []

for company in ticker:
    df = yf.download(company, start=start, end=end)
    if df.empty:
        # A failed or empty download would otherwise abort the whole batch.
        print(f'{company}: no training data downloaded, skipping')
        continue

    data = _close_series(df)

    # First differencing
    diff_data = data.diff().dropna()

    _save_correlogram(
        plot_acf, diff_data,
        title=f'Autocorrelation: {company}',
        ylabel='Autocorrelation',
        path=f'{output_root}/acf_plot/{company}.png',
        lags=20,
    )
    _save_correlogram(
        plot_pacf, diff_data,
        title=f'Partial Autocorrelation: {company}',
        ylabel='Partial Autocorrelation',
        path=f'{output_root}/pacf_plot/{company}.png',
        lags=20,
    )

    # Fit on the already-differenced series, hence d=0 in the order.
    model = ARIMA(diff_data, order=(5, 0, 2))
    arima_result = model.fit()

    # ACF of the residuals: remaining autocorrelation indicates a poor fit.
    residuals = arima_result.resid
    _save_correlogram(
        plot_acf, residuals,
        title=f'Autocorrelation of Residuals: {company}',
        ylabel='Autocorrelation',
        path=f'{output_root}/resid_acf/{company}.png',
    )

    # Ljung-Box test on the residuals at lag 10.
    # NOTE(review): no `model_df` adjustment is applied for the fitted
    # ARMA terms -- confirm whether that is intended.
    ljung_box_test = acorr_ljungbox(residuals, lags=[10], return_df=True)
    ljung_box_test['Company'] = company
    ljung_box_frames.append(ljung_box_test)

    # plot_diagnostics returns its own figure: title the figure as a whole
    # (plt.title would only label the last subplot) and close it afterwards.
    diagnostics_fig = arima_result.plot_diagnostics(figsize=(15, 8))
    diagnostics_fig.suptitle(f'ARIMA Diagnostics: {company}')
    diagnostics_fig.savefig(f'{output_root}/diagnostics/{company}.png', format='png', dpi=300)
    plt.close(diagnostics_fig)

    # Forecast the differences, then integrate back to price level:
    # price[t] = price[t-1] + diff[t], seeded with the last training close.
    forecast = pd.Series(arima_result.forecast(steps=forecast_days))
    last_value = float(data.iloc[-1])  # .iloc: positional lookup on a date index
    forecast_path = np.cumsum(np.concatenate(([last_value], forecast.to_numpy())))[1:]

    test_full = yf.download(company, start=test_start, end=test_end)
    if test_full.empty:
        print(f'{company}: no test data downloaded, skipping forecast comparison')
        continue

    test_data_full = _close_series(test_full)
    test_data = test_data_full.iloc[:len(forecast_path)]
    # The test window may hold fewer rows than the forecast horizon;
    # align both to the shorter length instead of raising a length mismatch.
    test_length = len(test_data)

    forecasted_original_scale = pd.DataFrame(
        {'Predicted': forecast_path[:test_length]},
        index=test_data.index,
    )

    fig, ax = plt.subplots(figsize=(12, 6))
    ax.plot(test_data, label='Actual')
    ax.plot(forecasted_original_scale['Predicted'], label='Forecast')
    ax.set_xlabel('Date')
    ax.set_ylabel('Close Price')
    ax.set_title(f'Actual vs. Forecasted: {company}')
    ax.legend()
    fig.savefig(f'{output_root}/forecast/{company}.png', format='png', dpi=300)
    plt.close(fig)

    forecasted_original_scale.to_csv(f'{output_root}/forecast_data/{company}.csv')

# Append this run's rows to the accumulator in one concat; empty frames are
# dropped so pandas does not warn about concatenating empty entries.
_non_empty_frames = [
    frame for frame in (all_ljung_box_results, *ljung_box_frames) if not frame.empty
]
if _non_empty_frames:
    all_ljung_box_results = pd.concat(_non_empty_frames, ignore_index=True)





[*********************100%***********************]  1 of 1 completed
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  last_value = data[-1]
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'
  return get_prediction_index(
  return get_prediction_index(
  last_value = data[-1]
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  self._init_dates(dates, freq)
  return get_prediction_index(
  return get_prediction_index(
  last_value = data

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1000x600 with 0 Axes>

<Figure size 1500x800 with 0 Axes>

In [None]:
# Write the accumulated Ljung-Box table (one row per company) to disk,
# without the positional index column.
ljung_box_csv_path = '/content/drive/MyDrive/Ph.D/Datasets/ARIMA/cumulative_ljung_box_results.csv'
all_ljung_box_results.to_csv(ljung_box_csv_path, index=False)