# ARIMA Assignment — Week 1

This notebook implements the full ARIMA workflow: data collection, preprocessing, stationarity testing, model selection, forecasting, evaluation, walk-forward validation, and residual diagnostics.

## 0) Setup

In [None]:

# Uncomment if required
# !pip install yfinance pmdarima
# NOTE(review): pmdarima is listed here but is not imported by any cell visible
# in this notebook — confirm whether it is still needed.

import warnings
# Silences every warning category for the rest of the kernel session. This also
# hides deprecation notices from the libraries imported below.
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Time-series tooling: ADF stationarity test, ACF/PACF plots, the ARIMA model
# class and the Ljung-Box autocorrelation test.
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.stats.diagnostic import acorr_ljungbox
import statsmodels.api as sm

# Forecast error metrics.
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Default size (width, height in inches) applied to every figure created later.
plt.rcParams['figure.figsize'] = (12,5)


## 1) Data Collection

In [None]:

import yfinance as yf

# Instrument and date window used for the whole analysis.
TICKER = 'MSFT'
START_DATE = '2022-01-01'
END_DATE = '2024-12-31'

# Download daily price bars for TICKER.
data = yf.download(TICKER, start=START_DATE, end=END_DATE, progress=False)
if data.empty:
    # Fail here with a clear message instead of letting an empty frame
    # surface later as an obscure error inside adfuller/ARIMA.
    raise ValueError(
        f'No price data returned for {TICKER} between {START_DATE} and {END_DATE}'
    )

# Depending on the yfinance version, the columns are a (field, ticker)
# MultiIndex even for a single ticker, in which case data['Close'] is a
# one-column DataFrame rather than a Series. Collapse it so that the
# downstream cells always receive a 1-D Series named 'Close'.
close = data['Close']
if isinstance(close, pd.DataFrame):
    close = close.iloc[:, 0]
ts = close.rename('Close')

# Re-index onto a regular business-day grid and carry the last known close
# forward over the gaps this introduces.
ts = ts.asfreq('B').ffill()

plt.plot(ts)
plt.title(f'{TICKER} Close Price')
plt.show()


## 2) Stationarity Test

In [None]:

# Augmented Dickey-Fuller test on the price levels; the first two entries of
# the returned tuple are the test statistic and its p-value.
result = adfuller(ts)
adf_stat, adf_pvalue = result[:2]
print('ADF Statistic:', adf_stat)
print('p-value:', adf_pvalue)

# Same test on the first differences (the leading NaN produced by diff() is
# dropped before testing).
ts_diff = ts.diff().dropna()
result2 = adfuller(ts_diff)
diff_pvalue = result2[1]
print('ADF after differencing p-value:', diff_pvalue)


## 3) Train-Test Split

In [None]:

# Chronological split: the first 80% of observations train the model and the
# remaining 20% are held out, so the test window lies after the training window.
n = len(ts)
split_idx = int(0.8 * n)
train, test = ts.iloc[:split_idx], ts.iloc[split_idx:]

# Draw both segments on one axis to show where the split falls.
for segment_label, segment in (('Train', train), ('Test', test)):
    plt.plot(segment, label=segment_label)
plt.legend()
plt.show()


## 4) ACF & PACF

In [None]:

# The ARIMA model in this notebook is fitted with d=1, so the AR and MA orders
# have to be read from the correlograms of the first-differenced training
# series. On the raw price levels the ACF decays very slowly and says little
# about p and q.
train_diff = train.diff().dropna()

# Stack both correlograms in one figure so they can be compared lag by lag.
fig, (ax_acf, ax_pacf) = plt.subplots(2, 1, figsize=(12, 8))
plot_acf(train_diff, lags=40, ax=ax_acf,
         title='ACF of first-differenced training series')
plot_pacf(train_diff, lags=40, ax=ax_pacf,
          title='PACF of first-differenced training series')
fig.tight_layout()
plt.show()


## 5) Model Fitting

In [None]:

# (p, d, q) = (1, 1, 1): one AR term, first differencing, one MA term.
arima_order = (1, 1, 1)

# Build the model on the training window only, then estimate its parameters.
model = ARIMA(endog=train, order=arima_order)
res = model.fit()

fit_summary = res.summary()
print(fit_summary)


## 6) Forecasting

In [None]:

# Forecast as many steps past the end of the training data as there are
# observations in the test window.
horizon = len(test)
forecast = res.forecast(steps=horizon)

# Overlay the held-out actuals and the forecast on one axis.
for curve_label, curve in (('Actual', test), ('Forecast', forecast)):
    plt.plot(curve, label=curve_label)
plt.legend()
plt.show()


## 7) Evaluation

In [None]:

# Mean absolute error between the held-out actuals and the forecast.
mae = mean_absolute_error(test, forecast)

# Root mean squared error. The `squared=False` keyword of mean_squared_error
# was deprecated in scikit-learn 1.4 and later removed, so passing it raises
# TypeError on current releases. Taking the square root of the MSE explicitly
# works on every scikit-learn version.
rmse = float(np.sqrt(mean_squared_error(test, forecast)))

print('MAE:', mae)
print('RMSE:', rmse)


## 8) Residual Diagnostics

In [None]:

resid = res.resid

# For a model with d > 0 the first d in-sample residuals are artefacts of the
# state initialisation (the first one is close to the raw price level, not a
# one-step forecast error). Leaving them in produces a huge outlier that
# distorts both the ACF and the Q-Q plot, so they are skipped.
p_order, d_order, q_order = res.model.order
resid_clean = resid.iloc[d_order:].dropna()

# Remaining autocorrelation in the residuals would mean the model has missed
# some structure in the series.
plot_acf(resid_clean, title='ACF of ARIMA residuals')
plt.show()

# Q-Q plot against a normal distribution; line='s' draws the standardized
# reference line.
sm.qqplot(resid_clean, line='s')
plt.title('Q-Q plot of ARIMA residuals')
plt.show()

# Ljung-Box test of "no autocorrelation up to lag k". model_df removes the
# p + q degrees of freedom consumed by the fitted ARMA parameters. Small
# p-values indicate leftover autocorrelation.
ljung_box = acorr_ljungbox(resid_clean, lags=[10, 20], model_df=p_order + q_order)
print(ljung_box)


## End of Notebook