In [None]:
# Core Libraries
import pandas as pd
import numpy as np


# Visualizing
import seaborn as sns
import matplotlib.pyplot as plt
from statsmodels.graphics.tsaplots import plot_pacf, plot_acf

# Analysis
import statsmodels.api as sm
from statsmodels.tsa.arima_model import ARIMA
from sklearn.metrics import mean_absolute_error,mean_squared_error
import pmdarima as pm

# The U.S. Historical Unemployment Data Set

### Exploratory Data Analysis

In [None]:
# Load the unemployment-rate CSV, convert its DATE column to datetimes and
# use that column as the index of the frame.
Raw_Data = pd.read_csv("UNRATE.csv")
Raw_Data = Raw_Data.assign(DATE=pd.DatetimeIndex(Raw_Data["DATE"])).set_index("DATE")

# Last expression of the cell: display the loaded frame.
Raw_Data

In [None]:
# Apply seaborn's "darkgrid" theme before drawing.
sns.set_theme(style="darkgrid")

# Line chart of the UNRATE column plotted against the frame's DATE index.
Graph = sns.lineplot(
    x=Raw_Data.index,
    y="UNRATE",
    data=Raw_Data,
)

## Data Set Analysis

In [None]:
# Work on a copy so that re-indexing here never modifies the loaded Raw_Data frame.
UnRate = Raw_Data.copy()
UnRate.index = pd.to_datetime(UnRate.index)

# Frequency is Monthly.
# Use the frequency the dates actually have instead of forcing month-end labels:
# with month-start dates, asfreq("M", method="bfill") would label every value with
# the previous month and drop the first observation.
# NOTE(review): falls back to month-start ("MS") when no regular frequency can be
# inferred -- confirm this matches the dates in UNRATE.csv.
monthly_freq = pd.infer_freq(UnRate.index) or "MS"
UnRate_Monthly = UnRate["UNRATE"].asfreq(monthly_freq, method="bfill").dropna()

# Additive decomposition of the monthly series, drawn on a wide figure.
plt.rcParams['figure.figsize'] = [30, 10]
decomposition = sm.tsa.seasonal_decompose(UnRate_Monthly, model='additive')
fig = decomposition.plot()
plt.show()

### ACF - PACF

In [None]:

# Two panels side by side: autocorrelation on the left, partial
# autocorrelation on the right, both over 48 lags of the monthly series.
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(20, 5), dpi=100)

for draw_correlogram, target_ax, extra_options in (
    (plot_acf, ax1, {}),
    (plot_pacf, ax2, {"method": "ywm"}),
):
    draw_correlogram(UnRate_Monthly, ax=target_ax, lags=48, **extra_options)

plt.show()

### Stationarity Check

In [None]:
def adf(data):
    """Run the Augmented Dickey-Fuller test on ``data`` and print a report.

    The test is run through ``sm.tsa.adfuller`` with ``autolag='AIC'``. The
    statistic, p-value, number of lags used and number of observations used
    are printed, followed by a verdict: the series is reported as not
    stationary when the p-value is above 0.05, stationary otherwise.

    Nothing is returned; all output goes to stdout.
    """
    statistic, p_value, lags_used, n_obs = sm.tsa.adfuller(data, autolag='AIC')[:4]

    print(f'ADF Statistic: {statistic}')
    print(f'p-value: {p_value}')
    print(f'# of Lags Used: {lags_used}')
    print(f'Number of Observations Used: {n_obs}')

    if p_value > 0.05:
        print("Series is not stationary")
    else:
        print("Series is stationary (No Unit Root)")

# KPSS Test:

def kpss(data):
    """Run the KPSS test on ``data`` and print a report.

    The test is run through ``sm.tsa.kpss`` with its default settings. The
    statistic and p-value are printed, followed by a verdict: the series is
    reported as stationary when the p-value is above 0.05, not stationary
    otherwise.

    Nothing is returned; all output goes to stdout.
    """
    statistic, p_value = sm.tsa.kpss(data)[:2]

    print('KPSS Statistic: %f' % statistic)
    print('p-value: %f' % p_value)

    if p_value > 0.05:
        print("Series is stationary")
    else:
        print("Series is not stationary(Serial Contains Unit Root)")

# adf() and kpss() print their own report and return None, so they are called
# directly: wrapping them in print() only added a stray "None" after each report.
adf(UnRate_Monthly)
print()  # blank line between the two reports
kpss(UnRate_Monthly)

In [None]:
# Keep the monthly series as a one-column frame; later cells select "UNRATE" by name.
UnRate_Monthly = pd.DataFrame(UnRate_Monthly)

# First difference of the monthly series (y_t - y_{t-1}); the leading NaN is dropped.
# The variable keeps its historical name although the data is monthly, not daily.
UnRate_Daily_Diff = UnRate_Monthly["UNRATE"].diff().dropna()

# ACF (left) and PACF (right) of the differenced series over 48 lags.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(20, 5), dpi=70)

plot_acf(UnRate_Daily_Diff.to_list(), ax=ax1, lags=48)
plot_pacf(UnRate_Daily_Diff.to_list(), ax=ax2, lags=48, method="ywm")

plt.show()

# adf() and kpss() print their own report and return None, so they are called
# directly instead of being wrapped in print(), which emitted a stray "None".
adf(UnRate_Daily_Diff)
print()  # blank line between the two reports
kpss(UnRate_Daily_Diff)

## Time Series Analysis (TS)

In [None]:
# 80/20 chronological split. The split point is computed from the series that is
# actually sliced (UnRate_Monthly); the raw frame can have a different length.
separator = int(len(UnRate_Monthly) * 0.80)

Train = UnRate_Monthly.iloc[:separator].copy()
Test = UnRate_Monthly.iloc[separator:].copy()

# Show the two segments together with the first-differenced series.
plt.figure(figsize=(30, 10))
plt.plot(Train, label='Training Data')
plt.plot(Test, label='Testing Data')
plt.plot(UnRate_Daily_Diff, label="First-Differenced Series")
plt.title("U.S. Unemployment Rate: Train and Test Values")
plt.xlabel("Years")
plt.ylabel("Unemployment Rate")
plt.legend(loc="upper left")
plt.show()

In [None]:
# Fit ARIMA(1,1,1) on the training segment only.
Arima_Model = sm.tsa.arima.ARIMA(Train["UNRATE"], order=(1, 1, 1))
Arima_Model_fit = Arima_Model.fit()

# Multi-step out-of-sample forecast, one value per test observation. The values are
# attached to the test index positionally and stored under an explicit "Forecast"
# column (the previous rename of column 0 never matched the forecast's column name).
Arima_Forecast = pd.DataFrame(
    {"Forecast": np.asarray(Arima_Model_fit.forecast(len(Test)))},
    index=Test.index,
)

# Actual values and forecast side by side, rounded to three decimals.
UNRATE_Pred = Test.copy()
UNRATE_Pred["Arima_pred"] = Arima_Forecast["Forecast"]
UNRATE_Pred = UNRATE_Pred.round(decimals=3)

# Error metrics on the test segment.
mae = mean_absolute_error(Test["UNRATE"], Arima_Forecast["Forecast"])
rsme = np.sqrt(mean_squared_error(Test["UNRATE"], Arima_Forecast["Forecast"]))

# Format the metrics with three decimals: a bare .round() collapsed them to whole
# numbers and is not available when the metric is a plain Python float.
plt.figure(figsize=(22, 4))
sns.lineplot(data=UNRATE_Pred[['UNRATE', 'Arima_pred']]).set(
    title=f"MAE: {mae:.3f}, RMSE: {rsme:.3f}, ARIMA"
)
plt.show()


print(Arima_Model_fit.summary())

#### Improving TS Model

In [None]:
# Rebuild the series as a frame indexed by daily Periods derived from the raw dates.
Final_ts = pd.DataFrame(columns=["Date", "UNRATE"])
Final_ts["Date"] = pd.DatetimeIndex(Raw_Data.index).to_period("D")
Final_ts["UNRATE"] = Raw_Data["UNRATE"].values

# Set the column 'Date' as index (skip if already done)
Final_ts = Final_ts.set_index('Date').asfreq('D').dropna()

# 80/20 chronological split.
split_at = int(len(Final_ts) * 0.8)
train_data, test_data = Final_ts.iloc[:split_at], Final_ts.iloc[split_at:]

# First and last label of the evaluation window (kept for later cells / inspection).
Start_date = test_data.index[0]
End_time = test_data.index[-1]

# Fit on the training segment ONLY. Fitting on the full series and then scoring
# in-sample predictions over the test window leaks the test data into the model
# and makes the reported errors look better than a real forecast would be.
ARIMA_Final_Model = sm.tsa.arima.ARIMA(train_data["UNRATE"], order=(2, 1, 0))
Model_fit = ARIMA_Final_Model.fit()

# Out-of-sample forecast, one step per test observation. The values are re-labelled
# positionally with the test index rather than relying on the labels the model
# generates for future steps.
Prediction = pd.Series(
    np.asarray(Model_fit.forecast(steps=len(test_data))),
    index=test_data.index,
    name="prediction",
)

score_mae = mean_absolute_error(test_data["UNRATE"], Prediction)
score_rsme = np.sqrt(mean_squared_error(test_data["UNRATE"], Prediction))

# Build the plotting frame with assign() so test_data itself is left untouched.
ARIMA_dataframe = test_data.assign(prediction=Prediction)
ARIMA_dataframe.plot(
    figsize=(20, 10),
    title=f"MAE: {score_mae:.3f}, RMSE: {score_rsme:.3f}, ARIMA",
)
print(Model_fit.summary())

In [None]:
# Train and Test are plotted against timestamps. If Prediction carries Period labels,
# convert them to timestamps so all three lines share one datetime x-axis.
prediction_dates = Prediction.index
if isinstance(prediction_dates, pd.PeriodIndex):
    prediction_dates = prediction_dates.to_timestamp()

plt.figure(figsize=(20, 10))
plt.plot(Train, color='blue', label='Training Data')
plt.plot(prediction_dates, np.asarray(Prediction), color='red', marker='o',
         linestyle='dashed', label='Predicted Unemployment Rate')
plt.plot(Test, color="green", label='Testing Data')
plt.title('U.S. Unemployment Rate Prediction')
plt.xlabel('Dates')
plt.ylabel('Unemployment Rate')
plt.legend(loc="upper left")
plt.show()

## Auto-Time Series Modeling

In [None]:
# Seasonal stepwise auto-ARIMA search over Final_ts.
# The search settings are collected in one mapping and passed through unchanged.
auto_arima_options = dict(
    # non-seasonal search bounds
    start_p=1, start_q=1,
    max_p=3, max_q=3,
    # differencing: d is left to the 'adf' test, seasonal D is fixed to 1
    d=None, test='adf',
    D=1,
    # seasonal component with a period of 12 observations
    seasonal=True, m=12, start_P=0,
    # search behaviour and logging
    stepwise=True,
    trace=True,
    error_action='ignore',
    suppress_warnings=True,
)
smodel = pm.auto_arima(Final_ts, **auto_arima_options)

smodel.summary()

In [None]:
# 80/20 chronological split of the final series.
final_split_at = int(len(Final_ts) * 0.8)
Final_Train_Data = Final_ts.iloc[:final_split_at]
Final_Test_Data = Final_ts.iloc[final_split_at:]

# First and last label of the evaluation window (kept for later cells / inspection).
Start_prediction = Final_Test_Data.index[0]
End_prediction = Final_Test_Data.index[-1]

# Fit on the training segment ONLY. Fitting on the full series and then scoring
# in-sample predictions over the test window leaks the test data into the model
# and makes the reported errors look better than a real forecast would be.
Final_Model = sm.tsa.statespace.SARIMAX(
    Final_Train_Data["UNRATE"],
    order=(2, 0, 1),
    seasonal_order=(2, 1, 0, 12),
)
Final_Model_Fit = Final_Model.fit()

# Out-of-sample forecast, one step per test observation, re-labelled positionally
# with the test index rather than relying on the labels the model generates.
Final_Prediction = pd.Series(
    np.asarray(Final_Model_Fit.forecast(steps=len(Final_Test_Data))),
    index=Final_Test_Data.index,
    name="prediction",
)

Final_score_mae = mean_absolute_error(Final_Test_Data["UNRATE"], Final_Prediction)
Final_score_rsme = np.sqrt(mean_squared_error(Final_Test_Data["UNRATE"], Final_Prediction))

# Build the plotting frame with assign() so Final_Test_Data itself is left untouched.
Sarimax = Final_Test_Data.assign(prediction=Final_Prediction)
Sarimax.plot(
    title=f"MAE: {Final_score_mae:.3f}, RMSE: {Final_score_rsme:.3f}, SARIMA",
    figsize=(20, 10),
)