# The Best of the Best Models

In this chapter, you will become a modeler of discerning taste. You'll learn how to identify promising model orders from the data itself. Then, once the most promising models have been trained, you'll learn how to choose the best model from this fitted selection. You'll also learn a great framework for structuring your time series projects.

## AR or MA

![image.png](attachment:7533eb55-18d2-4f7c-bc84-c0712aa950b3.png)

In [None]:
# Import
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

# Two stacked panels: ACF on top, PACF underneath
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(12, 8))

# Draw each correlogram of df on its own axis.
# lags=10 limits the plot to the first ten lags; zero=False omits the lag-0 bar.
for draw_correlogram, target_ax in ((plot_acf, ax1), (plot_pacf, ax2)):
    draw_correlogram(df, lags=10, zero=False, ax=target_ax)

plt.show()

![image.png](attachment:cb7397f8-f375-44aa-b752-9e7a933ce064.png)

## Order of earthquakes

In [None]:
# Two stacked panels for the earthquake correlograms
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(12, 8))

# Same display settings for both plots: ten lags, lag-0 bar omitted
correlogram_options = dict(lags=10, zero=False)
plot_acf(earthquake, ax=ax1, **correlogram_options)
plot_pacf(earthquake, ax=ax2, **correlogram_options)

# Render the figure before fitting
plt.show()

# order=(p, d, q) = (1, 0, 0): one autoregressive term, no differencing,
# no moving-average terms
ar_order = (1, 0, 0)
model = SARIMAX(earthquake, order=ar_order)

# Estimate the model parameters
results = model.fit()

![image.png](attachment:6c5b5a7e-61da-48f6-8556-d1a0437303c7.png)

## Searching over model order

In [None]:
# One (p, q, AIC, BIC) record is collected per fitted model
order_aic_bic = []

# Largest AR and MA order to try; both p and q run from 0 up to this value
max_order = 2

for p in range(max_order + 1):
    for q in range(max_order + 1):
        # order is (p, d, q); d is held at 0, so this is an ARMA(p, q) fit
        model = SARIMAX(df, order=(p, 0, q))
        results = model.fit()

        # Keep the order together with both information criteria
        record = (p, q, results.aic, results.bic)
        order_aic_bic.append(record)

## Choosing order with AIC and BIC

In [None]:
# Tabulate the search results, one row per (p, q) combination
column_names = ['p', 'q', 'AIC', 'BIC']
order_df = pd.DataFrame(order_aic_bic, columns=column_names)

# Show the table twice: ranked by increasing AIC, then by increasing BIC
for criterion in ('AIC', 'BIC'):
    print(order_df.sort_values(by=criterion))

## AIC and BIC vs ACF and PACF

In [None]:
# Grid-search ARMA(p, q) orders on the earthquake series and print the
# AIC and BIC of each fit. Orders that cannot be fitted are reported with
# None in place of the scores so the search continues over the full grid.

# Loop over p values from 0-2
for p in range(3):
    # Loop over q values from 0-2
    for q in range(3):
        try:
            # Create and fit ARMA(p,q) model
            model = SARIMAX(earthquake, order=(p, 0, q))
            results = model.fit()
            aic, bic = results.aic, results.bic
        except Exception:
            # Catch Exception rather than using a bare `except:` so that
            # KeyboardInterrupt and SystemExit still stop the search;
            # any ordinary fitting error is reported and skipped.
            print(p, q, None, None)
        else:
            # Print order and results
            print(p, q, aic, bic)

## Mean absolute error

In [None]:
# Fit an ARMA(1,1) model: order=(p, d, q) = (1, 0, 1)
model = SARIMAX(earthquake, order=(1, 0, 1))
results = model.fit()

# Mean absolute error: average magnitude of the model residuals
abs_residuals = np.abs(results.resid)
mae = np.mean(abs_residuals)

print(mae)

# Plot the series itself so the error can be compared with its scale
earthquake.plot()
plt.show()

![image.png](attachment:64847355-0a72-4537-8428-9771fe095091.png)

## Diagnostic summary statistics

![image.png](attachment:b8b34885-2e83-4733-b515-83498187693a.png)

In [None]:
# Fit an ARMA(3,1) model to df: order=(p, d, q) = (3, 0, 1)
arma_order = (3, 0, 1)
model1 = SARIMAX(df, order=arma_order)
results1 = model1.fit()

# Display the fitted model's summary table
summary_table = results1.summary()
print(summary_table)

![image.png](attachment:18fb20d2-3272-4a46-8ad5-81936724390a.png)

## Plot diagnostics

![image.png](attachment:c3840008-5c2e-4a50-8f01-978526503297.png)

In [None]:
# Fit an ARIMA(1,1,1) model to df: order=(p, d, q) = (1, 1, 1)
arima_order = (1, 1, 1)
model = SARIMAX(df, order=arima_order)
results = model.fit()

# Create the 4 diagnostics plots for the fitted model, then display them
results.plot_diagnostics()
plt.show()

![image.png](attachment:f9988817-5289-482c-b82a-c320bdeaeb7a.png)

## Box-Jenkins method

### Identification

In [None]:
# Look at the raw savings series first
savings.plot()
plt.show()

# Run the augmented Dickey-Fuller test on the series
result = adfuller(savings)

# The first two entries of the result are the test statistic and the p-value
test_statistic, p_value = result[0], result[1]

print(test_statistic)
print(p_value)

![image.png](attachment:ca1c169f-a876-4f3b-89c0-43ac8fd56ca8.png)

### Identification 2

In [None]:
# Two stacked panels for the savings correlograms
fig, (ax1, ax2) = plt.subplots(nrows=2, ncols=1, figsize=(12, 8))

# ACF of savings goes on ax1 and PACF on ax2; both show ten lags
# with the lag-0 bar omitted
panels = [(plot_acf, ax1), (plot_pacf, ax2)]
for plot_function, axis in panels:
    plot_function(savings, lags=10, zero=False, ax=axis)

plt.show()

![image.png](attachment:e26c1a72-1295-472a-842b-61019c9050ef.png)

### Estimation

In [None]:
# Grid-search ARMA(p, q) orders, each with a constant trend term, on the
# savings series and print the AIC and BIC of each fit. Orders that cannot
# be fitted are reported with None in place of the scores so the search
# continues over the full grid.

# Loop over p values from 0-3
for p in range(4):
    # Loop over q values from 0-3
    for q in range(4):
        try:
            # Create and fit ARMA(p,q) model
            model = SARIMAX(savings, order=(p, 0, q), trend='c')
            results = model.fit()
            aic, bic = results.aic, results.bic
        except Exception:
            # Catch Exception rather than using a bare `except:` so that
            # KeyboardInterrupt and SystemExit still stop the search;
            # any ordinary fitting error is reported and skipped.
            print(p, q, None, None)
        else:
            # Print p, q, AIC, BIC
            print(p, q, aic, bic)

### Diagnostics

In [None]:
# Fit an ARMA(1,2) model with a constant trend term to savings
chosen_order = (1, 0, 2)
model = SARIMAX(savings, order=chosen_order, trend='c')
results = model.fit()

# Create the 4 diagnostics plots and display them
results.plot_diagnostics()
plt.show()

# Then print the summary table of the same fit
summary_table = results.summary()
print(summary_table)

![image.png](attachment:c7344b7b-6929-4372-b64e-c45850585ee0.png)