In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error
from statsmodels.tsa.arima.model import ARIMA
import warnings
import pickle
import gzip

# Read the raw border-crossing CSV (path is relative to the notebook's folder)
# and preview the first rows as the cell output.
csv_path = '../Data/Border_Crossing_Entry_Data.csv'
df = pd.read_csv(filepath_or_buffer=csv_path)
df.head(n=5)





In [None]:
# Parse the string dates into datetimes so date comparisons and sorting work.
df['Date'] = pd.to_datetime(df['Date'])

# Distinct ports before any filtering. As a bare mid-cell expression this value
# was computed and silently discarded; keep it so it can be reported below.
n_ports_before_filter = df['Port Name'].nunique()

# Keep only US-Mexico Border rows. `.copy()` makes the result an independent
# frame, so adding columns or dropping rows in later cells does not raise
# SettingWithCopyWarning or write into a temporary slice.
# NOTE(review): later cells model a port named 'Sumas'; if that port is not on
# the US-Mexico Border this filter leaves no rows for it - confirm the intent.
df = df[df['Border'] == 'US-Mexico Border'].copy()

print(
    f"Ports before border filter: {n_ports_before_filter}; "
    f"after: {df['Port Name'].nunique()}"
)

In [None]:
# Plot total border-crossing volume over time.
# The frame holds rows for many ports, so several rows share the same date.
# Plotting the raw rows joins them into one jagged line that jumps between
# ports; summing per date first gives a single, chronologically ordered series.
volume_by_date = df.groupby('Date')['Value'].sum()

# Explicit figure/axes instead of the implicit pyplot state machine.
fig, ax = plt.subplots(figsize=(12, 6))
volume_by_date.plot(ax=ax)
ax.set_title('Border Crossing Volume Over Time (All Ports)')
ax.set_xlabel('Date')
# NOTE(review): the label says "Ten Thousands" but the values are plotted
# unscaled - confirm the intended unit.
ax.set_ylabel('Border Crossing Volume (Ten Thousands)')
plt.show()

### Feature Engineering

In [None]:

# Row offsets used to build lagged copies of the two coordinate columns.
# `shift(k)` moves values down by k rows of the frame in its current order;
# it does not consult the 'Date' column, so the offsets are rows, not days.
lags = [1, 7, 30, 365]
for lag in lags:
    shifted_latitude = df['Latitude'].shift(periods=lag)
    shifted_longitude = df['Longitude'].shift(periods=lag)
    df[f'Latitude_Lag_{lag}'] = shifted_latitude
    df[f'Longitude_Lag_{lag}'] = shifted_longitude

In [None]:
# Summarise column dtypes and non-null counts of the current frame.
df.info()

In [None]:
# Count missing values per column *before* dropping anything, and keep the
# result: as a bare expression in the middle of the cell it was never shown.
null_counts = df.isnull().sum()

# Drop every row that has a missing value in any column. This includes the
# leading rows whose lag columns are NaN because of the row shifts.
# Re-binding (instead of `inplace=True`) avoids mutating a frame that may be a
# slice of another one.
df = df.dropna()

# Last expression of the cell, so the notebook renders the per-column counts.
null_counts

### Model Training

In [None]:
# Column to predict and the candidate explanatory columns.
target = 'Value'
features = ['Latitude', 'Longitude', 'Date', 'Latitude_Lag_1', 'Longitude_Lag_1']

# Chronological hold-out: rows dated before the cut-off train, the rest test.
split_date = '2022-01-01'
is_train_row = df['Date'] < split_date
is_test_row = df['Date'] >= split_date
train_data, test_data = df[is_train_row], df[is_test_row]

X_train, y_train = train_data[features], train_data[target]
X_test, y_test = test_data[features], test_data[target]

print("Training set shape:", X_train.shape, y_train.shape)
print("Testing set shape:", X_test.shape, y_test.shape)

In [None]:
# Fit an ARIMA model for the Sumas port and score it on the hold-out period.

# NOTE(review): the frame was restricted to 'US-Mexico Border' rows earlier; if
# 'Sumas' is not on that border there is nothing to fit here - confirm the port.
train_sumas_data = train_data[train_data['Port Name'] == 'Sumas']
test_sumas_data = test_data[test_data['Port Name'] == 'Sumas']

# ARIMA treats its input as one observation per time step, in order. Collapse
# the rows to one total per date and sort by date, so the model does not depend
# on the CSV's row order or on several rows sharing a date. When the rows are
# already unique per date and sorted, the values are unchanged.
y_train_sumas = train_sumas_data.groupby('Date')[target].sum().sort_index()
y_test_sumas = test_sumas_data.groupby('Date')[target].sum().sort_index()

if y_train_sumas.empty:
    # Fail with an actionable message instead of an obscure fitting error.
    raise ValueError(
        "No training rows found for port 'Sumas'; "
        "check the border filter and the port name."
    )

order = (1, 0, 0)  # (p, d, q)
model = ARIMA(y_train_sumas, order=order)
trained_model = model.fit()

if y_test_sumas.empty:
    print("Insufficient testing data available for port: Sumas")
else:
    forecast = trained_model.forecast(steps=len(y_test_sumas))

    # Compare by position: the forecast's index need not match the test dates.
    actual_values = y_test_sumas.to_numpy()
    forecast_values = np.asarray(forecast)
    rmse = np.sqrt(mean_squared_error(actual_values, forecast_values))
    mae = mean_absolute_error(actual_values, forecast_values)

    print("Mean Absolute Error:", round(mae, 2))
    print("Root Mean Squared Error:", round(rmse, 2))





In [None]:
# Fit one ARIMA model per port and score each on the hold-out period.
#
# Results:
#   trained_models      port name -> fitted ARIMA results
#   evaluation_metrics  port name -> {'RMSE': ..., 'MAE': ...} (ports with test data only)
#   average_mae / average_rmse  unweighted means over the scored ports
#
# Loop-local names are deliberately different from `trained_model`, `forecast`,
# `rmse` and `mae`, so running this cell does not overwrite the single-port
# results held in those variables.

trained_models = {}
evaluation_metrics = {}
order = (1, 0, 0)  # (p, d, q), shared by every port

for port_name in df['Port Name'].unique():
    # One total per date, in chronological order: ARIMA treats its input as an
    # ordered sequence, so file order and repeated dates must not leak in.
    y_train_port = (
        train_data[train_data['Port Name'] == port_name]
        .groupby('Date')[target].sum().sort_index()
    )
    y_test_port = (
        test_data[test_data['Port Name'] == port_name]
        .groupby('Date')[target].sum().sort_index()
    )

    if y_train_port.empty:
        # A port that only appears in the test period cannot be fitted.
        print(f"Insufficient training data available for port: {port_name}")
        continue

    port_model = ARIMA(y_train_port, order=order).fit()
    trained_models[port_name] = port_model

    if y_test_port.empty:
        print(f"Insufficient testing data available for port: {port_name}")
        continue

    # Compare by position: the forecast's index need not match the test dates.
    port_forecast = np.asarray(port_model.forecast(steps=len(y_test_port)))
    port_actual = y_test_port.to_numpy()
    evaluation_metrics[port_name] = {
        'RMSE': np.sqrt(mean_squared_error(port_actual, port_forecast)),
        'MAE': mean_absolute_error(port_actual, port_forecast),
    }

# Calculate average MAE and RMSE across all scored ports
mae_values = [metrics['MAE'] for metrics in evaluation_metrics.values()]
rmse_values = [metrics['RMSE'] for metrics in evaluation_metrics.values()]

# np.mean([]) warns and returns nan; make the "nothing scored" case explicit.
average_mae = np.mean(mae_values) if mae_values else float('nan')
average_rmse = np.mean(rmse_values) if rmse_values else float('nan')

print("Average Mean Absolute Error across all ports:", round(average_mae, 2))
print("Average Root Mean Squared Error across all ports:", round(average_rmse, 2))

In [None]:
# Predicted value for the Sumas port on a future date.
prediction_date = '2026-01-01'

# Look the model up by port name. The bare `trained_model` variable holds
# whichever model was fitted last, which is not necessarily the Sumas one.
sumas_model = trained_models.get('Sumas')

if sumas_model is None:
    prediction_2026_01_01 = None
    print("No trained model available for the 'Sumas' port")
else:
    # `forecast(steps=1, index=...)` only re-labels the first out-of-sample step
    # with the given date. To reach `prediction_date` the model has to be
    # stepped forward far enough. The step length is estimated as the average
    # spacing of the training observations (assumes they are evenly spaced in
    # time - TODO confirm for this data set).
    sumas_dates = train_data.loc[train_data['Port Name'] == 'Sumas', 'Date']
    n_obs = int(sumas_model.nobs)
    training_span = sumas_dates.max() - sumas_dates.min()
    if n_obs > 1 and training_span > pd.Timedelta(0):
        step_length = training_span / (n_obs - 1)
        time_ahead = pd.Timestamp(prediction_date) - sumas_dates.max()
        steps_ahead = max(1, int(round(time_ahead / step_length)))
    else:
        # Not enough information to estimate a step length; single step.
        steps_ahead = 1

    forecast_path = sumas_model.forecast(steps=steps_ahead)
    # Keep the result as a one-element Series labelled with the target date.
    prediction_2026_01_01 = pd.Series(
        [np.asarray(forecast_path)[-1]],
        index=pd.to_datetime([prediction_date]),
    )

    # `.iloc[0]`: positional access; `[0]` on a date-indexed Series is deprecated.
    print("Predicted value for January 1, 2026 for the 'Sumas' port:", round(prediction_2026_01_01.iloc[0], 2))


In [None]:

def _steps_until(fitted, port_dates, when):
    """Estimate how many model steps lie between the end of training and `when`.

    The step length is taken as the average spacing of the training
    observations: (last date - first date) / (number of observations - 1).
    Assumes the observations are evenly spaced in time - TODO confirm.
    Returns at least 1; falls back to 1 when the spacing cannot be estimated.
    """
    n_obs = int(fitted.nobs)
    span = port_dates.max() - port_dates.min()
    if n_obs < 2 or not span > pd.Timedelta(0):
        return 1
    step_length = span / (n_obs - 1)
    return max(1, int(round((pd.Timestamp(when) - port_dates.max()) / step_length)))


predictions_2026_01_01 = {}

# `port_model` (not `trained_model`) so the loop does not overwrite the
# single-port model variable used elsewhere in the notebook.
for port_name, port_model in trained_models.items():
    # `forecast(steps=1, index=...)` would only re-label the first
    # out-of-sample step; step the model forward to the target date instead.
    port_dates = train_data.loc[train_data['Port Name'] == port_name, 'Date']
    horizon = _steps_until(port_model, port_dates, prediction_date)
    forecast_path = np.asarray(port_model.forecast(steps=horizon))
    predictions_2026_01_01[port_name] = round(forecast_path[-1], 2)

print("Predictions for January 1, 2026:")
for port_name, prediction in predictions_2026_01_01.items():
    print(f"Port: {port_name}, Predicted Value: {prediction}")

In [None]:
# Persist the fitted models together with their scores and predictions in a
# single pickle file written to the current working directory.
pickle_file = 'trained_models.pkl'

saved_data = {
    'trained_models': trained_models,
    'evaluation_metrics': evaluation_metrics,
    'average_mae': average_mae,
    'average_rmse': average_rmse,
    'predictions_2026_01_01': predictions_2026_01_01
}

with open(pickle_file, mode='wb') as out_handle:
    pickle.dump(saved_data, out_handle)

print(f"Data saved to {pickle_file}")
