<a href="https://colab.research.google.com/github/drstannwoji2019/ML_Projects/blob/main/FDI_Rem_ML.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_absolute_error, mean_squared_error


In [None]:
# Load the data
data = pd.read_csv("/FDI_Remittances_Ghana.csv")

# Clean the data by converting numeric columns
data['FDI_Ghana'] = data['FDI_Ghana'].str.replace(',', '').astype(float)
data['Rem_Ghana'] = data['Rem_Ghana'].str.replace(',', '').astype(float)


In [None]:
def generate_lagged_features(data, column_name, lags=3):
    for lag in range(1, lags + 1):
        data[f'{column_name}_lag_{lag}'] = data[column_name].shift(lag)
    return data.dropna().reset_index(drop=True)

# Apply lagged features for both FDI and Remittances
data_fdi = generate_lagged_features(data.copy(), 'FDI_Ghana', lags=3)
data_rem = generate_lagged_features(data.copy(), 'Rem_Ghana', lags=3)


In [None]:
def generate_lagged_features(data, column_name, lags=3):
    for lag in range(1, lags + 1):
        data[f'{column_name}_lag_{lag}'] = data[column_name].shift(lag)
    return data.dropna().reset_index(drop=True)

# Apply lagged features for both FDI and Remittances
data_fdi = generate_lagged_features(data.copy(), 'FDI_Ghana', lags=3)
data_rem = generate_lagged_features(data.copy(), 'Rem_Ghana', lags=3)


In [None]:
# Define features and target variables
X_fdi = data_fdi[['Year', 'FDI_Ghana_lag_1', 'FDI_Ghana_lag_2', 'FDI_Ghana_lag_3']]
y_fdi = data_fdi['FDI_Ghana']

X_rem = data_rem[['Year', 'Rem_Ghana_lag_1', 'Rem_Ghana_lag_2', 'Rem_Ghana_lag_3']]
y_rem = data_rem['Rem_Ghana']

# Split into training and testing sets
X_fdi_train, X_fdi_test, y_fdi_train, y_fdi_test = train_test_split(X_fdi, y_fdi, test_size=0.2, random_state=0)
X_rem_train, X_rem_test, y_rem_train, y_rem_test = train_test_split(X_rem, y_rem, test_size=0.2, random_state=0)


In [None]:
# Define models
models = {
    'k-NN': KNeighborsRegressor(n_neighbors=5),
    'SVR': SVR(kernel='rbf', C=100, gamma=0.1, epsilon=0.1),
    'Decision Tree': DecisionTreeRegressor(),
    'Random Forest': RandomForestRegressor(n_estimators=50, random_state=0)
}

# Initialize results dictionary
results = {'Model': [], 'FDI_MAE': [], 'FDI_MSE': [], 'FDI_RMSE': [], 'Rem_MAE': [], 'Rem_MSE': [], 'Rem_RMSE': []}

# Train and evaluate each model
for name, model in models.items():
    # FDI Predictions
    model.fit(X_fdi_train, y_fdi_train)
    fdi_pred = model.predict(X_fdi_test)
    results['Model'].append(name)
    results['FDI_MAE'].append(mean_absolute_error(y_fdi_test, fdi_pred))
    results['FDI_MSE'].append(mean_squared_error(y_fdi_test, fdi_pred))
    results['FDI_RMSE'].append(mean_squared_error(y_fdi_test, fdi_pred, squared=False))

    # Remittance Predictions
    model.fit(X_rem_train, y_rem_train)
    rem_pred = model.predict(X_rem_test)
    results['Rem_MAE'].append(mean_absolute_error(y_rem_test, rem_pred))
    results['Rem_MSE'].append(mean_squared_error(y_rem_test, rem_pred))
    results['Rem_RMSE'].append(mean_squared_error(y_rem_test, rem_pred, squared=False))




In [None]:
# ARIMA for FDI only
arima_fdi = ARIMA(y_fdi, order=(1, 1, 1)).fit()
arima_fdi_pred = arima_fdi.predict(start=len(y_fdi_train), end=len(y_fdi)-1)

# Calculate metrics for ARIMA
results['Model'].append('ARIMA')
results['FDI_MAE'].append(mean_absolute_error(y_fdi_test, arima_fdi_pred))
results['FDI_MSE'].append(mean_squared_error(y_fdi_test, arima_fdi_pred))
results['FDI_RMSE'].append(mean_squared_error(y_fdi_test, arima_fdi_pred, squared=False))
results['Rem_MAE'].append(np.nan)  # ARIMA not applied to Rem_Ghana in this case
results['Rem_MSE'].append(np.nan)
results['Rem_RMSE'].append(np.nan)




In [None]:
# Convert results to DataFrame
results_df = pd.DataFrame(results)
print(results_df)


           Model       FDI_MAE       FDI_MSE      FDI_RMSE       Rem_MAE  \
0           k-NN  2.384450e+08  7.020717e+16  2.649664e+08  1.066083e+09   
1            SVR  3.969782e+08  2.026446e+17  4.501606e+08  1.493070e+09   
2  Decision Tree  4.131853e+08  4.527318e+17  6.728535e+08  9.533461e+08   
3  Random Forest  3.081843e+08  2.514366e+17  5.014345e+08  7.792073e+08   
4          ARIMA  6.708059e+08  5.804282e+17  7.618584e+08           NaN   

        Rem_MSE      Rem_RMSE  
0  1.474426e+18  1.214259e+09  
1  3.230484e+18  1.797355e+09  
2  2.307840e+18  1.519158e+09  
3  1.488000e+18  1.219836e+09  
4           NaN           NaN  
