<a href="https://colab.research.google.com/github/drstannwoji2019/ML_Projects/blob/main/Total_FDI_vs_Total_Rem.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
# Data Preparation
import pandas as pd

# Load the dataset
file_path = '/content/T-FDI_vs_T-Rem_CGPT.csv'
data = pd.read_csv(file_path)

# Clean and convert the data to numeric by removing commas and casting to float
data['FDI_Total'] = data['FDI_Total'].str.replace(',', '').astype(float)
data['Rem_Total'] = data['Rem_Total'].str.replace(',', '').astype(float)

# Display the cleaned dataset
data.head()


Unnamed: 0,FDI_Total,Rem_Total
0,2205362000.0,2499016000.0
1,5354688000.0,14832860000.0
2,5739298000.0,17195610000.0
3,7724402000.0,18291410000.0
4,11316410000.0,19471590000.0


In [3]:
# Simple Linear Regression
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Separate the independent and dependent variables
X = data[['FDI_Total']].values
y = data['Rem_Total'].values

# Initialize and fit the linear regression model
lr_model = LinearRegression()
lr_model.fit(X, y)

# Predict values
y_pred = lr_model.predict(X)

# Calculate evaluation metrics
mse = mean_squared_error(y, y_pred)
mae = mean_absolute_error(y, y_pred)
r2 = r2_score(y, y_pred)

# Display the results
print("Linear Regression Results:")
print(f"Mean Squared Error (MSE): {mse}")
print(f"Mean Absolute Error (MAE): {mae}")
print(f"R-squared (R²): {r2}")
print(f"Intercept: {lr_model.intercept_}")
print(f"Coefficient: {lr_model.coef_[0]}")


Linear Regression Results:
Mean Squared Error (MSE): 3.307443632028242e+19
Mean Absolute Error (MAE): 4302673344.010739
R-squared (R²): 0.03372003798330192
Intercept: 19214618267.013615
Coefficient: 0.3135354402644768


In [4]:
# ARIMA Model
from statsmodels.tsa.arima.model import ARIMA

# Fit ARIMA model
arima_model = ARIMA(data['Rem_Total'], order=(1, 1, 1))
arima_fit = arima_model.fit()

# Display ARIMA summary
print("ARIMA Model Summary:")
print(arima_fit.summary())


  warn('Non-stationary starting autoregressive parameters'
  warn('Non-invertible starting MA parameters found.'


ARIMA Model Summary:
                               SARIMAX Results                                
Dep. Variable:              Rem_Total   No. Observations:                   19
Model:                 ARIMA(1, 1, 1)   Log Likelihood                -426.923
Date:                Wed, 11 Dec 2024   AIC                            859.846
Time:                        06:53:05   BIC                            862.517
Sample:                             0   HQIC                           860.214
                                 - 19                                         
Covariance Type:                  opg                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1         -0.1118      1.657     -0.067      0.946      -3.359       3.135
ma.L1          0.2945      1.613      0.183      0.855      -2.866       3.455
sigma2      5.487e+18        na

In [5]:
# SARIMA Model for potential seasonality
from statsmodels.tsa.statespace.sarimax import SARIMAX

# Fit SARIMA model
sarima_model = SARIMAX(data['Rem_Total'], order=(1, 1, 1), seasonal_order=(1, 1, 1, 12))
sarima_fit = sarima_model.fit()

# Display SARIMA summary
print("SARIMA Model Summary:")
print(sarima_fit.summary())


  warn('Too few observations to estimate starting parameters%s.'


SARIMA Model Summary:
                                     SARIMAX Results                                      
Dep. Variable:                          Rem_Total   No. Observations:                   19
Model:             SARIMAX(1, 1, 1)x(1, 1, 1, 12)   Log Likelihood                -140.966
Date:                            Wed, 11 Dec 2024   AIC                            291.931
Time:                                    06:55:40   BIC                            290.890
Sample:                                         0   HQIC                           287.763
                                             - 19                                         
Covariance Type:                              opg                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
ar.L1          0.0447      0.758      0.059      0.953      -1.440       1.529
ma.L1        

In [6]:
# Model Comparison
# Gather AIC values for comparison
arima_aic = arima_fit.aic
sarima_aic = sarima_fit.aic

print("Model AIC Comparison:")
print(f"ARIMA AIC: {arima_aic}")
print(f"SARIMA AIC: {sarima_aic}")


Model AIC Comparison:
ARIMA AIC: 859.8458410290962
SARIMA AIC: 291.93132921936336


In [7]:
# Econometric Equation: Constructed from the simple linear regression equation
print("Econometric Equation:")
print(f"Rem_Total = {lr_model.coef_[0]} * FDI_Total + {lr_model.intercept_}")


Econometric Equation:
Rem_Total = 0.3135354402644768 * FDI_Total + 19214618267.013615
