In [63]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
path = "C:\\Users\\JESSE\\Downloads\\1960_onwards.csv"
df = pd.read_csv(path)
import warnings
warnings.filterwarnings('ignore')
import pickle

In [64]:
df.head()

Unnamed: 0,Year,Consumer price index (2010 = 100),GDP (constant LCU),GDP (current LCU),GDP growth (annual %),GDP per capita (current US$),GDP per capita growth (annual %),"Inflation, GDP deflator (annual %)","Inflation, consumer prices (annual %)",Livestock production index (2014-2016 = 100),...,Other Assets Net,Money Supply M2,CBN Bills,Special Intervention Reserves,GDPBillions of US $,Per CapitaUS $,Growth RateAnnual % Change,Petrol Price (Naira),Food imports (% of merchandise imports),Food exports (% of merchandise imports)
0,1960,0.065886,9277980000000.0,2997269000.0,0.191795,93.397048,-1.83401,6.257169,5.444327,17.19,...,4.49,272.4,2447132.17,258214.99,4.196175,93.397,0.0,0.05,14.129951,64.523594
1,1961,0.070023,9295770000000.0,3190921000.0,0.191795,97.420968,-1.83401,6.257169,6.279147,17.19,...,-14.56,292.83,2447132.17,258214.99,4.467288,97.421,0.1918,0.05,14.129951,64.523594
2,1962,0.07371,9677180000000.0,3506715000.0,4.102993,104.852749,1.954451,5.565301,5.265632,15.71,...,-17.91,325.43,2447132.17,258214.99,4.909399,104.8527,4.103,0.05,14.129951,64.523594
3,1963,0.071724,10507300000000.0,3689709000.0,8.578619,108.015742,6.306528,-3.094752,-2.694655,15.95,...,-9.2,361.82,2447132.17,258214.99,5.16559,108.0157,8.5786,0.05,12.139264,62.312145
4,1964,0.072338,11027500000000.0,3966381000.0,4.950489,113.658375,2.729816,2.427809,0.856793,17.6,...,-0.49,430.49,2447132.17,258214.99,5.552931,113.6584,4.9505,0.05,9.237443,61.003786


## Regression Models

### Gdp (as target variable)

In [65]:
# Lasso reg is to be used for feature selection
from sklearn.linear_model import LassoCV
from sklearn.linear_model import Lasso
from sklearn.model_selection import train_test_split
# Feature selection with lasso reg model
X = df.drop(columns = ['GDP per capita (current US$)','Year','Consumer price index (2010 = 100)'])
y_gdp = df['GDP per capita (current US$)']
y_cpi = df['Consumer price index (2010 = 100)']

X_train, X_test, y_train_gdp, y_test_gdp = train_test_split(X, y_gdp, test_size=0.2, random_state=42)
# Use LassoCV to automatically tune the alpha parameter
alpha = np.logspace(-2,10,100)
lasso_cv = LassoCV(alphas = alpha,cv=5)
lasso_cv.fit(X_train, y_train_gdp)

# Best alpha found by cross-validation
best_alpha = lasso_cv.alpha_
print(f"Best Alpha: {best_alpha}")

# Fit the Lasso model with the best alpha
lasso_best = Lasso(alpha=best_alpha)
lasso_best.fit(X_train, y_train_gdp)

# Identify the selected features
selected_features_best = X.columns[lasso_best.coef_ != 0]
print("Selected Features with Best Alpha:")
print(selected_features_best)
print(f'{len(selected_features_best)} columns were selected')

Best Alpha: 8.111308307896872
Selected Features with Best Alpha:
Index(['GDP (constant LCU)', 'GDP (current LCU)', 'GDP growth (annual %)',
       'Inflation, GDP deflator (annual %)',
       'Official exchange rate (LCU per US$, period average)',
       'Total reserves (includes gold, current US$)', 'Population, total',
       'Population, female', 'Population, male', 'Trade Openness Index(%)',
       'Cumulative crude oil production up to and including year',
       'Narrow Money', 'Money Supply M3', 'Net Foreign Assets',
       'Net Domestic Credit', 'Credit to Government',
       'Credit to Private Sector', 'Base Money', 'Currency in Circulation',
       'Bank Reserves', 'Currency Outside Banks', 'Demand Deposits',
       'Quasi Money', 'Net Domestic Assets', 'Other Assets Net',
       'Money Supply M2', 'CBN Bills', 'Special Intervention Reserves',
       'GDPBillions of US $', 'Per CapitaUS $', 'Petrol Price (Naira)'],
      dtype='object')
31 columns were selected


In [66]:
from sklearn.linear_model import RidgeCV
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
# Select the relevant features
X_train_selected = X_train[selected_features_best]
X_test_selected = X_test[selected_features_best]

# Use RidgeCV for prediction
ridge_cv_gdp = RidgeCV(cv=5)
ridge_cv_gdp.fit(X_train_selected, y_train_gdp)
ridge_pred = ridge_cv_gdp.predict(X_test_selected)
best_alpha = ridge_cv_gdp.alpha_
print(f"Best Alpha is : {best_alpha}")
# Evaluate the model
ridge_score = ridge_cv_gdp.score(X_test_selected, y_test_gdp)
print(f"RidgeCV Score: {ridge_score}")
mse = mean_squared_error(y_test_gdp,ridge_pred)
print(f"the mse is : {mse}")
mae = mean_absolute_error(y_test_gdp,ridge_pred)
print(f"the mae is : {mae}")
with open('ridge_reg_gdp.pkl', 'wb') as f:
    pickle.dump(ridge_cv_gdp, f)

Best Alpha is : 0.1
RidgeCV Score: 0.9999975868506279
the mse is : 2.107149092951871
the mae is : 0.5567212134660162


In [67]:
# Use ElasticNetCV for prediction
from sklearn.linear_model import ElasticNetCV
from sklearn.linear_model import ElasticNet
# Use ElasticNetCV to automatically tune alpha and l1_ratio parameters
# Placing a limit so the alpha parameter value doesn't end up being too large
alpha = np.logspace(-3,10,100 ,base =2)
elastic_net_cv = ElasticNetCV(alphas = alpha , cv=5, l1_ratio=[.1, .5, .7, .9, .95, .99, 1])
elastic_net_cv.fit(X_train_selected, y_train_gdp)

# Best alpha and l1_ratio found by cross-validation
best_alpha_enet = elastic_net_cv.alpha_
best_l1_ratio = elastic_net_cv.l1_ratio_
print(f"Best Alpha: {best_alpha_enet}")
print(f"Best L1 Ratio: {best_l1_ratio}")

# Fit the ElasticNet model with the best parameters
elastic_net_best_gdp = ElasticNet(alpha=best_alpha_enet, l1_ratio=best_l1_ratio)
elastic_net_best_gdp.fit(X_train_selected, y_train_gdp)

elastic_net_pred = elastic_net_best_gdp.predict(X_test_selected)
# Evaluate the model
elastic_net_score = elastic_net_best_gdp.score(X_test_selected, y_test_gdp)
print(f"RidgeCV Score: {elastic_net_score}")
mse = mean_squared_error(y_test_gdp,elastic_net_pred)
print(f"the mse is : {mse}")
mae = mean_absolute_error(y_test_gdp,elastic_net_pred)
print(f"the mae is : {mae}")
# Save the model
with open('elastic_net_gdp.pkl', 'wb') as f:
    pickle.dump(elastic_net_best_gdp, f)

Best Alpha: 29.42127531281359
Best L1 Ratio: 1.0
RidgeCV Score: 0.999984035279352
the mse is : 13.940308470816568
the mae is : 2.825287380575572


In [68]:
X_train, X_test, y_train_cpi, y_test_cpi = train_test_split(X, y_cpi, test_size=0.2, random_state=42)
# Use RidgeCV for prediction
ridge_cv = RidgeCV(cv=5)
ridge_cv.fit(X_train_selected, y_train_cpi)
ridge_pred = ridge_cv.predict(X_test_selected)
best_alpha = ridge_cv.alpha_
print(f"Best Alpha is : {best_alpha}")
# Evaluate the model
ridge_score = ridge_cv.score(X_test_selected, y_test_cpi)
print(f"RidgeCV Score: {ridge_score}")
mse = mean_squared_error(y_test_cpi,ridge_pred)
print(f"the mse is : {mse}")
mae = mean_absolute_error(y_test_cpi,ridge_pred)
print(f"the mae is : {mae}")
# Save the model
with open('ridge_reg_cpi.pkl', 'wb') as f:
    pickle.dump(ridge_cv, f)

Best Alpha is : 10.0
RidgeCV Score: -2.3082005980681988
the mse is : 72278.48057301727
the mae is : 125.80265630609948


In [69]:
# Use ElasticNetCV to automatically tune alpha and l1_ratio parameters
alpha = np.logspace(-2,1,100)
elastic_net_cv = ElasticNetCV(alphas = alpha,cv=5, l1_ratio=[.1, .5, .7, .9, .95, .99, 1])
elastic_net_cv.fit(X_train_selected, y_train_cpi)

# Best alpha and l1_ratio found by cross-validation
best_alpha_enet = elastic_net_cv.alpha_
best_l1_ratio = elastic_net_cv.l1_ratio_
print(f"Best Alpha: {best_alpha_enet}")
print(f"Best L1 Ratio: {best_l1_ratio}")

# Fit the ElasticNet model with the best parameters
elastic_net_best = ElasticNet(alpha=best_alpha_enet, l1_ratio=best_l1_ratio)
elastic_net_best.fit(X_train_selected, y_train_cpi)

elastic_net_pred = elastic_net_best.predict(X_test_selected)
# Evaluate the model
elastic_net_score = elastic_net_best.score(X_test_selected, y_test_cpi)
print(f"RidgeCV Score: {elastic_net_score}")
mse = mean_squared_error(y_test_cpi,elastic_net_pred)
print(f"the mse is : {mse}")
mae = mean_absolute_error(y_test_cpi,elastic_net_pred)
print(f"the mae is : {mae}")
# Save the model
with open('elastic_net_cpi.pkl', 'wb') as f:
    pickle.dump(elastic_net_best, f)

Best Alpha: 10.0
Best L1 Ratio: 0.5
RidgeCV Score: 0.9990509559705677
the mse is : 20.734976133042927
the mae is : 3.135340886256764


In [70]:
!pip install -q dagshub
from dagshub.notebook import save_notebook

# Define the path to your notebook and the desired upload path in the repository
upload_path = "Regression/1960_onwards_regression_GDP_per_capita_.ipynb"  
# Save the notebook to the specified folder in the "Model" branch of the repository
save_notebook(repo="Omdena/KadunaNigeriaChapter_NigeriaEconomy", path=upload_path, branch="Model")




NotADirectoryError: [WinError 267] The directory name is invalid: 'C:\\Users\\JESSE\\AppData\\Local\\Temp\\tmplluh4v7i\\1960_onwards_regression_GDP_per_capita_.ipynb'