In [1]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
import matplotlib.patches as mpatches
import seaborn as sb
import pandas as pd
import os
import ipywidgets as widgets
import warnings
warnings.filterwarnings("ignore")
# NOTE(review): `color` is never referenced as a name in the visible cells (only used as a
# keyword argument to matplotlib calls). This looks like an accidental IDE auto-import, and
# importing `turtle` pulls in tkinter, which may be missing on headless machines — confirm and remove.
from turtle import color
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
 
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Notebook-wide plotting defaults: (16, 9) figure size, drawn with the 'ggplot' style sheet.
plt.rcParams['figure.figsize'] = (16, 9)
plt.style.use('ggplot')
 
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import ElasticNetCV
from sklearn.metrics import mean_squared_error 
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.impute import SimpleImputer
from Project.Utils.visualize import  get_zone
from sklearn.neighbors import KNeighborsRegressor
from Project.Utils.visualize import normalize_by_country


In [2]:
# Directory holding the pipeline's CSV outputs, relative to the notebook's working directory.
# Both paths end with a separator because they are used by plain string concatenation.
output_path = os.getcwd() + '/Output/'
# `output_path` already ends with '/', so no leading separator here (avoids '.../Output//Cluster/').
cluster_path = output_path + 'Cluster/'

# Indicator table indexed by (Country, Year, Region).
df_gold = pd.read_csv(output_path + 'GoldDataframe.csv', index_col=['Country', 'Year', 'Region'])
# Cluster assignment table; the cells below read its 'Country' and 'Cluster' columns.
df_cluster = pd.read_csv(cluster_path + 'All indicators.csv')
# Alphabetically sorted country names, used as the dropdown options.
country_list = list(np.sort(df_gold.index.get_level_values('Country').unique()))

In [3]:
def get_cluster_countries(country):
    """Return the normalised indicator rows of every country in ``country``'s cluster.

    The cluster of ``country`` is looked up in the module-level ``df_cluster``
    (columns 'Country' and 'Cluster'); the rows of all countries sharing that
    cluster are then taken from ``df_gold``, sorted by index level 1 (remaining
    levels break ties), passed through ``normalize_by_country``, stripped of
    every column containing a NaN, and given a fresh 0..n-1 index.

    Parameters
    ----------
    country : str
        Country name as it appears in ``df_cluster['Country']``.

    Returns
    -------
    df : pandas.DataFrame
        The prepared rows (index reset, so Country/Year/Region are no longer available).
    country_cluster_list : list
        Names of all countries in the same cluster, including ``country`` itself.

    Raises
    ------
    ValueError
        If ``country`` does not have exactly one row in ``df_cluster``.
    """
    cluster_ids = df_cluster.loc[df_cluster['Country'] == country, 'Cluster']
    if len(cluster_ids) != 1:
        # Fail with an explicit message instead of the opaque error raised by `.item()`.
        raise ValueError(
            "Expected exactly one cluster entry for country {!r}, found {}".format(
                country, len(cluster_ids)
            )
        )
    country_cluster_target = cluster_ids.iloc[0]
    country_cluster_list = df_cluster.loc[
        df_cluster['Cluster'] == country_cluster_target, 'Country'
    ].tolist()

    in_cluster = df_gold.index.get_level_values('Country').isin(country_cluster_list)
    # Sorting by level 1 groups the rows level-1 value by level-1 value; machine_algorithm
    # relies on this layout when it offsets rows by len(country_cluster_list).
    # NOTE(review): that offset is only a clean one-step shift if every listed country has
    # exactly one row per level-1 value in df_gold — confirm the panel is balanced.
    df = df_gold.loc[in_cluster].sort_index(level=1)
    df = normalize_by_country(df)
    # Keep only indicators available for every row of the cluster.
    df = df.dropna(axis=1).reset_index(drop=True)

    return df, country_cluster_list

In [4]:
def machine_algorithm(df, country_cluster_list):
    """Fit a cross-validated ElasticNet that predicts GDP from earlier indicator rows.

    Features are all non-GDP columns of ``df`` except its last
    ``len(country_cluster_list)`` rows; the target is the 'GDP' column of ``df``
    shifted forward by the same number of rows. Features and target are paired
    by position.

    NOTE(review): this assumes ``df`` is laid out in consecutive groups of
    ``len(country_cluster_list)`` rows in a consistent order, so the offset pairs
    each row with the same entity one step later — confirm against the caller.

    Parameters
    ----------
    df : pandas.DataFrame
        Prepared indicator table containing a 'GDP' column and no NaNs.
    country_cluster_list : list
        Countries in the cluster; only its length is used (as the row offset).

    Returns
    -------
    model : sklearn.linear_model.ElasticNetCV
        The fitted estimator.
    score : float
        R^2 of the model on the held-out 30% split.
    feature_df : pandas.DataFrame
        The feature table used for the split (defines the model's input columns).

    Raises
    ------
    ValueError
        If ``country_cluster_list`` is empty.
    """
    step = len(country_cluster_list)
    if step == 0:
        # `df.iloc[:-0]` would silently produce an empty feature table.
        raise ValueError("country_cluster_list must contain at least one country")

    # Target: GDP `step` rows ahead. Kept 1-D because ElasticNetCV.fit expects y of
    # shape (n_samples,); a one-column DataFrame only works through an implicit conversion.
    X_Target = df['GDP'].iloc[step:]
    # Features: every indicator except GDP, for the rows that have a target `step` rows later.
    feature_df = df.iloc[:-step]
    feature_df = feature_df.loc[:, feature_df.columns != 'GDP']

    # Fixed random_state keeps the train/test split reproducible across runs.
    X_train, X_test, y_train, y_test = train_test_split(
        feature_df, X_Target, test_size=0.3, random_state=0
    )

    alphas = [0.0001, 0.001, 0.01, 0.1, 0.3, 0.5, 0.7, 1]
    elastic_cv = ElasticNetCV(alphas=alphas, cv=5, max_iter=100000, tol=0.001)
    model = elastic_cv.fit(X_train, y_train)
    score = model.score(X_test, y_test)

    return model, score, feature_df

In [5]:
def plot_machine(model, score, feature_df, country):
    """Plot a country's normalised GDP history together with the model's next-step prediction.

    The country's rows are taken from the module-level ``df_gold`` and passed
    through ``normalize_by_country``. The model is applied to the columns it was
    trained on, and the prediction for the last row is drawn one year after the
    last observed year, joined to the last observed GDP by a dashed line with a
    shaded band.

    Parameters
    ----------
    model : fitted estimator
        Object with a ``predict`` method, as returned by ``machine_algorithm``.
    score : float
        Model score; ``1 - score`` is used as the relative half-width of the shaded
        band (a visual heuristic, not a statistical interval).
    feature_df : pandas.DataFrame
        Training feature table; its columns select and order the prediction inputs.
    country : str
        Country name as it appears in the 'Country' index level of ``df_gold``.
    """
    df_country = df_gold.loc[df_gold.index.get_level_values('Country') == country]
    df_country = normalize_by_country(df_country)

    # Select exactly the training columns, in training order, so the input always
    # matches what the model was fitted on (a missing column fails loudly with KeyError).
    pred_df = df_country.loc[:, feature_df.columns]
    prediction = model.predict(pred_df)

    print(prediction)

    years = df_country.index.get_level_values('Year').values
    # Positional access: the index is a (Country, Year, Region) MultiIndex, so `[-1]`
    # would depend on the deprecated positional fallback of Series.__getitem__.
    last_gdp = df_country['GDP'].iloc[-1]
    # Derive the forecast span from the data rather than hard-coding calendar years.
    last_year = years[-1]
    forecast_year = last_year + 1
    span = np.array([last_year, forecast_year])

    aux_line = np.array([last_gdp, prediction[-1]])
    std_1 = 1 - score

    fig, ax = plt.subplots()
    ax.plot(forecast_year, prediction[-1], 'ro', label='Prediction')  # predicted point
    ax.fill_between(span, aux_line * (1 - std_1), aux_line * (1 + std_1), color='r', alpha=0.2)  # band
    # Colour and line style are given once, as keywords, instead of also via a fmt string.
    ax.plot(span, aux_line, color='red', linestyle='--')  # link from last observation to prediction
    ax.plot(years, df_country['GDP'], color='blue', linestyle='-', label='Original')  # observed GDP

    ax.set_xlabel('Year')
    ax.set_ylabel('GDP (normalized)')
    ax.set_title('GDP forecast: {}'.format(country))
    ax.legend()
    plt.show()


In [6]:
def table_machine(zone):
    """Run the whole forecast pipeline for the selected country and draw the result.

    ``zone`` is the country name chosen in the dropdown. The cluster data is
    gathered, the model is fitted on it, and the fit is handed to the plotting
    step. Nothing is returned.
    """
    cluster_frame, cluster_members = get_cluster_countries(zone)
    fitted = machine_algorithm(cluster_frame, cluster_members)
    # `fitted` is (model, score, feature_df), which are plot_machine's leading arguments.
    plot_machine(*fitted, zone)


# Dropdown listing every country in the dataset. 'Afghanistan' stays the default when it
# is present; otherwise fall back to the first option, because Dropdown rejects a value
# that is not among its options.
default_zone = 'Afghanistan' if 'Afghanistan' in country_list else country_list[0]
zone_drop_machine = widgets.Dropdown(
    options=country_list,
    value=default_zone,
    description='Zone:',
)

# Re-run the pipeline whenever the selection changes; the trailing ';' keeps the
# returned function's repr out of the cell output.
widgets.interact(table_machine, zone=zone_drop_machine);

interactive(children=(Dropdown(description='Zone:', options=('Afghanistan', 'Albania', 'Algeria', 'Angola', 'A…

<function __main__.table_machine(zone)>

In [7]:
# Inspect the normalised indicator table for a single country (Spain).
df = normalize_by_country(
    df_gold.loc[df_gold.index.get_level_values('Country') == 'Spain']
)
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,AgriShareGDP,CreditToAgriFishForest,EmploymentRural,GDP,% Soldiers,Employment in industry,Employment in services,Birth Rate,Cost business start-up,Death Rate,...,Researchers in R&D,R&D expenditure %GDP,% Rural Population,Tertiary School Gender Parity,% Vulnerable female employment,% Vulnerable male employment,Civil Liberties,Freedom of Expression,% Healthcare Investment,Population
Country,Year,Region,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
Spain,2000,Europe and Central Asia,1.0,0.375098,0.670979,0.0,1.0,0.947544,0.034916,0.642857,0.984733,0.777778,...,0.0,0.0,1.0,0.254446,1.0,0.910506,0.7,1.0,0.0,0.0
Spain,2001,Europe and Central Asia,1.0,0.263661,0.720201,0.029564,0.695104,1.0,0.0,0.666667,0.984733,0.666667,...,0.0,0.010657,0.98219,0.0,0.98374,0.96498,0.7,1.0,0.039855,0.07903
Spain,2002,Europe and Central Asia,0.866434,0.39971,0.776438,0.105551,0.989051,0.965029,0.067737,0.714286,0.984733,0.666667,...,0.0,0.14723,0.940413,0.442964,0.721545,0.750973,0.7,1.0,0.036232,0.17491
Spain,2003,Europe and Central Asia,0.825463,0.372793,0.820794,0.300056,0.589716,0.937552,0.111034,0.785714,0.984733,1.0,...,0.164496,0.269722,0.886544,0.446297,0.609756,0.338521,0.7,1.0,0.521739,0.283044
Spain,2004,Europe and Central Asia,0.611023,0.436677,0.872617,0.457272,0.469992,0.89592,0.162011,0.833333,1.0,0.444444,...,0.307059,0.302554,0.832894,0.867592,0.520325,0.400778,0.7,1.0,0.539855,0.396773
Spain,2005,Europe and Central Asia,0.356609,0.506828,0.923919,0.541011,0.387588,0.842631,0.219273,0.833333,0.961832,0.666667,...,0.450215,0.414809,0.779903,0.809444,0.560976,0.513619,0.7,1.0,0.652174,0.510324
Spain,2006,Europe and Central Asia,0.103548,0.474867,0.941679,0.644105,0.345115,0.828476,0.265363,0.880952,0.938931,0.111111,...,0.536116,0.563015,0.727353,0.894074,0.400406,0.361868,1.0,1.0,0.764493,0.623663
Spain,2007,Europe and Central Asia,0.171719,0.216057,0.994741,0.851003,0.291997,0.80433,0.306564,0.904762,0.854962,0.333333,...,0.634476,0.684531,0.675022,0.955001,0.343496,0.284047,1.0,1.0,0.724638,0.735574
Spain,2008,Europe and Central Asia,0.072943,0.251587,1.0,1.0,0.245681,0.697752,0.428771,1.0,0.839695,0.222222,...,0.762884,0.846149,0.623131,0.944444,0.272358,0.264591,1.0,1.0,0.811594,0.837778
Spain,2009,Europe and Central Asia,0.0,0.13812,0.869895,0.86407,0.217347,0.427977,0.649441,0.833333,0.847328,0.111111,...,0.788787,0.92008,0.57168,1.0,0.126016,0.066148,1.0,1.0,0.800725,0.920013
