# Estimating Economic Growth using Deep Learning

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.impute import KNNImputer
from sklearn.decomposition import PCA

In [None]:
# Load the dataset from 'data.csv' (path relative to the working directory).
# The bare `data` expression makes the notebook display the loaded frame.
data = pd.read_csv('data.csv')
data

## Analysing missing data

In [None]:
# Count the missing (NaN) entries in each column.
data.isna().sum()

## Calculating GDP per capita

In [None]:
# GDP per capita is total GDP divided by population, computed row by row.
data['gdp_per_capita'] = data['gdp'].div(data['population'])
data.tail()

## Dropping rows with missing GDP per capita

In [None]:
# Remove, in place, every row whose gdp_per_capita is NaN, then display the
# remaining (rows, columns) shape.
data.dropna(subset=['gdp_per_capita'], inplace=True)
data.shape

In [None]:
# Renumber the rows 0..n-1 in place. drop=True discards the old index directly
# instead of first materialising it as an 'index' column and then deleting
# that column, which would also remove a genuine column named 'index'.
data.reset_index(drop=True, inplace=True)

## Imputing the NaN values using k-Nearest Neighbors

In [None]:
# Every column from the third onward is imputed; the first two columns are
# left untouched.
feature_block = data.iloc[:, 2:]
# Each missing value is filled from the 20 nearest rows.
imp = KNNImputer(n_neighbors=20)
data.iloc[:, 2:] = imp.fit_transform(feature_block)
data.head()

## Changing base year of GDP deflator of every country to 2005

In [None]:
# Distinct values of the 'country' column, and how many there are.
countries = data['country'].unique()
len(countries)

In [None]:
# Rescale each country's GDP deflator so that its 2005 value becomes 100.
# Countries with no 2005 row are rescaled by their mean deflator instead.
for country in countries:
    in_country = data.country == country
    deflator = data.loc[in_country, 'gdp_deflator']
    at_2005 = deflator[data.loc[in_country, 'year'] == 2005]
    base = at_2005.iloc[0] if len(at_2005) else deflator.mean()
    data.loc[in_country, 'gdp_deflator'] = deflator / (base / 100)
data.head()

## Converting nominal variables to real variables

In [None]:
# Display the frame's current column labels.
data.columns

In [None]:
# Columns treated as nominal; each is deflated to real terms below.
nominal_var = [
    'gdp',
    'foreign_direct_investment',
    'gross_savings',
    'education_expenditure',
    'imports',
    'exports',
    'manufacturing',
    'agriculture_forestry_fishing',
    'industry',
    'gdp_per_capita',
]
# The deflator is an index with base 100, so divide by deflator / 100.
scale = data.gdp_deflator / 100
for var in nominal_var:
    data[var] = data[var] / scale
# The deflator has served its purpose and is removed from the frame.
data.drop(columns='gdp_deflator', inplace=True)
data.head()

## Calculating change in GDP per capita

In [None]:
# Work on a copy so `data` keeps the level values.
data_change = data.copy()
# gdp_per_capita2 holds the following row's GDP per capita. The last row has
# no successor and gets 0, matching the previous zero-initialised column.
# shift() replaces the chained assignment `df[col][:-1] = ...`, which is not
# guaranteed to write through to the frame (and never does under pandas
# Copy-on-Write).
# NOTE(review): the shift runs over the whole frame with no grouping by
# country, so a country's last row is paired with whatever row follows it.
data_change['gdp_per_capita2'] = data_change['gdp_per_capita'].shift(-1, fill_value=0)
# Change = following row's value minus this row's value.
data_change['gdp_per_capita_change'] = data_change.gdp_per_capita2 - data_change.gdp_per_capita
data_change.tail()

In [None]:
# Copy rows 1..end onto rows 0..end-1 for every column except the first and
# the last three, i.e. move those columns up by one row position. The final
# row keeps its own values.
# NOTE(review): this depends on `.iloc` assignment using the right-hand
# frame's values by position rather than aligning on its index labels;
# older pandas releases are believed to have aligned here - confirm against
# the pandas version in use.
data_change.iloc[:-1, 1:-3] = data_change.iloc[1:, 1:-3]
data_change.tail()

In [None]:
# Remove every row labelled with year 2015 by indexing on 'year', dropping
# that label, and turning 'year' back into an ordinary column.
data_change = (
    data_change
    .set_index('year')
    .drop(index=2015)
    .reset_index()
)
# Advance each remaining year label by one.
data_change['year'] = data_change['year'] + 1
data_change

In [None]:
# 'year', 'country' and 'gdp' are removed from both frames. data_change
# additionally loses the two GDP-per-capita level columns, leaving
# gdp_per_capita_change as its GDP-per-capita column.
shared_drop = ['year', 'country', 'gdp']
data.drop(columns=shared_drop, inplace=True)
data_change.drop(columns=shared_drop + ['gdp_per_capita', 'gdp_per_capita2'], inplace=True)

## Data Visualization

### Plot of the target variable

In [None]:
# Plot the level target and print its summary statistics.
target = data.gdp_per_capita
target.plot()
print('Mean:', target.mean())
print('Standard Deviation:', target.std())
# Series.mad() was deprecated in pandas 1.5 and removed in 2.0, so the mean
# absolute deviation around the mean is computed explicitly.
print('Mean Absolute Deviation:', (target - target.mean()).abs().mean())

In [None]:
# Plot the change target and print its summary statistics.
target_change = data_change.gdp_per_capita_change
target_change.plot()
print('Mean:', target_change.mean())
print('Standard Deviation:', target_change.std())
# Series.mad() was deprecated in pandas 1.5 and removed in 2.0, so the mean
# absolute deviation around the mean is computed explicitly.
print('Mean Absolute Deviation:', (target_change - target_change.mean()).abs().mean())

### Correlation heatmap between the variables

In [None]:
# Pairwise correlations of the level data, computed once and reused.
corr = data.corr()
# Mask the upper triangle (diagonal included) so each pair is drawn once.
matrix = np.triu(corr)
plt.figure(figsize=(15, 15))
sns.heatmap(corr, mask=matrix, square=True, linewidths=0.5)

In [None]:
# Pairwise correlations of the change data, computed once and reused.
corr_change = data_change.corr()
# Mask the upper triangle (diagonal included) so each pair is drawn once.
matrix = np.triu(corr_change)
plt.figure(figsize=(15, 15))
sns.heatmap(corr_change, mask=matrix, square=True, linewidths=0.5)

### Scatter plots of each input variable against the target variable

In [None]:
# One scatter plot per column, each against the last column of `data`.
columns = data.columns
for feature in columns[:-1]:
    data.plot.scatter(x=feature, y=columns[-1])

In [None]:
# One scatter plot per column, each against the last column of `data_change`.
columns_change = data_change.columns
for feature in columns_change[:-1]:
    data_change.plot.scatter(x=feature, y=columns_change[-1])

## Saving the data to a CSV file

In [None]:
# Write both frames to disk without the row index.
outputs = {'data_final.csv': data, 'data_change_final.csv': data_change}
for path, frame in outputs.items():
    frame.to_csv(path, index=False)