# Visualise and Analyse Final Dataset

In [1]:
%matplotlib inline

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from scipy.stats import mode

import statsmodels.formula.api as smf

The latest year with the most data available is **2014**. Whenever a single year has to be chosen, the analysis is conducted for this year. A range of years will be used if it becomes necessary to generalise or to acquire more results; this is another reason the year is stored in a variable instead of being hard-coded.

In [3]:
# Reference year used by the single-year cells of this notebook; change it
# here to re-run those cells for a different year.
magic_year = 2014

In [4]:
# Load the project dataset from a CSV file (path is relative to the working directory).
ds = pd.read_csv("project_data.csv")

# Preview the first rows of the loaded frame.
ds.head()

Unnamed: 0,Country Name,Country Code,Year,GDP,Economic Freedom Index,Democracy Score
0,Aruba,ABW,2011,25324.720363,,
1,Afghanistan,AFG,2011,603.537023,,2.48
2,Angola,AGO,2011,4299.012889,5.13,3.32
3,Albania,ALB,2011,4437.178068,7.25,5.81
4,Andorra,AND,2011,40919.183279,,


In [5]:
# Show every row whose country name is 'OECD members'
# (an empty result means no such row is present).
ds.loc[ds['Country Name'].eq('OECD members')]

Unnamed: 0,Country Name,Country Code,Year,GDP,Economic Freedom Index,Democracy Score


In [6]:
# List the column labels of the loaded frame.
ds.columns

Index([u'Country Name', u'Country Code', u'Year', u'GDP',
       u'Economic Freedom Index', u'Democracy Score'],
      dtype='object')

We decided not to include 2015 and 2016, so only rows with a year before 2015 are kept.

In [7]:
# Print the row count, keep only observations from before 2015,
# then print the row count again to show how many rows were removed.
print(ds.shape[0])
ds = ds.loc[ds['Year'] < 2015]
print(ds.shape[0])

1392
928


## Visualise and Describe Democracy, GDP, and Financial Liberties values for a single year

### GDP

In [None]:
# GDP values for the reference year, with missing entries removed.
gdp_data = ds.loc[ds['Year'] == magic_year, 'GDP'].dropna()

# Horizontal box plot of GDP in thousands, outliers drawn as green diamonds.
# notch/sym/vert are passed by keyword (previously the bare positionals
# 0, 'gD', 0) so the call is readable and does not depend on parameter order.
plt.figure()
plt.boxplot([gdp_data / 1000], notch=False, sym='gD', vert=False,
            labels=["GDP pca"])
plt.title("GDP per capita in $1,000 for %s" % magic_year)
plt.show()

In [None]:
# Summary statistics of the reference-year GDP values.
gdp_data.describe()

In [None]:
# Variance of the reference-year GDP values.
gdp_data.var()

In [None]:
# Mode of GDP expressed in whole thousands (fractional part truncated by int()).
# A concrete list is built because on Python 3 `map` returns a lazy iterator,
# which scipy.stats.mode cannot interpret as a sequence of values.
kgdp = [int(value) for value in gdp_data / 1000]
mode(kgdp)

In [None]:
# Jittered strip plot of the reference-year GDP values, scaled to thousands.
sns.stripplot(
    data=gdp_data.values / 1000,
    color="g",
    jitter=True,
)

### Economic Freedom

In [None]:
# Economic Freedom Index values for the reference year, missing entries removed.
economic_freedom = ds.loc[ds['Year'] == magic_year, 'Economic Freedom Index'].dropna()

# Horizontal box plot, outliers drawn as cyan diamonds.
# notch/sym/vert are passed by keyword (previously the bare positionals
# 0, 'cD', 0) so the call is readable and does not depend on parameter order.
plt.figure()
plt.boxplot([economic_freedom], notch=False, sym='cD', vert=False,
            labels=["Economic Freedom"])
plt.title("Economic Freedom Index for %s" % magic_year)
plt.show()

In [None]:
# Jittered strip plot of the reference-year Economic Freedom Index values.
sns.stripplot(
    data=economic_freedom.values,
    color="c",
    marker="d",
    jitter=True,
)

In [None]:
# Summary statistics of the reference-year Economic Freedom Index values.
economic_freedom.describe()

In [None]:
# Variance of the reference-year Economic Freedom Index values.
economic_freedom.var()

In [None]:
# Mode of the Economic Freedom Index truncated to whole numbers.
# A concrete list is built because on Python 3 `map` returns a lazy iterator,
# which scipy.stats.mode cannot interpret as a sequence of values.
economic_freedom_ints = [int(score) for score in economic_freedom]
mode(economic_freedom_ints)

### Civil Freedom

In [None]:
# Democracy Score values for the reference year, missing entries removed.
civil_freedom = ds.loc[ds['Year'] == magic_year, 'Democracy Score'].dropna()

# Horizontal box plot, outliers drawn as blue diamonds.
# notch/sym/vert are passed by keyword (previously the bare positionals
# 0, 'bD', 0) so the call is readable and does not depend on parameter order.
plt.figure()
plt.boxplot([civil_freedom], notch=False, sym='bD', vert=False,
            labels=["Democracy Score"])
plt.title("Freedom Index for %s" % magic_year)
plt.show()

In [None]:
# Jittered strip plot of the reference-year Democracy Score values.
sns.stripplot(
    data=civil_freedom.values,
    color="b",
    marker="v",
    jitter=True,
)

In [None]:
# Summary statistics of the reference-year Democracy Score values.
civil_freedom.describe()

In [None]:
# Variance of the reference-year Democracy Score values.
civil_freedom.var()

In [None]:
# Mode of the Democracy Score truncated to whole numbers.
# A concrete list is built because on Python 3 `map` returns a lazy iterator,
# which scipy.stats.mode cannot interpret as a sequence of values.
civil_freedom_ints = [int(score) for score in civil_freedom]
mode(civil_freedom_ints)

## Visualise relationships

In [None]:
# Keep only rows without missing values and express GDP in thousands.
# `assign` returns a new frame rather than writing a column into the result
# of `dropna`, which can trigger pandas' SettingWithCopyWarning.
df_viz1 = ds.dropna().assign(GDP=lambda frame: frame['GDP'] / 1000)

df_viz1.head()

In [None]:
# Constant column used only as the `hue` key so the palette can force cyan markers.
df_viz1['color'] = 'cyan'

# Scatter plot with fitted regression line: GDP vs. Economic Freedom Index.
# x, y and data are passed by keyword; recent seaborn releases reject the
# older positional (x, y, data) form.
sns.lmplot(x='GDP', y='Economic Freedom Index', data=df_viz1,
           line_kws={'color': 'cyan'}, markers=["d"],
           hue="color", palette=dict(cyan="cyan"))

In [None]:
# Constant column used only as the `hue` key so the palette can force blue markers.
df_viz1['color'] = 'blue'

# Scatter plot with fitted regression line: GDP vs. Democracy Score.
# x, y and data are passed by keyword; recent seaborn releases reject the
# older positional (x, y, data) form.
sns.lmplot(x='GDP', y='Democracy Score', data=df_viz1,
           line_kws={'color': 'blue'}, markers=["v"],
           hue="color", palette=dict(blue="blue"))

In [None]:
# Scatter plot with fitted regression line: Economic Freedom Index vs. Democracy Score.
# x, y and data are passed by keyword; recent seaborn releases reject the
# older positional (x, y, data) form.
sns.lmplot(x='Economic Freedom Index', y='Democracy Score', data=df_viz1)

### Correlation Heatmap

In [None]:
# Pairwise correlation matrix of the three measures, drawn as a heatmap.
correlations = df_viz1.loc[
    :, ['GDP', 'Economic Freedom Index', 'Democracy Score']
].corr()
sns.heatmap(correlations)

## Relationship summaries

In [None]:
# Give the two predictor columns space-free names so the regression formula
# below can refer to them as plain identifiers.
df_ = df_viz1.rename(
    columns={'Economic Freedom Index': 'EconomicFreedomIndex',
             'Democracy Score': 'DemocracyScore'},
)

# Ordinary least squares fit of GDP on both indices.
lm = smf.ols('GDP ~ EconomicFreedomIndex + DemocracyScore', data=df_).fit()

lm.summary()