# Visually Exploring Data using World Development Indicators

In [None]:
%matplotlib inline

import pandas as pd
import numpy as np
import random
import matplotlib.pyplot as plt


In [None]:
# Load the pre-processed World Development Indicators table.
# NOTE(review): the '_data/../_data' hop in the path looks redundant -- confirm before simplifying.
data = pd.read_csv(filepath_or_buffer='../_data/../_data/WDIData_processed.csv')

In [None]:
# Print a concise summary of the frame (columns, dtypes, non-null counts, memory usage).
data.info()

This is a really large dataset, at least in terms of the number of rows. But with only 6 columns, what does it actually hold?

In [None]:
# Peek at the first two rows to see what each column contains.
data.head(2)

## Filter complete features

### Select years

In [None]:
# Analysis window: 2010 through 2015 inclusive.
years = list(range(2010, 2016))

# Keep only the observations whose Year falls inside that window.
df = data[data['Year'].isin(years)]

### Select country–indicator pairs that have a value for every selected year (count == len(years))

In [None]:
# count() tallies the non-null entries of every remaining column per
# (CountryName, IndicatorName) group; a pair is kept only when its 'Value'
# count equals the number of selected years.
df = (
    df.groupby(['CountryName', 'IndicatorName'])
      .count()
      .loc[lambda counts: counts['Value'] == len(years)]
)

### Reset the grouped index and keep only the 'CountryName' and 'IndicatorName' columns

In [None]:
# Turn the (CountryName, IndicatorName) group index back into ordinary columns
# and discard the per-column counts, leaving one row per complete pair.
df = df.reset_index()[['CountryName', 'IndicatorName']]

### Sample from complete features

In [None]:
# Sample the indicator first, then draw two *distinct* countries that both have
# a complete series for it.  Sampling the two columns independently can return
# the same country twice, or a country that lacks the chosen indicator, which
# leaves the filtered frames empty or of unequal length.

# Keep only rows whose indicator is available for at least two countries,
# so that a pair of countries can always be drawn for the sampled indicator.
countries_per_indicator = df.groupby('IndicatorName')['CountryName'].transform('nunique')
comparable = df.loc[countries_per_indicator >= 2]

indicator = comparable['IndicatorName'].sample().values[0]

# sample(2) draws without replacement, so the two countries always differ.
countries = (
    comparable.loc[comparable['IndicatorName'] == indicator, 'CountryName']
    .drop_duplicates()
    .sample(2)
    .values.tolist()
)

# Show what was drawn.
countries, indicator

## Select from data based on sampled features

In [None]:
# Rows for the sampled indicator inside the selected years; this part of the
# filter is shared by both countries.
indicator_in_window = (data['IndicatorName'] == indicator) & data['Year'].isin(years)

# One frame per sampled country.
filteredData1 = data.loc[(data['CountryName'] == countries[0]) & indicator_in_window]
filteredData2 = data.loc[(data['CountryName'] == countries[1]) & indicator_in_window]

In [None]:
# Display the rows selected for the first sampled country (countries[0]).
filteredData1

In [None]:
# Display the rows selected for the second sampled country (countries[1]).
filteredData2

## Scatter Plot

In [None]:
fig, axis = plt.subplots()

# Grid lines, title and axis labels: the first sampled country is on the
# x axis, the second on the y axis.  The names come from `countries` (the
# values the two frames were filtered by) so labelling works even when a
# frame turns out to be empty.
axis.yaxis.grid(True)
axis.set_title(indicator, fontsize=10)
axis.set_xlabel(countries[0], fontsize=10)
axis.set_ylabel(countries[1], fontsize=10)

# Pair the two series explicitly on Year, so every point compares the same
# year in both countries regardless of row order, and only years present in
# both frames are plotted.
paired = filteredData1[['Year', 'Value']].merge(
    filteredData2[['Year', 'Value']],
    on='Year',
    suffixes=('_x', '_y'),
).sort_values('Year')

X = paired['Value_x']
Y = paired['Value_y']

axis.scatter(X, Y);

## Line Plot

In [None]:
fig, ax = plt.subplots(figsize=(20, 10))

# 1. Adjust the lower and upper limits from BOTH series so neither line is
#    clipped.  The lower bound is anchored at zero unless values go negative;
#    the upper bound is twice the maximum (never below zero, so all-negative
#    data stays visible), which leaves room above the lines for the legend
#    placed at the upper center.
lowest = min(filteredData1['Value'].min(), filteredData2['Value'].min())
highest = max(filteredData1['Value'].max(), filteredData2['Value'].max())
ax.set_ylim(min(0, lowest), max(0, 2 * highest))

# 2. Place in perspective of full dataset
# ax.set_ylim(data['Value'].min(), data['Value'].max())

ax.set_title('Indicator Name : ' + indicator)

# Legend labels must be plain strings; passing the array returned by
# .unique() would render as "['<name>']" in the legend.
ax.plot(filteredData1['Year'], filteredData1['Value'], 'r--', label=countries[0])

ax.plot(filteredData2['Year'], filteredData2['Value'], label=countries[1],
        color="purple", lw=1, ls='-',
        marker='s',
        markersize=10,
        markerfacecolor="white",
        markeredgewidth=1,
        markeredgecolor="blue")

# Add the legend
legend = ax.legend(loc='upper center',
#                    shadow=True,
                   prop={'weight': 'roman', 'size': 'xx-large'})

# Rectangle around the legend: light grey background
frame = legend.get_frame()
frame.set_facecolor('.95')
plt.show();

## 3D plot

In [None]:
# Importing Axes3D is what registers the '3d' projection on older Matplotlib
# releases; newer releases register it automatically, so the name is unused here.
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401
from matplotlib import cm

countof_angles = 36
countof_radii  = 8

# array - radii: 8 evenly spaced radii from 0.125 to 1.0
array_rad = np.linspace(0.125, 1.0, countof_radii)

# array - angles: 36 evenly spaced angles in [0, 2*pi), endpoint excluded
array_ang = np.linspace(0, 2*np.pi, countof_angles, endpoint=False)

# repeat all angles per radius -> shape (countof_angles, countof_radii),
# so the products below broadcast against array_rad
array_ang = np.repeat(array_ang[..., np.newaxis], countof_radii, axis=1)

# from polar (radii, angles) coords to cartesian (x, y) coords;
# the origin (0, 0) is prepended as an extra point
x = np.append(0, (array_rad*np.cos(array_ang)).flatten())
y = np.append(0, (array_rad*np.sin(array_ang)).flatten())

# saddle shaped surface
z = np.sin(-x*y)

fig = plt.figure(figsize=(20, 10))
# fig.gca(projection='3d') no longer accepts keyword arguments on current
# Matplotlib; create the 3D axes explicitly instead.
ax = fig.add_subplot(111, projection='3d')

ax.plot_trisurf(x, y, z, cmap=cm.autumn, linewidth=0.2)

# Save before showing so the file is written from the fully drawn figure
# regardless of how the active backend handles figures after show().
fig.savefig("vis_3d.png")
plt.show();

## Bubble plot

In [None]:
# Random polar scatter: 200 points with radius in [0, 2) and angle in [0, 2*pi).
n_points = 200
radius   = 2 * np.random.rand(n_points)
angles   = 2 * (np.pi) * np.random.rand(n_points)
# Marker area grows with radius squared, scaled by an extra random factor.
area     = 400 * (radius**2) * np.random.rand(n_points)

# Colour each bubble by its angle (mapped through the cyclic hsv colormap).
colors = angles

fig = plt.figure(figsize=(20, 10))
ax  = fig.add_subplot(111, polar=True)

c = ax.scatter(angles, radius, c=colors, s=area, cmap=plt.cm.hsv)
# Alpha must lie in [0, 1]; the previous value of 1.95 is rejected by current
# Matplotlib with a ValueError.  0.95 keeps the bubbles almost opaque.
c.set_alpha(0.95)

# Save before showing so the file is written from the fully drawn figure.
fig.savefig("vis_bubbleplot.png")
plt.show();

## Boxplot

In [None]:
np.random.seed(452)

# Three arrays of 100 points each, drawn from zero-mean normal distributions
# with standard deviations 1, 2 and 1.5
A1 = np.random.normal(0, 1, 100)
A2 = np.random.normal(0, 2, 100)
A3 = np.random.normal(0, 1.5, 100)

# Collect the three arrays.  Deliberately NOT named `data`: that name holds
# the WDI DataFrame loaded at the top of the notebook, and overwriting it
# would break every earlier cell on re-run.
samples = [A1, A2, A3]

fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(20, 10))

# Vertical orientation is the default for boxplot/violinplot, so it is not
# passed explicitly.

# Box plot: notched shape (middle panel)
bplot1 = axes[1].boxplot(samples,
                         notch=True,
                         patch_artist=True)   # filled boxes, so they can be coloured

# Box plot: rectangular (left panel)
bplot2 = axes[0].boxplot(samples,
                         patch_artist=True)   # filled boxes, so they can be coloured

# Violin plot (right panel)
bplot3 = axes[2].violinplot(samples)

colors = ['tomato', 'darkorchid', 'lime']
# more colors here: http://matplotlib.org/examples/color/named_colors.html

# Colour the boxes of both box plots, one colour per sample
for bplot in (bplot1, bplot2):
    for patch, color in zip(bplot['boxes'], colors):
        patch.set_facecolor(color)

# Boxes and violins are drawn at x = 1..N by default, so the ticks (and their
# labels) must sit at 1..N as well -- not 0..N-1 -- to line up with the plots.
tick_positions = list(range(1, len(samples) + 1))

# Grid lines, Xticks, Xtick labels, Xlabel, Ylabel
for axis in axes:
    axis.yaxis.grid(True)
    axis.set_xticks(tick_positions)
    axis.set_xticklabels(['X1', 'X2', 'X3'])
    axis.set_xlabel('Sample X-Label', fontsize=20)
    axis.set_ylabel('Sample Y-Label', fontsize=20)

# Save before showing so the file is written from the fully drawn figure.
fig.savefig("vis_boxplot.png")
plt.show();