# 7.3 *cufflinks*: More Charts

- [Maps](#Maps)   
- [Scatter Plots](#Scatter-Plots) 
- [Bubble Charts](#Bubble-Charts)


- [**Other Charts**](#Other-Charts)    
  - [Ratio Charts](#Ratio-Charts)  


In [18]:
from IPython.display import display, HTML
import pandas as pd
import numpy as np
import math

# Using plotly + cufflinks in offline mode

import plotly.plotly as py
import plotly.graph_objs as go
import plotly
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
import cufflinks as cf
# Configure cufflinks for offline plotting
cf.set_config_file(offline=True)

# Read Country data into dataframe

In [19]:
# Load the country-level dataset from the project's Data folder.
# A forward slash keeps this relative path valid on Windows, macOS and Linux;
# the previous '\\' separator only resolved on Windows.
df = pd.read_csv('Data/Country_Data.csv')
df.head()

Unnamed: 0,country,continent,year,lifeExpectancy,population,gdpPerCapita
0,Afghanistan,Asia,1952,28.801,8425333,779.445314
1,Afghanistan,Asia,1957,30.332,9240934,820.85303
2,Afghanistan,Asia,1962,31.997,10267083,853.10071
3,Afghanistan,Asia,1967,34.02,11537966,836.197138
4,Afghanistan,Asia,1972,36.088,13079460,739.981106


In [20]:
# Report the dimensions of the loaded frame
n_rows, n_cols = df.shape
print("Number of Rows in df:  ", n_rows)
print("Number of Columns  in df:  ", n_cols)

Number of Rows in df:   1704
Number of Columns  in df:   6


# Maps  
- This is called a **Choropleth Map** (or a Filled Map for Tableau users)  
- This uses original (Tall), not Pivoted (Wide) data
- But we use a Row Filter to get a particular year:  1952

In [21]:
# Peek at the first two rows as a reminder of the column layout
df.head(2)

Unnamed: 0,country,continent,year,lifeExpectancy,population,gdpPerCapita
0,Afghanistan,Asia,1952,28.801,8425333,779.445314
1,Afghanistan,Asia,1957,30.332,9240934,820.85303


In [22]:
# List the distinct years present in the data before filtering on one of them
df['year'].unique()

array([1952, 1957, 1962, 1967, 1972, 1977, 1982, 1987, 1992, 1997, 2002,
       2007], dtype=int64)

In [23]:
# Boolean mask selecting the 1952 observations, then the matching rows
row_filter = df['year'].eq(1952)
df_1952 = df.loc[row_filter]
df_1952.head()

Unnamed: 0,country,continent,year,lifeExpectancy,population,gdpPerCapita
0,Afghanistan,Asia,1952,28.801,8425333,779.445314
12,Albania,Europe,1952,55.23,1282697,1601.056136
24,Algeria,Africa,1952,43.077,9279525,2449.008185
36,Angola,Africa,1952,30.015,4232095,3520.610273
48,Argentina,Americas,1952,62.485,17876956,5911.315053


In [24]:
# Sort by highest GDP to lowest.
# Reassign rather than sorting with inplace=True: df_1952 is a filtered slice of
# df, so an in-place sort can raise SettingWithCopyWarning, and reassignment
# keeps this cell safe to re-run.
df_1952 = df_1952.sort_values('gdpPerCapita', ascending=False)
df_1952.head()

Unnamed: 0,country,continent,year,lifeExpectancy,population,gdpPerCapita
852,Kuwait,Asia,1952,55.565,160000,108382.3529
1476,Switzerland,Europe,1952,69.62,4815000,14734.23275
1608,United States,Americas,1952,68.44,157553000,13990.48208
240,Canada,Americas,1952,68.75,14785584,11367.16112
1092,New Zealand,Oceania,1952,69.39,1994794,10556.57566


In [25]:
# Choropleth map of per-capita GDP, drawn from the 1952 rows in df_1952
df_1952.iplot(
    kind='choropleth',
    locations='country',
    z='gdpPerCapita',
    text='country',
    locationmode='country names',  # locations are given as country names
    theme='white',
    colorscale='blues',
    title="GDP per Capita",  # typo fixed (was "GDP per Capta")
    projection=dict(type='natural earth'),
)

# Scatter Plots  
- This uses original (Tall), not Pivoted (Wide) data

In [26]:
# Scatter of population (x) against per-capita GDP (y) over every row of df
scatter_options = dict(
    kind='scatter',
    x='population',
    y='gdpPerCapita',
    mode='markers',
    symbol='circle-dot',
    colors=['orange', 'teal'],
    size=10,
    xTitle='Population',
    yTitle='Per Capita GDP',
    title='Scatter Plot',
)
df.iplot(**scatter_options)

# Bubble Charts  
- This uses original (Tall), not Pivoted (Wide) data

In [27]:
# Peek at the first two rows again to recall the columns available for the bubble chart
df.head(2)

Unnamed: 0,country,continent,year,lifeExpectancy,population,gdpPerCapita
0,Afghanistan,Asia,1952,28.801,8425333,779.445314
1,Afghanistan,Asia,1957,30.332,9240934,820.85303


In [28]:
# Keep only the 2007 observations.
row_filter = df['year'] == 2007
df_2007 = df[row_filter]
# The frame used to be called df_2005 even though the filter selects 2007;
# keep the old name as an alias so cells that still reference it keep working.
df_2005 = df_2007
df_2007.head()

Unnamed: 0,country,continent,year,lifeExpectancy,population,gdpPerCapita
11,Afghanistan,Asia,2007,43.828,31889923,974.580338
23,Albania,Europe,2007,76.423,3600523,5937.029526
35,Algeria,Africa,2007,72.301,33333216,6223.367465
47,Angola,Africa,2007,42.731,12420476,4797.231267
59,Argentina,Americas,2007,75.32,40301927,12779.37964


In [29]:
# Bubble chart: population vs. per-capita GDP, bubble size from lifeExpectancy,
# one colour group per continent, hover text showing the country
bubble_options = dict(
    kind='bubble',
    x='population',
    y='gdpPerCapita',
    size='lifeExpectancy',
    categories='continent',
    text='country',
    theme='solar',
    xTitle='Population',
    yTitle='GDP per Capita',
)
df_2005.iplot(**bubble_options)

# Other Charts

### Pivot Data from Tall to Wide  
- We do this for only one, selected value:  gdpPerCapita 

In [30]:
# List the distinct country names; these become the columns after pivoting
df['country'].unique()

array(['Afghanistan', 'Albania', 'Algeria', 'Angola', 'Argentina',
       'Australia', 'Austria', 'Bahrain', 'Bangladesh', 'Belgium',
       'Benin', 'Bolivia', 'Bosnia and Herzegovina', 'Botswana', 'Brazil',
       'Bulgaria', 'Burkina Faso', 'Burundi', 'Cambodia', 'Cameroon',
       'Canada', 'Central African Republic', 'Chad', 'Chile', 'China',
       'Colombia', 'Comoros', 'Congo, Dem. Rep.', 'Congo, Rep.',
       'Costa Rica', "Cote d'Ivoire", 'Croatia', 'Cuba', 'Czech Republic',
       'Denmark', 'Djibouti', 'Dominican Republic', 'Ecuador', 'Egypt',
       'El Salvador', 'Equatorial Guinea', 'Eritrea', 'Ethiopia',
       'Finland', 'France', 'Gabon', 'Gambia', 'Germany', 'Ghana',
       'Greece', 'Guatemala', 'Guinea', 'Guinea-Bissau', 'Haiti',
       'Honduras', 'Hong Kong, China', 'Hungary', 'Iceland', 'India',
       'Indonesia', 'Iran', 'Iraq', 'Ireland', 'Israel', 'Italy',
       'Jamaica', 'Japan', 'Jordan', 'Kenya', 'Korea, Dem. Rep.',
       'Korea, Rep.', 'Kuwait', 'Leba

In [31]:
# Pivot:  value = gdpPerCapita
df_wide_GDP = df.pivot(index='year', 
                       columns='country', 
                       values='gdpPerCapita')
df_wide_GDP

country,Afghanistan,Albania,Algeria,Angola,Argentina,Australia,Austria,Bahrain,Bangladesh,Belgium,...,Uganda,United Kingdom,United States,Uruguay,Venezuela,Vietnam,West Bank and Gaza,"Yemen, Rep.",Zambia,Zimbabwe
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1952,779.445314,1601.056136,2449.008185,3520.610273,5911.315053,10039.59564,6137.076492,9867.084765,684.244172,8343.105127,...,734.753484,9979.508487,13990.48208,5716.766744,7689.799761,605.066492,1515.592329,781.717576,1147.388831,406.884115
1957,820.85303,1942.284244,3013.976023,3827.940465,6856.856212,10949.64959,8842.59803,11635.79945,661.637458,9714.960623,...,774.371069,11283.17795,14847.12712,6150.772969,9802.466526,676.285448,1827.067742,804.830455,1311.956766,518.764268
1962,853.10071,2312.888958,2550.81688,4269.276742,7133.166023,12217.22686,10750.72111,12753.27514,686.341554,10991.20676,...,767.27174,12477.17707,16173.14586,5603.357717,8422.974165,772.04916,2198.956312,825.623201,1452.725766,527.272182
1967,836.197138,2760.196931,3246.991771,5522.776375,8052.953021,14526.12465,12834.6024,14804.6727,721.186086,13149.04119,...,908.918522,14142.85089,19530.36557,5444.61962,9541.474188,637.123289,2649.715007,862.442146,1777.077318,569.795071
1972,739.981106,3313.422188,4182.663766,5473.288005,9443.038526,16788.62948,16661.6256,18268.65839,630.233627,16672.14356,...,950.735869,15895.11641,21806.03594,5703.408898,10505.25966,699.501644,3133.409277,1265.047031,1773.498265,799.362176
1977,786.11336,3533.00391,4910.416756,3008.647355,10079.02674,18334.19751,19749.4223,19340.10196,659.877232,19117.97448,...,843.733137,17428.74846,24072.63213,6504.339663,13143.95095,713.53712,3682.831494,1829.765177,1588.688299,685.587682
1982,978.011439,3630.880722,5745.160213,2756.953672,8997.897412,19477.00928,21597.08362,19211.14731,676.981866,20979.84589,...,682.266227,18232.42452,25009.55914,6920.223051,11152.41011,707.235786,4336.032082,1977.55701,1408.678565,788.855041
1987,852.395945,3738.932735,5681.358539,2430.208311,9139.671389,21888.88903,23687.82607,18524.02406,751.979403,22525.56308,...,617.724406,21664.78767,29884.35041,7452.398969,9883.584648,820.799445,5107.197384,1971.741538,1213.315116,706.157306
1992,649.341395,2497.437901,5023.216647,2627.845685,9308.41871,23424.76683,27042.01868,19035.57917,837.810164,25575.57069,...,644.170797,22705.09254,32003.93224,8137.004775,10733.92631,989.023149,6017.654756,1879.496673,1210.884633,693.420786
1997,635.341351,3193.054604,4797.295051,2277.140884,10967.28195,26997.93657,29095.92066,20292.01679,972.770035,27561.19663,...,816.559081,26074.53136,35767.43303,9230.240708,10165.49518,1385.896769,7110.667619,2117.484526,1071.353818,792.44996


In [32]:
# Show columns (which are the countries!)
# Show the columns of the pivoted frame (which are the countries!)
df_wide_GDP.columns

Index(['Afghanistan', 'Albania', 'Algeria', 'Angola', 'Argentina', 'Australia',
       'Austria', 'Bahrain', 'Bangladesh', 'Belgium',
       ...
       'Uganda', 'United Kingdom', 'United States', 'Uruguay', 'Venezuela',
       'Vietnam', 'West Bank and Gaza', 'Yemen, Rep.', 'Zambia', 'Zimbabwe'],
      dtype='object', name='country', length=142)

# Ratio Charts 
- This uses the pivoted df_wide_GDP dataframe

In [33]:
# Re-display the available country columns before picking which ones to plot
df_wide_GDP.columns

Index(['Afghanistan', 'Albania', 'Algeria', 'Angola', 'Argentina', 'Australia',
       'Austria', 'Bahrain', 'Bangladesh', 'Belgium',
       ...
       'Uganda', 'United Kingdom', 'United States', 'Uruguay', 'Venezuela',
       'Vietnam', 'West Bank and Gaza', 'Yemen, Rep.', 'Zambia', 'Zimbabwe'],
      dtype='object', name='country', length=142)

In [34]:
# Ratio chart of per-capita GDP for the selected pair of countries
countries_to_plot = ['United States', 'Canada']

gdp_selected = df_wide_GDP[countries_to_plot]
gdp_selected.iplot(kind='ratio', colors=['green', 'red'], title='Ratio Chart')