<a href="https://colab.research.google.com/github/hartwj/Data_Viz/blob/main/Data_Viz_Altair.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Data Visualization with Altair

To showcase my data visualization skills in Python, I explore a world history dataset of economic and health indicators (population, life expectancy, GDP) by country over time.

In [1]:
import altair as alt
from altair import datum

# Lift the default data transformer's row cap (5000 rows by default in Altair) so that
# charts built on the full dataset do not raise MaxRowsError.
alt.data_transformers.enable('default', max_rows=None)

from sklearn.manifold import TSNE
from sklearn.cluster import KMeans
import pandas as pd
import numpy as np

In [2]:
!wget --no-check-certificate 'https://docs.google.com/uc?export=download&id=1URcapok9lIdBkFBGabGaslcABcIJqNEc' -O worldhistorydata.csv

--2021-03-01 01:48:50--  https://docs.google.com/uc?export=download&id=1URcapok9lIdBkFBGabGaslcABcIJqNEc
Resolving docs.google.com (docs.google.com)... 172.217.204.101, 172.217.204.139, 172.217.204.113, ...
Connecting to docs.google.com (docs.google.com)|172.217.204.101|:443... connected.
HTTP request sent, awaiting response... 302 Moved Temporarily
Location: https://doc-14-7k-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/va2k234ejo2dalu02rad3k7c2aoj5amv/1614563325000/14947468445787936960/*/1URcapok9lIdBkFBGabGaslcABcIJqNEc?e=download [following]
--2021-03-01 01:48:51--  https://doc-14-7k-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/va2k234ejo2dalu02rad3k7c2aoj5amv/1614563325000/14947468445787936960/*/1URcapok9lIdBkFBGabGaslcABcIJqNEc?e=download
Resolving doc-14-7k-docs.googleusercontent.com (doc-14-7k-docs.googleusercontent.com)... 172.217.203.132, 2607:f8b0:400c:c07::84
Connecting to doc-14-7k-docs.googleusercontent.com (doc-14

In [3]:
# Load the downloaded CSV and parse the four-digit Year into a timestamp so the charts
# can encode it as a temporal field ('Year:T').
data = pd.read_csv('worldhistorydata.csv')
# errors='ignore' is intentionally not passed: it is deprecated in pandas 2.2+ and would
# silently leave Year unparsed on bad input instead of surfacing the problem here.
data['Year'] = pd.to_datetime(data['Year'], format='%Y')
print(data.shape)
data.head()

(12594, 11)


Unnamed: 0.1,Unnamed: 0,Country,Year,Population,LifeExp,GDP,Code,Region,Area,Coastline,Government
0,1,Afghanistan,1975-01-01,12582954,44.0,1201,AF,Asia,652230,0,islamic republic
1,2,Afghanistan,1960-01-01,8994793,35.6846,1206,AF,Asia,652230,0,islamic republic
2,3,Afghanistan,1974-01-01,12315553,43.5,1170,AF,Asia,652230,0,islamic republic
3,4,Afghanistan,1993-01-01,14824371,50.5,810,AF,Asia,652230,0,islamic republic
4,5,Afghanistan,1966-01-01,10148841,39.38804,1168,AF,Asia,652230,0,islamic republic


In [4]:
# Collapse the multi-year history to a single row per country:
# keep the row whose Year is the latest one recorded for that country.
world_data = data.loc[
    data.groupby('Country')['Year'].idxmax()
]


In [5]:
# Sanity-check the per-country snapshot built from the most recent year of each country.
# The frame is left as the cell's last expression (same pattern as the load cell) so the
# notebook renders it as a full table; print() truncated the middle columns to "...".
print(world_data.shape)
world_data.head()

     Unnamed: 0      Country  ... Coastline               Government
68           69  Afghanistan  ...         0         islamic republic
125         126      Albania  ...       362                 republic
179         180      Algeria  ...       998                 republic
257         258      Andorra  ...         0  constitutional monarchy
298         299       Angola  ...      1600                 republic

[5 rows x 11 columns]
(179, 11)


In [6]:
# Question: how has life expectancy changed over time in each region?
# One line per Region; each point is the mean LifeExp over all rows sharing that Year and Region.
Chart1 = (
    alt.Chart(data)
    .mark_line()
    .encode(
        alt.X('Year:T'),
        alt.Y('mean(LifeExp)'),
        alt.Color('Region:N'),
    )
    .properties(title={"text": ['Life Expectancy over Time by Continent']})
)
Chart1

In [7]:
# Question: how has world GDP changed over time?
# Area chart of the yearly mean GDP, with one coloured area per Region.
# Author's reading of the result: Australia, Europe, then Asia.
Chart2 = (
    alt.Chart(data)
    .mark_area()
    .encode(
        alt.X('Year:T'),
        alt.Y('mean(GDP)'),
        alt.Color('Region:N'),
    )
    .properties(title={"text": ['World GDP over Time by Continent']})
)
Chart2

In [8]:
# Question: how has GDP changed over time in North America?
# Only rows whose Region is "North America" are plotted, one line per Country.
# Author's reading of the result: US and Canada lead.
Chart3 = (
    alt.Chart(data.loc[data['Region'].eq("North America")])
    .mark_line()
    .encode(
        alt.X('Year:T'),
        alt.Y('GDP'),
        alt.Color('Country:N'),
    )
    .properties(title={"text": ['GDP in North America over Time']})
)
Chart3

In [9]:
# Question: how do life expectancy and GDP relate in North America?
# One semi-transparent circle per Country, placed at its mean LifeExp (x) and mean GDP (y)
# over all of its rows, with the circle size scaled by its mean Population.
Chart4 = (
    alt.Chart(data.loc[data['Region'].eq("North America")])
    .mark_circle(opacity=0.6)
    .encode(
        alt.X('mean(LifeExp)'),
        alt.Y('mean(GDP)'),
        alt.Color('Country:N'),
        alt.Size('mean(Population)', scale=alt.Scale(range=[100, 500])),
    )
    .properties(
        title={
            "text": ['Relationship between Life Expectancy and GDP in North America'],
            "subtitle": ["Size based on Population"],
        }
    )
)
Chart4

In [10]:
# Question: which government types have the highest mean GDP?
# Uses the latest-year snapshot (world_data); bars are coloured by Region.
# NOTE(review): with a colour channel the per-Region means are presumably stacked, so the
# full bar height would be the sum of regional means rather than one overall mean - confirm.
Chart5 = (
    alt.Chart(world_data)
    .mark_bar()
    .encode(
        alt.X('Government'),
        alt.Y('mean(GDP)'),
        alt.Color('Region:N'),
    )
    .properties(title={"text": ['Mean GDP by Government Type']})
)

Chart5

In [11]:
# Constitutional monarchies ranked by GDP, using the latest-year snapshot (world_data).
# Pipeline: mean GDP per Country -> rank by descending meanGDP -> keep ranks up to 20,
# then draw one bar per remaining Country, ordered from highest to lowest meanGDP.
Chart6 = (
    alt.Chart(world_data.loc[world_data['Government'] == "constitutional monarchy"])
    .transform_aggregate(
        meanGDP='mean(GDP)',
        groupby=['Country'],
    )
    .transform_window(
        window=[{'op': 'rank', 'as': 'rank'}],
        sort=[{'field': 'meanGDP', 'order': 'descending'}],
    )
    .transform_filter('datum.rank <= 20')
    .mark_bar()
    .encode(
        alt.X(
            'Country',
            sort=alt.EncodingSortField(field='meanGDP', op="mean", order='descending'),
        ),
        alt.Y('meanGDP:Q'),
    )
    .properties(title={"text": ['Constitutional Monarchy Mean GDP by Country']})
)

Chart6