In [1]:
# Import libraries and configure how the notebook displays tables and charts.
import pandas as pd
pd.options.display.max_columns = None  # None lifts pandas' cap on the number of columns shown
import altair as alt
alt.renderers.enable('mimetype')  # have Altair emit charts through its 'mimetype' renderer
from vega_datasets import data
import vega
from IPython.display import display, HTML
# Inject a CSS rule so elements with class "container" use 90% of the page width.
display(HTML("<style>.container { width:90% !important; }</style>"))
import jupyterlab

### Read data, join multiple datasets, and compute general statistics before cleaning

In [2]:
# Read the main dataset (death counts per cause, one row per country/territory
# and year) and preview its first five rows.
df = pd.read_csv(filepath_or_buffer='cause_of_deaths.csv')
df.head(n=5)

Unnamed: 0,Country/Territory,Code,Year,Meningitis,Alzheimer's Disease and Other Dementias,Parkinson's Disease,Nutritional Deficiencies,Malaria,Drowning,Interpersonal Violence,Maternal Disorders,HIV/AIDS,Drug Use Disorders,Tuberculosis,Cardiovascular Diseases,Lower Respiratory Infections,Neonatal Disorders,Alcohol Use Disorders,Self-harm,Exposure to Forces of Nature,Diarrheal Diseases,Environmental Heat and Cold Exposure,Neoplasms,Conflict and Terrorism,Diabetes Mellitus,Chronic Kidney Disease,Poisonings,Protein-Energy Malnutrition,Road Injuries,Chronic Respiratory Diseases,Cirrhosis and Other Chronic Liver Diseases,Digestive Diseases,"Fire, Heat, and Hot Substances",Acute Hepatitis
0,Afghanistan,AFG,1990,2159,1116,371,2087,93,1370,1538,2655,34,93,4661,44899,23741,15612,72,696,0,4235,175,11580,1490,2108,3709,338,2054,4154,5945,2673,5005,323,2985
1,Afghanistan,AFG,1991,2218,1136,374,2153,189,1391,2001,2885,41,102,4743,45492,24504,17128,75,751,1347,4927,113,11796,3370,2120,3724,351,2119,4472,6050,2728,5120,332,3092
2,Afghanistan,AFG,1992,2475,1162,378,2441,239,1514,2299,3315,48,118,4976,46557,27404,20060,80,855,614,6123,38,12218,4344,2153,3776,386,2404,5106,6223,2830,5335,360,3325
3,Afghanistan,AFG,1993,2812,1187,384,2837,108,1687,2589,3671,56,132,5254,47951,31116,22335,85,943,225,8174,41,12634,4096,2195,3862,425,2797,5681,6445,2943,5568,396,3601
4,Afghanistan,AFG,1994,3027,1211,391,3081,211,1809,2849,3863,63,142,5470,49308,33390,23288,88,993,160,8215,44,12914,8959,2231,3932,451,3038,6001,6664,3027,5739,420,3816


In [3]:
# Basic statistics of the raw dataset, printed one per line as "<label> <value>".
basic_stats = [
    ("Shape:", df.shape),
    ("Unique Countries:", df['Country/Territory'].nunique()),
    ("Null Elements: ", df.isna().sum().sum()),
    ("Earliest Year: ", df['Year'].min()),
    ("Latest Year: ", df['Year'].max()),
]
for stat_label, stat_value in basic_stats:
    print(stat_label, stat_value)


Shape: (6120, 34)
Unique Countries: 204
Null Elements:  0
Earliest Year:  1990
Latest Year:  2019


In [4]:
# Country lookup table, read from a local file. The commented-out URL below looks
# like the upstream source of that file -- TODO confirm.
# country_data_url="https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/UID_ISO_FIPS_LookUp_Table.csv"
# country_data = pd.read_csv(country_data_url)
country_data = pd.read_csv("UID.csv")

# Keep only rows that have no Province_State value, and only the four columns
# needed for the join and the maps.
country_data = country_data.loc[
    country_data['Province_State'].isna(),
    ['UID', 'Country_Region', 'Lat', 'Long_'],
]

# Spell the US the same way the cause-of-death dataset does so the name join matches.
country_data.loc[country_data['Country_Region'].eq("US"), 'Country_Region'] = "United States"

In [5]:
# Attach UID / Lat / Long_ to every row by matching on the country name.
# No `how` is passed, so pandas performs its default inner join.
df = df.merge(
    country_data,
    left_on="Country/Territory",
    right_on="Country_Region",
)

In [6]:
# Normalise the column labels and tidy up the columns added by the merge.
# Spaces and "/" become "_"; apostrophes and commas are removed
# (e.g. "Country/Territory" -> "Country_Territory").
_COLUMN_NAME_FIXES = str.maketrans({" ": "_", "/": "_", "'": None, ",": None})

# Written as a chain that returns a new frame instead of mutating in place.
# errors="ignore" on the drop makes the cell safe to re-run: previously a second
# execution raised KeyError because 'Country_Region' had already been removed.
df = (
    df.rename(columns=lambda name: name.translate(_COLUMN_NAME_FIXES))
      .drop(columns=['Country_Region'], errors="ignore")  # duplicate of Country_Territory after the join
      .rename(columns={'Long_': 'Long'})
)

In [7]:
def display_map(year,column_name):
    """Build a world map coloured by one cause-of-death column for one year.

    Reads the notebook-level DataFrame ``df`` (expects the columns ``Year``,
    ``UID``, ``Country_Territory`` and ``column_name``) and joins it to the
    ``world_110m`` country shapes by matching the shape ``id`` to ``UID``.

    Parameters
    ----------
    year : int
        Value compared against ``df.Year`` to select the rows to plot.
    column_name : str
        Name of the ``df`` column used for the colour and the tooltip.
        Underscores are replaced by spaces in the chart title.

    Returns
    -------
    Altair layered chart
        A white background layer plus the coloured country layer, 700x400,
        in the ``naturalEarth1`` projection.

    Raises
    ------
    KeyError
        If ``column_name`` (or ``UID`` / ``Country_Territory``) is not a
        column of ``df``.
    """
    # The chart only ever reads the lookup key and the two looked-up fields, so
    # pass just those columns to the lookup instead of the full frame (plus the
    # extra "index" column a plain reset_index() would add). dict.fromkeys
    # de-duplicates in case column_name is itself one of the fixed columns.
    needed_columns = list(dict.fromkeys(["UID", column_name, "Country_Territory"]))
    df1 = df.loc[df.Year == year, needed_columns].reset_index(drop=True)

    source = alt.topo_feature(data.world_110m.url, "countries")
    background = alt.Chart(source).mark_geoshape(fill="white").interactive()

    # The colour channel is encoded as nominal (":N"), the tooltip as quantitative (":Q").
    color=column_name+":N"
    tooltip=column_name+":Q"

    foreground = (
        alt.Chart(source).mark_geoshape(
            stroke="black", strokeWidth=0.15
        ).encode(
            color=alt.Color(
                color, scale=alt.Scale(scheme="lightgreyred"), legend=None
                #legend=alt.Legend(type='gradient',title='Legend',gradientLength=100,direction="horizontal", orient='bottom'),
            ),
            tooltip=[
                alt.Tooltip("Country_Territory:N", title="Country"),
                alt.Tooltip(tooltip, title=column_name),
            ],
        ).transform_lookup(
            # Match each country shape's "id" to the "UID" column of df1.
            lookup="id",
            from_=alt.LookupData(df1, "UID", [column_name, "Country_Territory"]),
        ).interactive()
    )

    chart = (
        (background + foreground)
        .configure_view(strokeWidth=0)
        .properties(title=str(year)+ " Deaths Due to " + column_name.replace('_',' ') + " World Map",width=700, height=400)
        .project("naturalEarth1")
    )

    return chart

In [8]:
# HIV/AIDS deaths by country in 1995.
display_map(year=1995, column_name='HIV_AIDS')

<VegaLite 4 object>

If you see this message, it means the renderer has not been properly enabled
for the frontend that you are using. For more information, see
https://altair-viz.github.io/user_guide/troubleshooting.html


In [9]:
# HIV/AIDS deaths by country in 2011.
display_map(year=2011, column_name='HIV_AIDS')

<VegaLite 4 object>

If you see this message, it means the renderer has not been properly enabled
for the frontend that you are using. For more information, see
https://altair-viz.github.io/user_guide/troubleshooting.html


In [10]:
# Conflict-and-terrorism map for the most recent year in the data.
# This cell builds `chart` but does not display it.
#
# Fix: the field names below now use the cleaned column labels. The earlier
# clean-up cell replaced spaces with underscores and dropped 'Country_Region',
# so the previous names ("Conflict and Terrorism", "Country_Region") no longer
# existed in `df` and the lookup could not return any values.

## choose a year
df1 = df.loc[df.Year==df.Year.max()].reset_index()


source = alt.topo_feature(data.world_110m.url, "countries")
background = alt.Chart(source).mark_geoshape(fill="white")

foreground = (
    alt.Chart(source).mark_geoshape(
        stroke="black", strokeWidth=0.15
    ).encode(
        color=alt.Color(
            "Conflict_and_Terrorism:N", scale=alt.Scale(scheme="lightgreyred"), legend=None
            #legend=alt.Legend(type='gradient',title='Legend',gradientLength=100,direction="horizontal", orient='bottom'),
        ),
        tooltip=[
            alt.Tooltip("Country_Territory:N", title="Country"),
            alt.Tooltip("Conflict_and_Terrorism:Q", title="Conflict and Terrorism"),
        ],
    ).transform_lookup(
        # Match each country shape's "id" to the "UID" column of df1.
        lookup="id",
        from_=alt.LookupData(df1, "UID", ["Conflict_and_Terrorism", "Country_Territory"]),
    )
)

chart = (
    (background + foreground)
    .configure_view(strokeWidth=0)
    .properties(width=700, height=400)
    .project("naturalEarth1")
)


In [11]:
# Load the yearly US population table (to be joined with the US rows) and preview it.
us_population = pd.read_csv(filepath_or_buffer="US_population.csv")
us_population.head(n=5)

Unnamed: 0,Year,Population
0,1990,249623000
1,1991,252981000
2,1992,256514000
3,1993,259919000
4,1994,263126000


In [12]:
# United States rows only: filter, keep at most the first 50 rows, then order by year.
US = (
    df[df["Country_Territory"] == "United States"]
    .head(50)
    .sort_values("Year")
)

In [13]:
# Add the yearly population to the US rows, matching on "Year" (pandas' default
# inner join). The result is displayed, not stored.
US.merge(us_population, on="Year")

Unnamed: 0,Country_Territory,Code,Year,Meningitis,Alzheimers_Disease_and_Other_Dementias,Parkinsons_Disease,Nutritional_Deficiencies,Malaria,Drowning,Interpersonal_Violence,Maternal_Disorders,HIV_AIDS,Drug_Use_Disorders,Tuberculosis,Cardiovascular_Diseases,Lower_Respiratory_Infections,Neonatal_Disorders,Alcohol_Use_Disorders,Self-harm,Exposure_to_Forces_of_Nature,Diarrheal_Diseases,Environmental_Heat_and_Cold_Exposure,Neoplasms,Conflict_and_Terrorism,Diabetes_Mellitus,Chronic_Kidney_Disease,Poisonings,Protein-Energy_Malnutrition,Road_Injuries,Chronic_Respiratory_Diseases,Cirrhosis_and_Other_Chronic_Liver_Diseases,Digestive_Diseases,Fire_Heat_and_Hot_Substances,Acute_Hepatitis,UID,Lat,Long,Population
0,United States,USA,1990,1830,73079,12895,2795,0,4370,25341,538,27789,5865,2271,879646,72716,19306,6763,33964,230,681,1468,539139,21,47140,33101,1330,2419,48970,106098,37386,78301,5336,202,840,40.0,-100.0,249623000
1,United States,USA,1991,1615,78365,13217,2855,0,4246,26081,525,32425,6151,2172,877798,73655,18307,6868,34014,124,746,1406,544760,159,49037,33885,1282,2481,47833,109587,37417,78694,5204,210,840,40.0,-100.0,252981000
2,United States,USA,1992,1558,83053,13600,2945,0,4069,25307,509,36485,6596,2055,874992,74500,17259,7035,34067,123,816,1371,550919,78,51045,34919,1225,2569,46690,113036,37599,79310,4878,221,840,40.0,-100.0,256514000
3,United States,USA,1993,1550,87768,14301,3148,0,4075,25598,511,40523,7269,1986,895086,77518,16700,7200,34893,254,925,1635,565698,72,54543,36767,1216,2748,47119,119371,38715,81626,5100,232,840,40.0,-100.0,259919000
4,United States,USA,1994,1530,91491,14816,3312,0,3999,24898,524,44959,7862,1877,899691,78885,15899,7347,35122,303,1034,1416,573321,44,57260,38199,1192,2900,47262,123547,39290,82913,4794,243,840,40.0,-100.0,263126000
5,United States,USA,1995,1514,94855,15434,3530,0,3967,23477,489,45213,8466,1781,908922,80421,15042,7484,35211,133,1170,1911,582371,190,60140,39946,1161,3103,47551,128157,40019,84395,4688,257,840,40.0,-100.0,266278000
6,United States,USA,1996,1504,96526,15962,3741,0,3857,21706,508,33684,9135,1679,907009,80927,14531,7578,34804,266,1330,1428,583202,43,62422,41627,1137,3298,47416,131733,40254,85128,4961,264,840,40.0,-100.0,269394000
7,United States,USA,1997,1514,96342,16545,3978,0,3810,20385,534,19425,10111,1584,904335,81097,14439,7701,34622,261,1571,1383,583773,32,64519,43504,1109,3519,47374,135669,40662,86166,4472,275,840,40.0,-100.0,272657000
8,United States,USA,1998,1517,95816,17349,4263,0,3831,19286,508,15921,11243,1512,909266,81393,14632,7924,34395,283,1907,1443,589017,197,67162,46077,1079,3783,47341,141289,41422,87807,4380,289,840,40.0,-100.0,275854000
9,United States,USA,1999,1566,95512,18354,4617,0,3749,18421,575,17187,12894,1450,920654,78509,14776,8298,34091,259,2531,1470,597618,46,70341,49593,1086,4122,47717,148677,42462,90133,4342,323,840,40.0,-100.0,279040000
