> **Cause of Deaths Analysis**

The dataset contains cause of deaths information around the world from 1990 to 2019.

Key topics covered in this analysis:

1. Percentage of cause of deaths
2. Total number of deaths around the world
3. Top 10 Countries with the highest number of deaths 
4. Top 10 Countries with the lowest number of deaths 
5. Time series of total number of deaths around the world 
6. Time series compare the total number of deaths between top 10 countries
7. Percentage of cause of deaths in New Zealand
8. Time series of total number of deaths in New Zealand 
9. Top 5 cause of deaths in New Zealand
10. Time series of top 5 cause of deaths in New Zealand 
11. Time series to compare the total deaths between New Zealand and Australia 
12. Cause of deaths in New Zealand in 2019
13. Top 5 cause of deaths in New Zealand in 2019
14. Time series of total number of deaths from road injuries in New Zealand
15. Time series of total number of deaths from self-harm in New Zealand
16. Time series of data not related to disease in New Zealand
17. Bar chart race of data not related to diseases in New Zealand



# Import Libraries and Read data

In [None]:
# Install the package for bar chart racing.
# bar_chart_race is imported as `bcr` in the import cell, so this cell has to
# run before that one.

!pip install bar-chart-race

In [None]:
#Import Libraries

import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns
import bar_chart_race as bcr


# Ignore any further warnings

import warnings
warnings.filterwarnings('ignore')

In [None]:
# Load the cause-of-deaths CSV from the Kaggle input directory

csv_path = '/kaggle/input/cause-of-deaths-around-the-world/cause_of_deaths.csv'
df = pd.read_csv(csv_path)

In [None]:
# Preview the first rows to confirm the file loaded as expected
df.head()

In [None]:
# (number of rows, number of columns) of the loaded data
df.shape

In [None]:
# Check the data type of each column

df.dtypes

In [None]:
# Count the missing values in each column

df.isna().sum()

In [None]:
# Check for duplicate data: count the rows flagged by DataFrame.duplicated()

df.duplicated().sum()

In [None]:
# Shorten the 'Country/Territory' column name to 'Country'

column_renames = {'Country/Territory': 'Country'}
df = df.rename(columns=column_renames)

df.head()

In [None]:
# List every distinct value in the 'Country' column
df['Country'].unique()

In [None]:
# Report how many distinct values the 'Country' column holds
n_countries = df["Country"].unique().size
print(f'There are  {n_countries}  countries')

In [None]:
# Number of distinct values in each column (column-wise is the default axis)

df.nunique()

In [None]:
# Statistical information: summary statistics from DataFrame.describe()

df.describe()

# Exploratory Analysis and Visualisation

# 1. Percentage of Cause of Deaths

In [None]:
# Look at the columns again before deriving the total-deaths column
df.head()

In [None]:
# Create a new column for the total number of deaths (sum of all cause columns).
# 'Total_deaths' is excluded from the cause list as well, so that re-running
# this cell does not treat the derived total as one more cause and double it.

non_cause_columns = ('Country', 'Code', 'Year', 'Total_deaths')
cause_of_deaths = [col for col in df.columns if col not in non_cause_columns]

# Row-wise sum: one total per (country, year) record
df['Total_deaths'] = df[cause_of_deaths].sum(axis=1)

df.head()

In [None]:
# Sum every cause column over all rows and reshape the result into a
# two-column frame ('Disease', 'Total cases') for the charts below

disease_df = (
    df[cause_of_deaths]
    .sum()
    .rename_axis('Disease')
    .reset_index(name='Total cases')
)
disease_df

In [None]:
# Sunburst with one sector per cause, sized by its total cases
fig = px.sunburst(disease_df, path=['Disease'], values='Total cases')

fig.update_traces(textinfo='label+percent parent')
fig.update_layout(
    title={'text': 'Percentage of Cause of Deaths', 'x': 0.5},
    font={'size': 15},
)
fig.show()

In [None]:
# Same breakdown as a treemap, with a constant root node above the causes

treemap_path = [px.Constant('Total cases'), 'Disease']
fig = px.treemap(disease_df, path=treemap_path, values='Total cases')

fig.update_traces(textinfo='label+percent parent')
fig.update_layout(
    title={'text': 'Percentage of Cause of Deaths', 'x': 0.5},
    font={'size': 15},
)
fig.show()

# 2. Total Number of Deaths Around the World

In [None]:
# Total deaths per country over all years, largest first
deaths_per_country = df.groupby('Country')['Total_deaths'].sum()
country_df = deaths_per_country.sort_values(ascending=False).reset_index()
country_df

In [None]:
# Sunburst with one sector per country, sized by its total deaths
fig = px.sunburst(country_df, path=['Country'], values='Total_deaths')

fig.update_traces(textinfo='label+percent parent')
fig.update_layout(
    title={'text': 'Percentage of Total Number of Deaths Around the World', 'x': 0.5},
    font={'size': 15},
)
fig.show()

In [None]:
# Same breakdown as a treemap, with a constant root node above the countries

treemap_path = [px.Constant('Total_deaths'), 'Country']
fig = px.treemap(country_df, path=treemap_path, values='Total_deaths')

fig.update_traces(textinfo='label+percent parent')
fig.update_layout(
    title={'text': 'Percentage of Total Number of Deaths Around the World', 'x': 0.5},
    font={'size': 15},
)
fig.show()

In [None]:
# World map coloured by each country's total deaths; countries are matched
# by name and the colour scale is fixed to the 1 - 10,000,000 range

fig = px.choropleth(
    country_df,
    locations='Country',
    locationmode='country names',
    color='Total_deaths',
    range_color=[1, 10000000],
    hover_name='Country',
)

fig.update_layout(
    title_text='Total Number of Deaths around the world',
    title_x=0.5,
    font_size=15,
)
fig.show()

# Interactive Choropleth Map

In [None]:
# Animated world map: one frame per year, coloured by that year's total
# deaths, with the colour scale fixed to the 1 - 2,000,000 range
map_options = dict(
    locations='Country',
    locationmode='country names',
    color='Total_deaths',
    range_color=[1, 2000000],
    hover_name='Country',
    animation_frame='Year',
    width=1000,
)
fig = px.choropleth(df, **map_options)

# Remove the outer margins so the map fills the figure
fig.update_layout(margin=dict(l=0, r=0, t=0, b=0))
fig.show()

> **You can change the year with the slider or click the button to play the animation.**

# 3. Top 10 Countries with the Highest Number of Deaths 

In [None]:
# Total deaths per country, keeping the ten largest totals

totals_by_country = df.groupby('Country')['Total_deaths'].sum()
Top10_deaths = totals_by_country.sort_values(ascending=False).iloc[:10].reset_index()

Top10_deaths

In [None]:
# Bar chart of the ten countries with the highest total deaths
plt.figure(figsize=(16,9))

sns.barplot(data = Top10_deaths, 
            x = 'Country', 
            y = 'Total_deaths', 
            palette = 'pastel'
            )

# Country names are long, so turn the tick labels vertical
plt.xticks(rotation = 90)
plt.xlabel('Country', fontsize = 12)
plt.ylabel('Total Number of Deaths', fontsize = 12)
plt.title('Top 10 Countries with the Highest Number of Deaths', fontsize =15)

# Render explicitly, like the other matplotlib cells, so the cell does not
# echo the Text object returned by plt.title
plt.show()

# 4. Top 10 Countries with the Lowest Number of Deaths 

In [None]:
# Total deaths per country, keeping the ten smallest totals
totals_by_country = df.groupby('Country')['Total_deaths'].sum()
Low10_deaths = totals_by_country.sort_values(ascending=True).iloc[:10].reset_index()

Low10_deaths

In [None]:
# Bar chart of the ten countries with the lowest total deaths
plt.figure(figsize=(16,9))

sns.barplot(data = Low10_deaths, 
            x = 'Country', 
            y = 'Total_deaths', 
            palette = 'pastel'
            )

# Country names are long, so turn the tick labels vertical
plt.xticks(rotation = 90)
plt.xlabel('Country', fontsize = 12)
plt.ylabel('Total Number of Deaths', fontsize = 12)
plt.title('Top 10 Countries with the Lowest Number of Deaths', fontsize =15)

# Render explicitly, like the other matplotlib cells, so the cell does not
# echo the Text object returned by plt.title
plt.show()

# 5. Time Series of Total Number of Deaths Around the World 

In [None]:
# Worldwide total deaths for each year, as a two-column frame
Deaths_by_year = df.groupby('Year', as_index=False)['Total_deaths'].sum()

Deaths_by_year

In [None]:
# Line chart of the worldwide yearly totals
plt.figure(figsize=(16, 9))

ax = sns.lineplot(data=Deaths_by_year, x='Year', y='Total_deaths')

ax.set_xlabel('Year', fontsize=12)
ax.set_ylabel('Total Number of Deaths', fontsize=12)
ax.set_title('Time Series of Total Number of Deaths Around the World', fontsize=15)

plt.show()

# 6. Time Series Compare the Total Number of Deaths Between Top 10 Countries

In [None]:
# Draw one yearly-total line per country in the top-10 table

plt.figure(figsize=(16, 9))

for country in Top10_deaths['Country']:
    country_rows = df.loc[df['Country'] == country]
    sns.lineplot(data=country_rows, x='Year', y='Total_deaths', label=country)

plt.xlabel('Year', fontsize=12)
plt.ylabel('Total Number of Deaths', fontsize=12)
plt.title('Time Series Compare the Total Number of Deaths Between Top 10 Countries', fontsize=15)

plt.show()

# New Zealand Analysis

> **As I live in New Zealand, I will be focusing on analysing data for my country :)**

In [None]:
# Keep only the New Zealand rows

is_new_zealand = df['Country'].eq('New Zealand')
New_Zealand = df.loc[is_new_zealand]
New_Zealand.head()

# 7. Percentage of Cause of Deaths in New Zealand

In [None]:
# Sum every cause column over the New Zealand rows and reshape the result
# into a two-column frame ('Disease', 'Total cases') for the charts below

NZ_disease = (
    New_Zealand[cause_of_deaths]
    .sum()
    .rename_axis('Disease')
    .reset_index(name='Total cases')
)
NZ_disease

In [None]:
# Sunburst with one sector per cause, sized by its New Zealand total
fig = px.sunburst(NZ_disease, path=['Disease'], values='Total cases')

fig.update_traces(textinfo='label+percent parent')
fig.update_layout(
    title={'text': 'Percentage of Cause of Deaths in New Zealand', 'x': 0.5},
    font={'size': 15},
)
fig.show()

In [None]:
# Same breakdown as a treemap, with a constant root node above the causes

treemap_path = [px.Constant('Total cases'), 'Disease']
fig = px.treemap(NZ_disease, path=treemap_path, values='Total cases')

fig.update_traces(textinfo='label+percent parent')
fig.update_layout(
    title={'text': 'Percentage of Cause of Deaths in New Zealand', 'x': 0.5},
    font={'size': 15},
)
fig.show()

# 8. Time Series of Total Number of Deaths in New Zealand

In [None]:
# New Zealand total deaths for each year, as a two-column frame
NZ_Deaths_by_year = New_Zealand.groupby('Year', as_index=False)['Total_deaths'].sum()

NZ_Deaths_by_year

In [None]:
# Line chart of the New Zealand yearly totals
plt.figure(figsize=(16, 9))

ax = sns.lineplot(data=NZ_Deaths_by_year, x='Year', y='Total_deaths')

ax.set_xlabel('Year', fontsize=12)
ax.set_ylabel('Total Number of Deaths', fontsize=12)
ax.set_title('Time Series of Total Number of Deaths in New Zealand', fontsize=15)

plt.show()

# 9. Top 5 Cause of Deaths in New Zealand

In [None]:
# Five causes with the largest New Zealand totals, largest first
nz_cases_by_disease = NZ_disease.groupby('Disease')['Total cases'].sum()
NZ_top5_disease = nz_cases_by_disease.sort_values(ascending=False).iloc[:5].reset_index()

NZ_top5_disease

In [None]:
# Donut chart (a pie with a 70% hole) of the top five causes; the trace
# title is drawn inside the hole
donut = go.Pie(
    labels=NZ_top5_disease['Disease'],
    values=NZ_top5_disease['Total cases'],
    hole=0.7,
    title='Top 5 Cause of deaths',
)
fig = go.Figure(data=[donut])

# Labels sit outside the ring, so the legend would only repeat them
fig.update_traces(textposition='outside', textinfo='percent+label')
fig.update_layout(
    title={'text': 'Top 5 Cause of Deaths in New Zealand', 'x': 0.5},
    font={'size': 15},
    showlegend=False,
)
fig.show()

# 10. Time series of Top 5 Cause of Deaths in New Zealand

In [None]:
# Look at the New Zealand columns again before plotting individual causes
New_Zealand.head()

In [None]:
#Create Time series of 5 cause of deaths - Cardiovascular Diseases, Neoplasms, Chronic Respiratory Diseases, Alzheimer's Disease and Other Dementias, Digestive Diseases

# (column name, line colour, dash style) for each trace; one table replaces
# five copy-pasted add_trace calls so the styling is easy to compare and edit.
# 'solid' is plotly's default dash style, spelled out for the one trace that
# previously relied on the default.
top5_trace_styles = [
    ('Cardiovascular Diseases', 'Red', 'dashdot'),
    ('Neoplasms', 'DarkOrchid', 'dot'),
    ('Chronic Respiratory Diseases', 'RoyalBlue', 'solid'),
    ("Alzheimer's Disease and Other Dementias", 'Black', 'dot'),
    ('Digestive Diseases', 'LightSeaGreen', 'dash'),
]

fig = go.Figure()
for cause, colour, dash_style in top5_trace_styles:
    fig.add_trace(go.Scatter(x = New_Zealand['Year'],
                             y = New_Zealand[cause],
                             mode = 'lines',
                             name = cause,
                             marker_color = colour,
                             line = dict(dash = dash_style)))

# The bold tag is now closed with </b>; the original ended with a second
# opening <b>, which left the markup unbalanced.
fig.update_layout(title = '<b>Time Series of Top 5 Cause of Deaths in New Zealand</b>',
                  title_x = 0.5,
                  title_font= dict(size = 20),
                  xaxis_title = 'Year',
                  yaxis_title = 'Total Number of Deaths',
                  template = 'plotly_white')

fig.show()

# 11. Time Series to Compare Between New Zealand and Australia

In [None]:
# Keep only the Australia rows

is_australia = df['Country'].eq('Australia')
Australia = df.loc[is_australia]
Australia.head()

In [None]:
# Yearly total deaths for New Zealand and Australia on one set of axes
fig = go.Figure()
fig.add_trace(go.Scatter(x = New_Zealand['Year'],
                         y = New_Zealand['Total_deaths'],
                         mode = 'lines',
                         name = 'New Zealand',
                         marker_color = 'Red'))

fig.add_trace(go.Scatter(x = Australia['Year'],
                         y = Australia['Total_deaths'],
                         mode = 'lines',
                         name = 'Australia',
                         marker_color = 'Blue'))

# Title fixes: the original read "Compare the Between" (missing noun) and
# ended with a second opening <b> instead of the closing </b>.
fig.update_layout(title = '<b>Time Series to Compare the Total Deaths Between New Zealand and Australia</b>',
                  title_x = 0.5,
                  title_font= dict(size = 20),
                  xaxis_title = 'Year',
                  yaxis_title = 'Total Number of Deaths',
                  template = 'plotly_white')

fig.show()

# 12. Cause of Deaths in New Zealand in 2019

> The latest year from this dataset is 2019. 
> 
> So I would like to know the latest information of cause of deaths in New Zealand.

In [None]:
# Show the last New Zealand rows to see the most recent years
New_Zealand.tail()

In [None]:
# Keep only the New Zealand rows for the year 2019

is_2019 = New_Zealand['Year'].eq(2019)
NZ_2019 = New_Zealand.loc[is_2019]
NZ_2019

In [None]:
# Sum every cause column over the 2019 New Zealand rows and reshape the
# result into a two-column frame ('Disease', 'Total cases') for the charts

disease_2019 = (
    NZ_2019[cause_of_deaths]
    .sum()
    .rename_axis('Disease')
    .reset_index(name='Total cases')
)
disease_2019

In [None]:
# Treemap of the 2019 causes, with a constant root node above them
treemap_path = [px.Constant('Total cases'), 'Disease']
fig = px.treemap(disease_2019, path=treemap_path, values='Total cases')

fig.update_traces(textinfo='label+percent parent')
fig.update_layout(
    title={'text': 'Percentage of Cause of Deaths in New Zealand in 2019', 'x': 0.5},
    font={'size': 15},
)
fig.show()

# 13. Top 5 Cause of Deaths in New Zealand in 2019

In [None]:
# Five causes with the largest 2019 totals in New Zealand, largest first
cases_2019_by_disease = disease_2019.groupby('Disease')['Total cases'].sum()
top5_2019 = cases_2019_by_disease.sort_values(ascending=False).iloc[:5].reset_index()

top5_2019

In [None]:
# Horizontal bar chart of the top five causes in 2019 (causes on the y-axis)
plt.figure(figsize=(16,9))

sns.barplot(data = top5_2019, 
            x = 'Total cases', 
            y = 'Disease', 
            palette = 'pastel'
            )

plt.xticks(rotation = 90)
plt.xlabel('Total Number of Deaths', fontsize = 12)
plt.ylabel('Cause of Deaths', fontsize = 12)
plt.title('Top 5 Cause of Deaths in New Zealand in 2019', fontsize =15)

# Render explicitly, like the other matplotlib cells, so the cell does not
# echo the Text object returned by plt.title
plt.show()

In [None]:
# Donut chart (a pie with a 70% hole) of the top five causes in 2019; the
# trace title is drawn inside the hole
donut = go.Pie(
    labels=top5_2019['Disease'],
    values=top5_2019['Total cases'],
    hole=0.7,
    title='Top 5 Cause of deaths',
)
fig = go.Figure(data=[donut])

# Labels sit outside the ring, so the legend would only repeat them
fig.update_traces(textposition='outside', textinfo='percent+label')
fig.update_layout(
    title={'text': 'Top 5 Cause of Deaths in New Zealand in 2019', 'x': 0.5},
    font={'size': 15},
    showlegend=False,
)
fig.show()

# 14. Time Series of Total Number of Deaths From Road Injuries in New Zealand

In [None]:
# Line chart of yearly road-injury deaths in New Zealand
plt.figure(figsize=(16,9))

# Pass the frame as `data=` like every other seaborn call in this notebook;
# a bare positional frame is not accepted as `data` by every seaborn release.
sns.lineplot(data = New_Zealand,
             x='Year',
             y = 'Road Injuries'
            )

plt.xlabel('Year',fontsize =12)
plt.ylabel('Total Number of Deaths',fontsize =12)
plt.title('Time Series of Total Number of Deaths From Road Injuries in New Zealand', fontsize=15)

plt.show()

# 15. Time Series of Total Number of Deaths From Self-harm in New Zealand

In [None]:
#Try to use another package for Line chart

# Yearly self-harm deaths in New Zealand as a dotted red plotly line
fig = go.Figure()
fig.add_trace(go.Scatter(x = New_Zealand['Year'],
                         y = New_Zealand['Self-harm'],
                         mode = 'lines',
                         name = 'New Zealand',
                         marker_color = 'Red',
                         line = dict(dash = 'dot')))

# The bold tag is now closed with </b>; the original ended with a second
# opening <b>, which left the markup unbalanced.
fig.update_layout(title = '<b>Time Series of Total Number of Deaths From Self-harm in New Zealand</b>',
                  title_x = 0.5,
                  title_font= dict(size = 20),
                  xaxis_title = 'Year',
                  yaxis_title = 'Total Number of Deaths',
                  template = 'plotly_white')

fig.show()

# 16. Time Series of Data Not Related to Disease in New Zealand

> The 'Road Injuries' and 'Self-harm' columns are excluded from this chart.
> 
> Their values are much higher than the other causes, so including them would stretch the y-axis and make the remaining lines hard to read.
> 
> Separate line charts for 'Road Injuries' and 'Self-harm' are shown in sections 14 and 15 instead.

In [None]:
# Non-disease causes to compare on one chart
interest_data = [
    'Drowning',
    'Interpersonal Violence',
    'Drug Use Disorders',
    'Alcohol Use Disorders',
    'Environmental Heat and Cold Exposure',
    'Fire, Heat, and Hot Substances',
    'Poisonings',
]

plt.figure(figsize=(16, 9))

# One labelled line per cause, each read from its New_Zealand column
for cause in interest_data:
    sns.lineplot(data=New_Zealand, x='Year', y=cause, label=cause)

plt.xlabel('Year', fontsize=12)
plt.ylabel('Total Number of Deaths', fontsize=12)
plt.title('Time Series of Data Not Related to Disease in New Zealand', fontsize=15)

plt.show()

# 17. Bar Chart Race of Data Not Related to Disease in New Zealand

In [None]:
# Keep only the columns of interest and renumber the rows from 0

NZ_race = New_Zealand.loc[:, interest_data].reset_index(drop=True)

NZ_race.head()

In [None]:
# Use the New Zealand 'Year' values, in row order, as the index

year_index = pd.Index(New_Zealand['Year'], name='Year')
NZ_race = NZ_race.set_index(year_index)
NZ_race

In [None]:
# Animated bar chart race built from the NZ_race frame

race_options = dict(
    n_bars=7,
    orientation='h',
    sort='desc',
    period_length=1000,
    fixed_max=True,
    fixed_order=False,
    figsize=(4, 3),
    period_fmt='Year {x:.0f}',
    title='Compare Data Not Related to Disease in NZ',
)
bcr.bar_chart_race(NZ_race, **race_options)


I tried out many types of data visualisation, such as:

- Bar Chart
- Column Chart
- Line Chart
- Pie Chart
- Donut Chart
- Tree Map
- Choropleth Map
- Interactive Choropleth Map
- Bar Chart Race


I am very new to Kaggle, so this notebook is useful practice for me and something I can look back on in the future. 

Any suggestions or comments would be appreciated. 

Thank you. :)