<div style="background:#E20000; color: white; font-size: 30px; text-align: center; width: auto; height: auto; padding:5px; padding-left:20px; margin:2px;text-shadow: 2px 2px 5px rgba(1, 1, 1, 0.4); border-radius:10px; @import url('https://fonts.googleapis.com/css2?family=Bitter:ital,wght@0,100..900;1,100..900&display=swap'); font-family: 'Bitter', serif;">Suicide Rate: Exploratory Data Analysis</div>


In [3]:
# importing libraries
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
import plotly.express as px
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# NOTE(review): this silences every warning for the whole notebook, including
# pandas/plotly deprecation notices -- consider narrowing the filter.
warnings.filterwarnings('ignore')

# data loading and cleaning
# Built as one chain (no inplace mutation) so re-running the cell always
# starts again from the raw CSV.
df = (
    pd.read_csv("/kaggle/input/suicide-rates-overview-1985-to-2016/master.csv")
    # columns not used by the analysis
    .drop(columns=['country-year', 'HDI for year'])
    # the raw header carries leading/trailing spaces
    .rename(columns={" gdp_for_year ($) ": "gdp_for_year ($)"})
)

# GDP is read as text with thousands separators; strip the commas and convert
# to a numeric dtype so the column can be aggregated, compared and plotted.
df['gdp_for_year ($)'] = pd.to_numeric(
    df['gdp_for_year ($)'].str.replace(',', '', regex=False)
)

In [4]:
# Yearly suicide counts: overall, by gender and by age group
temp_df_total = (
    df.groupby('year')['suicides_no'].sum()
    .reset_index()
    .rename(columns={"year": "Year", "suicides_no": "Suicides"})
)
temp_df_gender = (
    df.groupby(by=['year', 'sex'])['suicides_no'].sum()
    .reset_index()
    .rename(columns={"year": "Year", "sex": "Gender", "suicides_no": "Suicides"})
)
temp_df_age = (
    df.groupby(by=['year', 'age'])['suicides_no'].sum()
    .reset_index()
    .rename(columns={"year": "Year", "age": "Age Group", "suicides_no": "Suicides"})
)

# Title of the default view; reused by the "Total Suicides" button so the
# figure shows the same title before and after the button is clicked.
TOTAL_TITLE = "Worldwide Suicides Over the Year"

fig = px.line(
    temp_df_total, x='Year', y='Suicides',
    markers=True, title=TOTAL_TITLE
)

# Style the total line now, while it is the only trace in the figure, so no
# selector is needed (the px.line trace carries no name to select on).
fig.update_traces(line=dict(color='green', width=3))


def _add_hidden_lines(frame, category_col):
    """Add one initially hidden line per category of `frame` to `fig`.

    Parameters
    ----------
    frame : DataFrame with 'Year', 'Suicides' and `category_col` columns.
    category_col : name of the column whose distinct values each get a line.

    Returns
    -------
    int : number of traces appended to `fig`.
    """
    categories = frame[category_col].unique()
    for category in categories:
        subset = frame[frame[category_col] == category]
        fig.add_trace(
            go.Scatter(
                x=subset['Year'],
                y=subset['Suicides'],
                mode='lines+markers',
                name=category,
                visible=False,  # revealed by the matching button
            )
        )
    return len(categories)


n_gender = _add_hidden_lines(temp_df_gender, 'Gender')
n_age = _add_hidden_lines(temp_df_age, 'Age Group')

# Trace order is [total, genders..., age groups...]; every view turns on
# exactly one of those three segments.
views = [
    ("Total Suicides",
     [True] + [False] * (n_gender + n_age),
     TOTAL_TITLE),
    ("By Gender",
     [False] + [True] * n_gender + [False] * n_age,
     "Worldwide Suicides By Gender"),
    ("By Age Group",
     [False] * (1 + n_gender) + [True] * n_age,
     "Worldwide Suicides By Age Group"),
]

# "title.text" changes only the title string, leaving the title position
# configured in update_layout below untouched.
buttons = [
    dict(
        label=label,
        method="update",
        args=[{"visible": mask}, {"title.text": title}],
    )
    for label, mask, title in views
]

# Create button options for toggling between plots
fig.update_layout(
    updatemenus=[
        dict(
            type="buttons",
            direction="right",
            x=0.5,
            xanchor="center",
            y=1.15,
            yanchor="top",
            buttons=buttons,
        )
    ],
    title={'text': TOTAL_TITLE, 'y': 0.95, 'x': 0.5, 'xanchor': 'center', 'yanchor': 'top'},
    xaxis_title='Year',
    yaxis_title='Number of Suicides',
    title_font_size=20,
)

fig.show()


In [5]:
# Total suicides over the whole dataset, per gender and per age group
gender_counts = (
    df.groupby('sex')['suicides_no'].sum()
    .reset_index()
    .rename(columns={'sex': 'Gender', 'suicides_no': 'Suicides'})
)
age_group_counts = (
    df.groupby('age')['suicides_no'].sum()
    .reset_index()
    .rename(columns={'age': 'Age Group', 'suicides_no': 'Suicides'})
)


def _share_donut(counts, label_col):
    """Build a donut trace of each category's share of total suicides.

    Parameters
    ----------
    counts : DataFrame with a `label_col` column and a 'Suicides' column.
    label_col : column holding the slice labels; also used as the trace name.

    Returns
    -------
    plotly.graph_objects.Pie
    """
    return go.Pie(
        labels=counts[label_col],
        values=counts['Suicides'],
        name=label_col,
        hole=0.3,
        textinfo='label+percent',
        showlegend=False,  # labels are already printed on the slices
        # one Viridis colour per slice, taken from the start of the scale
        marker=dict(colors=px.colors.sequential.Viridis[:len(counts)]),
    )


# Create a subplot figure with 1 row and 2 columns
fig = make_subplots(
    rows=1, cols=2,
    specs=[[{'type': 'pie'}, {'type': 'pie'}]],
    subplot_titles=('Suicides by Gender', 'Suicides by Age Group'),
)

# Each pie is positioned through row/col, so no explicit domain is passed.
fig.add_trace(_share_donut(gender_counts, 'Gender'), row=1, col=1)
fig.add_trace(_share_donut(age_group_counts, 'Age Group'), row=1, col=2)

# Update layout to add a centred overall title
fig.update_layout(
    title_text='Suicide Distribution by Gender and Age Group',
    title_x=0.5,
)

fig.show()


In [6]:
# Total suicides per country over the whole dataset
country_suicides = (
    df.groupby('country')['suicides_no'].sum()
    .reset_index()
    .rename(columns={'country': 'Country', 'suicides_no': 'Suicides'})
)

# World map coloured by the total; countries are matched by their names.
fig = px.choropleth(
    country_suicides,
    locations='Country',
    locationmode='country names',
    color='Suicides',
    hover_name='Country',
    color_continuous_scale='Viridis',
    title='Suicides by Country',
)

fig.update_geos(
    showcoastlines=True,
    coastlinecolor="Black",
    showland=True,
    landcolor="lightgray",  # countries absent from the data fall back to the land colour
    projection_type='natural earth',
)

fig.update_layout(
    title={'x': 0.5, 'xanchor': 'center'},
    # The colour bar is the only key for reading values off the map, so it is
    # kept visible rather than configured and then hidden.
    coloraxis_colorbar=dict(
        title="Suicides",
        ticks='outside',
    ),
    margin={"r": 0, "t": 50, "l": 0, "b": 0},
)

fig.show()


In [7]:
# Total suicides per country, sorted from highest to lowest
temp_df = (
    df.groupby('country')['suicides_no'].sum()
    .reset_index()
    .rename(columns={"country": "Country", "suicides_no": "Suicides"})
    .sort_values(by='Suicides', ascending=False)
)

# Vertical bar plot of the 20 largest totals (the frame is already sorted,
# so the first 20 rows are the top 20).
fig = px.bar(
    temp_df.head(20),
    x='Country',
    y='Suicides',
    orientation='v',
    # These are absolute counts, not per-capita rates, and the title says so.
    title='Top 20 Countries by Total Number of Suicides',
    color='Suicides',
    color_continuous_scale='Viridis',
)

# Centre the title and label each axis with the quantity it actually shows.
fig.update_layout(
    title={'x': 0.5, 'xanchor': 'center'},
    xaxis_title='Country',
    yaxis_title='Number of Suicides',
    margin=dict(l=100, r=50, t=50, b=50),
    coloraxis_showscale=False,  # bar height already encodes the value
)

fig.show()


In [39]:
# Per-country mean of the 'suicides/100k pop' column, highest first.
# NOTE(review): this is an unweighted mean over every row of a country
# (presumably one row per year/sex/age group), not a population-weighted
# rate -- confirm that is the intended statistic.
rate_col = 'suicides/100k pop'
country_means = df.groupby('country')[rate_col].mean().reset_index()
grouped_df = country_means.sort_values(rate_col, ascending=False)

fig = px.bar(
    grouped_df,
    x='country',
    y=rate_col,
    title='Average Suicides per 100k Population by Country',
    labels={rate_col: 'Suicides per 100k Population'},
    color=rate_col,
    color_continuous_scale='Viridis',
)

# Centre the title, label the axes, tilt the country names so they fit,
# and hide the colour bar (bar height already shows the value).
fig.update_layout(
    title_x=0.5,
    title_xanchor='center',
    xaxis_title='Country',
    yaxis_title='Suicides/100k Population',
    xaxis_tickangle=-45,
    coloraxis_showscale=False,
)

fig.show()


In [40]:
# Group by country and age and calculate the total number of suicides
grouped_df = df.groupby(by=['country', 'age'])['suicides_no'].sum().reset_index()

# Age groups are string labels, so left alone they appear on the x axis in
# lexical order rather than by age. Order them by the first number in each
# label instead; labels without a number sort last.
# Assumes each label starts with the lower bound of its range
# (e.g. '5-14 years') -- TODO confirm against df['age'].unique().
age_labels = pd.Series(grouped_df['age'].unique())
age_start = pd.to_numeric(age_labels.str.extract(r'(\d+)', expand=False))
age_order = age_labels.loc[age_start.sort_values(kind='stable').index].tolist()

fig = px.bar(
    grouped_df,
    x='age',
    y='suicides_no',
    color='country',
    title='Number of Suicides by Age Group and Country',
    labels={'suicides_no': 'Number of Suicides', 'age': 'Age Group'},
    color_discrete_sequence=px.colors.qualitative.Plotly,
    category_orders={'age': age_order},
    barmode='stack',
)

fig.update_layout(
    title={'x': 0.5, 'xanchor': 'center'},
    xaxis_title='Age Group',
    yaxis_title='Number of Suicides',
    xaxis_tickangle=-45,
    legend_title='Country',
)

fig.show()