### Region - Average Happiness Score

In [1]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.io as pio
import gmaps
import requests
import json
import os
# Google developer API key, read from the environment so it is never committed
# to the notebook (it used to come from `from api_keys import g_key`).
# os.environ.get returns None when the variable is not set.
# NOTE(review): the variable name "GOOGLE_AP" looks truncated — confirm it
# matches the name actually exported in the environment.
g_key = os.environ.get("GOOGLE_AP")

# Read the concatenated CSV data into a dataframe
df = pd.read_csv("../Output/suicide_concat.csv")
df.head()

ModuleNotFoundError: No module named 'api_keys'

In [None]:
# Mean happiness score for every (Year, Region) pair, with the group keys
# turned back into ordinary columns
df_region = (
    df.groupby(["Year", "Region"])["Happiness_Score"]
    .mean()
    .reset_index()
)
df_region

In [None]:
# Grouped bar chart of the average happiness score:
# years on the x-axis, one coloured bar per region within each year
fig = px.bar(
    df_region,
    x="Year",
    y="Happiness_Score",
    color="Region",
    barmode="group",
    title="Average Happiness Score of each Region",
    color_discrete_sequence=px.colors.qualitative.Antique,
    height=400,
)
# Y-axis ticks anchored at 3 with a step of 1
fig.update_yaxes(tick0=3, dtick=1)
fig.show()

### Happiness vs Suicide Rate of Top 30 Happy Countries from 2015 to 2019

In [None]:
# Work on a copy so the dataframe read from the CSV stays untouched
df_suicide = df.copy()

# Countries to compare against each other
cmp_country = ['Finland', 'Australia']

# Melted top-30 dataframe of each year, keyed by the year itself.
# (A fixed 'year' string key would make every iteration overwrite the same entry.)
df2 = {}

# Rows of the comparison countries, one frame per year; concatenated once
# after the loop instead of growing a dataframe on every iteration
cmp_frames = []

# Years to process
years = [2015, 2016, 2017, 2018, 2019]

for year in years:

    # Rows of this year only, ordered from the highest to the lowest happiness score
    year_df = (
        df_suicide.loc[df_suicide.Year == year]
        .reset_index(drop=True)
        .sort_values(by="Happiness_Score", ascending=False)
    )

    # Keep this year's rows of the countries in the comparison list
    temp_df = year_df.loc[year_df['Country'].isin(cmp_country)]
    cmp_frames.append(temp_df)

    # Take the 30 happiest countries and melt them to long format:
    # Country, Region and Year stay as identifiers, every remaining column
    # becomes a 'Factor' with its value in 'Happiness_and_Suicide_rate'.
    # NOTE(review): no value_vars are given, so any extra column in the CSV
    # would also show up as a Factor — confirm the CSV only holds the
    # happiness score and suicide rate besides the identifier columns.
    df2[year] = pd.melt(year_df.head(30), id_vars=['Country', 'Region', 'Year'], var_name='Factor',
                        value_name='Happiness_and_Suicide_rate', col_level=None)

    # Grouped bars per country, one colour per factor
    fig = px.bar(df2[year], x="Country", color="Factor",
                 y='Happiness_and_Suicide_rate',
                 title=f"Happiness score vs Suicide Rate of Top 30 Happy Countries {year}",
                 barmode='group',
                 color_discrete_sequence=px.colors.qualitative.Antique,
                 height=600
                )
    fig.update_yaxes(tick0=3, dtick=1)

    # Save the chart of this year as a PNG, then display it
    pio.write_image(fig, f"../Images/Happiness_Suicide/happiness_suicide_{year}.png")
    fig.show()

# All years of the comparison countries in one dataframe
aus_fin_df = pd.concat(cmp_frames)

In [None]:
# Australia compared with Finland (happiness rank 1):
# suicide rate over the years, one line per country held in aus_fin_df
fig = px.line(
    aus_fin_df,
    x="Year",
    y="Suicide_Rate",
    color="Country",
    title="Suicide Rate of Australia vs Happiness Rank 1, Finland",
    color_discrete_sequence=px.colors.qualitative.Antique,
)
fig.update_xaxes(nticks=5)

# Image export is currently disabled; uncomment the next line to save the figure
# pio.write_image(fig, '../Images/Happiness_Suicide/Aus_vs_Fin_Suicide.png')
fig.show()

In [None]:
# Order every row of the concatenated data by suicide rate, highest first.
# sort_values returns a new dataframe, so df itself is left untouched.
data_file_df = df.sort_values(by="Suicide_Rate", ascending=False)
data_file_df.head()

In [None]:
# Years for which the top 30 rows by suicide rate are extracted
years = [2015, 2016, 2017, 2018, 2019]

# First 30 rows of each year. data_file_df is sorted by Suicide_Rate in
# descending order, so these are the 30 highest rates of that year.
# The per-year frames are collected in a list and concatenated once instead
# of growing a dataframe inside the loop.
top_frames = [
    data_file_df.loc[data_file_df.Year == year].head(30).reset_index(drop=True)
    for year in years
]
data_df = pd.concat(top_frames)

data_df.head()

In [None]:
# Result lists; they must stay the same length (one entry per geocoded country)
lat = []
lon = []
country_list = []

# Each country is geocoded once, even if it appears in several years
countries = data_df["Country"].unique()

for country in countries:

    try:
        # Query the Google Geocoding endpoint. Passing the values through
        # `params` lets requests URL-encode the country name, and the timeout
        # keeps a stalled connection from hanging the notebook.
        geo_data = requests.get(
            "https://maps.googleapis.com/maps/api/geocode/json",
            params={"address": country, "key": g_key},
            timeout=10,
        ).json()

        # Read both coordinates before touching the result lists, so a missing
        # field can never leave the lists with different lengths
        location = geo_data["results"][0]["geometry"]["location"]
        latitude = location["lat"]
        longitude = location["lng"]

    except (KeyError, IndexError):
        print("Missing field/result... skipping.")
        continue

    lat.append(latitude)
    lon.append(longitude)
    country_list.append(country)

    print(f"Retrieved coordinates of {country}")

In [None]:
# Coordinates table: one row per successfully geocoded country
lat_lng_df = pd.DataFrame({"Country": country_list, "Latitude": lat, "Longitude": lon})

# Attach the coordinates to data_df (left join keeps countries without coordinates).
# Any Latitude/Longitude columns left over from an earlier run of this cell are
# dropped first, so re-running the cell does not produce Latitude_x/Latitude_y.
data_df = (
    data_df
    .drop(columns=["Latitude", "Longitude"], errors="ignore")
    .merge(lat_lng_df, on="Country", how="left")
)
data_df.head()

In [None]:
# plotly.express is already imported in the first cell; the import is repeated
# here exactly as the cell originally had it
import plotly.express as px

# World map with one marker per country, coloured and labelled by suicide rate,
# animated over the years
fig = px.scatter_geo(
    data_df,
    lat="Latitude",
    lon="Longitude",
    hover_name="Country",
    color="Suicide_Rate",
    text="Suicide_Rate",
    animation_frame="Year",
    projection="natural earth",
    title="Top 30 Countries in Suicide Rate",
)

# Write the figure to an HTML file, then display it
pio.write_html(fig, '../Images/Happiness_Suicide/suicidetop30.html')
fig.show()