In [2]:
"""The objective of the program is to determine the extent to which gross domestic product per capita 
   explains the variability in suicides per capita on a country, region and oecd membership basis.
   Also, the countries and the contiguous time period to be included in the analysis are automatically 
   determined, i.e. a period of at least 10 years for which the United States and Canada have no
   missing years and which has the fewest other countries with any missing years. Correlation is measured as
   the Pearson coefficient r (labelled "r2" in the outputs) using the numpy corrcoef function. The visualization is interactive insofar as the user can change the 
   countries displayed by region for the "countries" r2 scatter plot and trend lines. The program results are
   intended to be a robust initial exploration of the "what" to highlight concerning trends or unexpected 
   relationships which would be investigated in more detail in some future extension of this program.   
"""        

# get libraries

%matplotlib inline
import matplotlib.pyplot as plt
import matplotlib.ticker as mtick

import numpy as np
import pandas as pd

from ipywidgets import interact

import getpass
import os

# export preparation used in testing: base folder and current user name for building an export path

username = getpass.getuser()
path_initial = r"C:\Users"

# get data from two data sources; the kaggle frame also gets a year + country key column (yr_country)

data_world_bank = pd.read_csv( 'ak_a4_data_world_bank.csv' )
data_kaggle = pd.read_csv('ak_a4_data_kaggle.csv').assign(
    yr_country=lambda frame: frame['year'].astype(str) + frame['country'] )

################################################################################################################
"""The code in this section of the program determines the countries and contiguous time period duration
   to be included in the analysis, so the list of countries and the period start and end years become inputs 
   used in subsequent code."""

# list functions data preparation

years = np.arange( data_kaggle['year'].min(), data_kaggle['year'].max() + 1 )
countries_only = list( data_kaggle['country'].drop_duplicates() )
yr_countries = list( data_kaggle['yr_country'].drop_duplicates() )

# list of candidate ranges: each spans at least 10 years (end - start) and ends no earlier than
# 5 years before the latest year in the data

yr_ranges_only = [ [start,end] 
                   for start in years 
                   for end in years
                   if (end-start) >= 10 and
                   end >= (years.max()-5) ]
if not yr_ranges_only:
    raise ValueError('the data does not span enough years to form a candidate year range')

# label used to identify each candidate range, e.g. "1995-2010" (same order as yr_ranges_only)

range_labels = [ str(start)+'-'+str(end) for start, end in yr_ranges_only ]

# list of all possible year-country combinations given the minimum and maximum years for the complete data set  

yr_country_all = [ str(year)+country for year in years for country in countries_only ]               

# year-country combinations which are observations in the actual data set and add a flag with the count column 
# (a set is used for the membership test so the scan stays linear in the number of combinations)

observed_keys = set(yr_countries)
yr_country_in = pd.DataFrame({'yr_country': [key for key in yr_country_all if key in observed_keys]})
yr_country_in['count'] = 1

# all year-country combinations that are actual data set observations (in, count = 1) and not (out, count = 0) 

yr_country_all_in_out = pd.DataFrame( [[str(year)+country, year, country] 
                                       for year in years for country in countries_only],
                                      columns=['yr_country', 'year', 'country'] )
yr_country_all_in_out = pd.merge(yr_country_all_in_out, yr_country_in, how='outer', on='yr_country')
yr_country_all_in_out['count'] = yr_country_all_in_out['count'].fillna(0)

# for every candidate range keep the country/count rows whose year falls inside the range (inclusive) and tag
# them with the range label; each candidate range is evaluated exactly once, including the last one

frames_by_range = []
for (start, end), label in zip(yr_ranges_only, range_labels):
    in_range = yr_country_all_in_out['year'].between(start, end)
    frames_by_range.append( yr_country_all_in_out.loc[in_range, ['country', 'count']].assign(range=label) )
yr_country_all_in_out_w_range = pd.concat(frames_by_range, ignore_index=True)

# obtain list of ranges that contain a missing year for the united states and canada
# NOTE(review): if either country is spelled differently in the data no range is excluded here - confirm names

is_missing = yr_country_all_in_out_w_range['count'] == 0
is_us_can = yr_country_all_in_out_w_range['country'].isin(['United States', 'Canada'])

ranges_missing_US_CAN = yr_country_all_in_out_w_range.loc[ is_us_can & is_missing, ['range'] ]
ranges_missing_US_CAN_1 = ranges_missing_US_CAN.drop_duplicates()
ranges_missing_US_CAN_2 = list(ranges_missing_US_CAN_1['range'])

# obtain a count of non-US-CAN countries that have at least one missing year and exclude ranges that US-CAN have missing;
# ranges in which no other country is missing are kept with a count of 0 so that they can be selected as well

ranges_missing_other = yr_country_all_in_out_w_range.loc[ ~is_us_can & is_missing ]
ranges_missing_other_1 = ranges_missing_other.drop_duplicates()
ranges_missing_other_1 = ( ranges_missing_other_1.groupby('range').size()
                           .reindex(range_labels, fill_value=0)
                           .rename_axis('range')
                           .to_frame(name='count')
                           .reset_index() )
ranges_missing_other_2 = ranges_missing_other_1.loc[ ~ranges_missing_other_1['range'].isin(ranges_missing_US_CAN_2) ]
if ranges_missing_other_2.empty:
    raise ValueError('every candidate year range has a missing year for the United States or Canada')

# obtain in a variable the range with the fewest missing countries due to incomplete observations for the range
# as well as the countries, which will be used later to select rows with the same range and are not in missing countries  

fewest_missing = ranges_missing_other_2.loc[ ranges_missing_other_2['count'].idxmin() ]
selection_fewest_missing = fewest_missing['range'] # range
selection_fewest_missing_start, selection_fewest_missing_end = (
    int(part) for part in selection_fewest_missing.split('-') ) # range start, range end
countries_fewest_missing_1 = ranges_missing_other.loc[ ranges_missing_other['range'] == selection_fewest_missing ]
countries_fewest_missing_2 = list(countries_fewest_missing_1['country'].drop_duplicates()) # countries 

################################################################################################################
"""The code in this section of the program groups by country, region and oecd membership to aggregate suicides,
   gdp and population in order to calculate suicides and gdp per capita by said groups.""" 

# combine the two sources, then keep only the selected years and the countries without missing years

data = data_kaggle.merge(data_world_bank, how='outer', on='country')

keep_country = ~data['country'].isin(countries_fewest_missing_2)
keep_year = data['year'].isin(list(range(selection_fewest_missing_start, selection_fewest_missing_end + 1)))
data1 = data.loc[ keep_country & keep_year ].copy()

# year + region and year + oecd keys, used below to aggregate by group and year

data1 = data1.assign(yr_region=data1['year'].astype(str) + data1['region'],
                     yr_oecd=data1['year'].astype(str) + data1['oecd'])

# country level: yearly suicides and population totals, gdp taken from the distinct country-year rows

country_grouper = data1.groupby('yr_country')
country_suicides_by_yr = country_grouper['suicides'].sum().reset_index(name='suicides')
country_population_by_yr = country_grouper['population'].sum().reset_index(name='population')

gdp_columns = ['yr_country', 'yr_region', 'yr_oecd', 'year', 'country', 'gdp', 'region', 'oecd']
country_gdp_by_yr = data1[gdp_columns].drop_duplicates()

country_annual_data = country_suicides_by_yr.merge(country_population_by_yr, how='outer', on='yr_country')
country_annual_data1 = country_annual_data.merge(country_gdp_by_yr, how='outer', on='yr_country')
country_annual_data1 = country_annual_data1.assign(**{
    'suicides per capita': country_annual_data1['suicides'] / country_annual_data1['population'],
    'gdp per capita': country_annual_data1['gdp'] / country_annual_data1['population'],
})

# region level: totals of the country-year rows per year + region key

region_grouper = country_annual_data1.groupby('yr_region')
region_suicides_by_yr = region_grouper['suicides'].sum().reset_index(name='suicides')
region_population_by_yr = region_grouper['population'].sum().reset_index(name='population')
region_gdp_by_yr = region_grouper['gdp'].sum().reset_index(name='gdp')

region_for_merge = data1[ ['yr_region', 'region'] ].drop_duplicates()

region_annual_data = region_suicides_by_yr.merge(region_population_by_yr, how='outer', on='yr_region')
region_annual_data1 = region_annual_data.merge(region_gdp_by_yr, how='outer', on='yr_region')
region_annual_data2 = region_annual_data1.merge(region_for_merge, how='outer', on='yr_region')
region_annual_data2 = region_annual_data2.assign(**{
    'suicides per capita': region_annual_data2['suicides'] / region_annual_data2['population'],
    'gdp per capita': region_annual_data2['gdp'] / region_annual_data2['population'],
})

# oecd membership level: totals of the country-year rows per year + oecd key

oecd_grouper = country_annual_data1.groupby('yr_oecd')
oecd_suicides_by_yr = oecd_grouper['suicides'].sum().reset_index(name='suicides')
oecd_population_by_yr = oecd_grouper['population'].sum().reset_index(name='population')
oecd_gdp_by_yr = oecd_grouper['gdp'].sum().reset_index(name='gdp')

oecd_for_merge = data1[ ['yr_oecd', 'oecd'] ].drop_duplicates()

oecd_annual_data = oecd_suicides_by_yr.merge(oecd_population_by_yr, how='outer', on='yr_oecd')
oecd_annual_data1 = oecd_annual_data.merge(oecd_gdp_by_yr, how='outer', on='yr_oecd')
oecd_annual_data2 = oecd_annual_data1.merge(oecd_for_merge, how='outer', on='yr_oecd')
oecd_annual_data2 = oecd_annual_data2.assign(**{
    'suicides per capita': oecd_annual_data2['suicides'] / oecd_annual_data2['population'],
    'gdp per capita': oecd_annual_data2['gdp'] / oecd_annual_data2['population'],
})

# correlation analysis helper shared by the country, region and oecd analyses

def _correlation_by_group(annual_data, group_column, group_names, category):
    """Correlate gdp per capita with suicides per capita separately for each group.

    Parameters
    ----------
    annual_data : DataFrame with one row per group and year, holding the columns
        `group_column`, 'gdp per capita' and 'suicides per capita'.
    group_column : name of the column that identifies the group of each row.
    group_names : group values to analyse, in the order the results should be listed.
    category : value written to the 'category' column of every result row.

    Returns
    -------
    DataFrame with the columns 'name', 'r2: gdp v suicides' and 'category', one row per group.
    The coefficient is the off-diagonal entry of np.corrcoef, i.e. the signed Pearson r; the
    'r2' column label is kept because the rest of the program refers to it by that name.
    """
    records = []
    for group_name in group_names:
        group_rows = annual_data.loc[ annual_data[group_column] == group_name ]
        coefficient = np.corrcoef(group_rows['gdp per capita'], group_rows['suicides per capita'])[0,1]
        records.append({'name': group_name, 'r2: gdp v suicides': coefficient})
    # the records are collected in a list and the frame is built once at the end
    results = pd.DataFrame(records, columns=['name', 'r2: gdp v suicides'])
    results['category'] = category
    return results

# country correlation analysis

countries_df = data1['country'].drop_duplicates().reset_index(drop=True).to_frame()
countries_list = list(countries_df['country'])
country_results_correlation = _correlation_by_group(country_annual_data1, 'country', countries_list, 'country')

# region correlation analysis

regions_df = data1['region'].drop_duplicates().reset_index(drop=True).to_frame()
regions_list = list(regions_df['region'])
region_results_correlation = _correlation_by_group(region_annual_data2, 'region', regions_list, 'region')

# oecd correlation analysis

oecd_df = data1['oecd'].drop_duplicates().reset_index(drop=True).to_frame()
oecd_list = list(oecd_df['oecd'])
oecd_results_correlation = _correlation_by_group(oecd_annual_data2, 'oecd', oecd_list, 'membership')

# concatenate results of correlation analysis and add the absolute strength (2 decimals) and the sign

all_results_correlation = pd.concat([oecd_results_correlation, region_results_correlation, 
                                     country_results_correlation],axis=0, sort=False)
all_results_correlation['abs_r2'] = np.round(np.abs(all_results_correlation['r2: gdp v suicides']),decimals=2)
all_results_correlation['pos_neg'] = np.where(all_results_correlation['r2: gdp v suicides'] < 0, 'negative', 'positive')

################################################################################################################
"""The code in this section of the program creates the visualizations that summarize the absolute correlation strength
   by a count of group members (histogram), actual correlation values to distinguish positive and negative relationships 
   (scatter plot) and suicides per capita trend over the period (line graph)."""

# country correlation results joined with the country and region abbreviations used as plot labels

is_country_result = all_results_correlation['category'] == 'country'
country_r2_1 = all_results_correlation.loc[is_country_result].copy()
data2 = ( data1[ ['country','abb_country','abb_region'] ]
          .drop_duplicates()
          .rename(columns={'country':'name'}) )
country_r2_2 = country_r2_1.merge(data2, how='outer', on='name')

def _draw_abs_r2_histogram(ax, abs_r2, bins, title, count_ticks):
    """Draw one histogram of absolute correlation strength on `ax`.

    abs_r2 is a Series of absolute coefficients, bins the number of histogram bins,
    title the axes title and count_ticks the y tick positions.
    """
    abs_r2 = abs_r2.dropna()  # missing coefficients cannot be binned
    ax.hist(abs_r2, bins = bins, edgecolor='black', linewidth=1.2, color='lightsteelblue')
    ax.set_title(title, fontdict={'fontweight' :'bold'})

    # one x tick per bin edge; skipped when all values are equal because the tick step would be zero
    if len(abs_r2) > 0 and abs_r2.max() > abs_r2.min():
        ax.set_xticks(np.arange(abs_r2.min(), abs_r2.max(), (abs_r2.max() - abs_r2.min()) / bins))
    ax.set_xlabel('| r2 |', color='midnightblue',  fontweight='bold')

    ax.set_yticks(count_ticks)
    ax.set_ylabel('count', color='midnightblue', fontweight='bold')
    ax.yaxis.set_major_formatter(mtick.StrMethodFormatter('{x:,.0f}'))


def _draw_r2_scatter(ax, labelled_r2, label_column):
    """Draw the coefficients of `labelled_r2` on the diagonal of `ax` and label each point.

    labelled_r2 holds the column 'r2: gdp v suicides' and the text labels in `label_column`;
    value and label are read from the same row so each point gets its own label.
    """
    values = labelled_r2['r2: gdp v suicides']
    ax.scatter(values, values, color='black')
    for value, label in zip(values, labelled_r2[label_column]):
        if pd.isna(value) or pd.isna(label):
            continue  # nothing to position or nothing to print
        ax.text(value + 0.05, value, label, fontsize=10)

    ax.set_title(' ')
    ax.set_facecolor('lightsteelblue')

    # axes cross at the origin
    ax.spines['right'].set_color(None)
    ax.spines['top'].set_color(None)
    ax.spines['bottom'].set_position('zero')
    ax.spines['bottom'].set_color(None)
    ax.spines['left'].set_position('zero')

    ax.tick_params(bottom=False)
    ax.text( 0, -1.2, 'category',
             horizontalalignment='center',
             verticalalignment='bottom',
             fontsize=10, color='midnightblue', fontweight='bold')
    ax.set_xticks(np.arange(-1,1.1,0.1))
    ax.tick_params(axis='x', labelrotation=90, labelsize=7, labelcolor='lightsteelblue')
    ax.set_yticks(np.arange(-1,1.1,0.1))
    ax.tick_params(axis='y', labelrotation=0, labelsize=9)
    ax.text( -1.2, 0, 'r2',
             horizontalalignment='left',
             verticalalignment='center',
             fontsize=10, color='midnightblue', fontweight='bold')


def _key_years(keys):
    """Recover the year from keys built as str(year) + group name (assumes 4-digit years)."""
    return keys.str[:4].astype(int)


def _group_trend_lines(annual_data, group_column, key_column, styles):
    """Build (label, years, values, color) tuples for the groups listed in `styles`.

    styles is an iterable of (group value, legend label, line color); the years are taken
    from the year-prefixed `key_column` of the very rows that are plotted.
    """
    lines = []
    for group, label, color in styles:
        rows = annual_data.loc[ annual_data[group_column] == group ]
        lines.append( (label, _key_years(rows[key_column]), rows['suicides per capita'], color) )
    return lines


def _draw_trend_lines(ax, lines, legend_columns):
    """Plot suicides per capita by year for each (label, years, values, color) tuple on `ax`.

    A color of None uses the default color cycle; groups without rows are skipped so that
    they neither fail the plot nor appear in the legend.
    """
    for label, line_years, values, color in lines:
        if len(values) == 0:
            continue
        ordered = pd.Series(list(values), index=list(line_years)).sort_index()  # plot in year order
        style = {} if color is None else {'color': color}
        ax.plot(list(ordered.index), list(ordered.values), label=label, linewidth=2.0, **style)

    ax.set_facecolor('lightsteelblue')
    ax.set_ylabel('suicides per capita', color='midnightblue', fontweight='bold')
    if ax.get_legend_handles_labels()[0]:
        ax.legend(loc='upper center', bbox_to_anchor=(0.5, -0.05), ncol=legend_columns)


@interact
def scatter_country(Region = country_r2_2['abb_region'].unique()):
    """Draw the 3 x 3 summary figure; the country panels show the countries of `Region`.

    Columns are oecd membership, regions and countries. The top row shows histograms of the
    absolute correlation strength, the middle row the signed coefficients and the bottom row
    suicides per capita by year. `Region` is a region abbreviation (abb_region value) chosen
    through the interact dropdown. The function only reads the module-level result frames.
    """
    # figure formatting

    fig = plt.figure(num = 1, 
                 figsize = (15, 15), 
                 dpi = 75, 
                 facecolor = 'whitesmoke', 
                 edgecolor = 'k')
    fig_title = fig.suptitle('Suicides Per Capita & Correlation with GDP\n'+
                              str(data1['year'].min()) + ' to ' + str(data1['year'].max()),
                              fontsize=16, color='midnightblue', fontweight='bold')
    fig_title.set_position([0.5, 1.02])

    # top row: correlation strength histograms (category, title, bins, count ticks)

    histogram_panels = ( ('membership', 'OECD Membership\n \n', 2, np.arange(0,2,1)),
                         ('region', 'Regions\n \n', 3, np.arange(0,5,1)),
                         ('country', 'Countries\n \n', 6, np.arange(0,23,4)) )
    for position, (category, title, bins, count_ticks) in enumerate(histogram_panels, start=1):
        abs_r2 = all_results_correlation.loc[ all_results_correlation['category'] == category, 'abs_r2' ]
        _draw_abs_r2_histogram(fig.add_subplot(3,3,position), abs_r2, bins, title, count_ticks)

    # oecd correlation scatter

    abb_oecd = data1[ ['oecd', 'abb_oecd'] ].drop_duplicates().rename(columns={'oecd':'name'})
    oecd_r2 = all_results_correlation.loc[ all_results_correlation['category'] == 'membership' ]
    oecd_r2 = oecd_r2.merge(abb_oecd, how='outer', on='name')
    _draw_r2_scatter(fig.add_subplot(3,3,4), oecd_r2, 'abb_oecd')

    # region correlation scatter

    region_r2 = all_results_correlation.loc[ all_results_correlation['category'] == 'region' ]
    abb_region = data1[ ['region', 'abb_region'] ].drop_duplicates().rename(columns={'region':'name'})
    abb_region = abb_region.loc[ abb_region['name'].isin( list(region_r2['name']) ) ]
    region_r2 = region_r2.merge(abb_region, how='outer', on='name')
    _draw_r2_scatter(fig.add_subplot(3,3,5), region_r2, 'abb_region')

    # country correlation scatter for the selected region

    country_r2 = country_r2_2.loc[ country_r2_2['abb_region'] == Region ]
    _draw_r2_scatter(fig.add_subplot(3,3,6), country_r2, 'abb_country')

    # oecd suicides per capita by year plot

    oecd_styles = ( ('Member', 'Yes', 'midnightblue'),
                    ('Non-Member', 'No', 'k') )
    _draw_trend_lines(fig.add_subplot(3,3,7),
                      _group_trend_lines(oecd_annual_data2, 'oecd', 'yr_oecd', oecd_styles),
                      legend_columns=1)

    # regional suicides per capita by year plot

    region_styles = ( ('Latin America & Caribbean', 'LA_CAR', 'midnightblue'),
                      ('Europe & Central Asia', 'EUR_CA', 'k'),
                      ('Middle East & North Africa', 'ME_NA', 'brown'),
                      ('North America', 'N_A', 'limegreen'),
                      ('East Asia & Pacific', 'EA_PAC', 'darkorange'),
                      ('Sub-Saharan Africa', 'SUB_AFR', 'darkmagenta') )
    _draw_trend_lines(fig.add_subplot(3,3,8),
                      _group_trend_lines(region_annual_data2, 'region', 'yr_region', region_styles),
                      legend_columns=3)

    # country suicides per capita by year plot for the selected region; the module-level frame is
    # renamed into a copy so that repeated widget callbacks never modify shared data

    country_trend = ( country_annual_data1.rename(columns={'country':'name'})
                      .merge(data2, how='outer', on='name') )
    country_trend = country_trend.loc[ country_trend['abb_region'] == Region ].dropna(subset=['year'])
    country_lines = []
    for abbreviation in country_trend['abb_country'].unique():
        rows = country_trend.loc[ country_trend['abb_country'] == abbreviation ]
        country_lines.append( (abbreviation, rows['year'], rows['suicides per capita'], None) )
    _draw_trend_lines(fig.add_subplot(3,3,9), country_lines, legend_columns=4)

    # chart spacing
    fig.tight_layout(pad=1.5)

################################################################################################################
# The code in this section of the program is used to validate data results during development, so as to confirm 
# that the coding logic is correct and should be "commented out" when running the program.

# #export for testing
# path_complete = os.path.join(path_initial,  username, "Desktop", "region_annual_data1.csv") # update path
# region_annual_data1.to_csv(path_complete) # update object

interactive(children=(Dropdown(description='Region', options=('LA_CAR', 'EUR_CA', 'ME_NA', 'N_A', 'EA_PAC', 'S…