# Plots of article distribution per country

In [6]:
from collections import Counter

import numpy as np
import pandas as pd
import plotly.express as px
import pycountry

In [39]:
#Folder holding the input CSV files, and the full path of each file
root = './all_selected/'
air_pollution_path = f'{root}air_pollution.csv'
heat_island_path = f'{root}UHI.csv'
flooding_path = f'{root}flooding.csv'
landslides_path = f'{root}landslides.csv'

#Load every CSV into its own dataframe; the first column of each file becomes the index
air_pollution = pd.read_csv(filepath_or_buffer=air_pollution_path, index_col=0)
heat_island = pd.read_csv(filepath_or_buffer=heat_island_path, index_col=0)
flooding = pd.read_csv(filepath_or_buffer=flooding_path, index_col=0)
landslides = pd.read_csv(filepath_or_buffer=landslides_path, index_col=0)

In [20]:
#Alpha-3 codes for the country spellings that are tried when the main name lookup fails
_COUNTRY_ALIASES = {
    'Iran': 'IRN',
    'South Korea': 'KOR',
    'Czech Republic': 'CZE',
    'Taiwan': 'TWN',
    'Peoples R China': 'CHN',
    'England': 'GBR',
    'Scotland': 'GBR',
    'Macau': 'MAC',
    'North Korea': 'PRK',
    'Vietnam': 'VNM',
    'Turkiye': 'TUR',
    'Turkey': 'TUR',
    'Democratic Republic Congo': 'COD',
    'Moldova': 'MDA',
}

#Label under which names that cannot be resolved to a code are tallied
_UNKNOWN_CODE = 'Unknown code'


def _to_alpha3(country_name, name_to_code):
    """Return the alpha-3 code for country_name: main lookup first, aliases second."""
    code = name_to_code.get(country_name)
    if code is None:
        code = _COUNTRY_ALIASES.get(country_name, _UNKNOWN_CODE)
    return code


def _counts_to_frame(counts):
    """Turn a Counter into a ('country', 'count') dataframe sorted by descending count."""
    #Explicit columns keep the schema intact even when there is nothing to count
    return pd.DataFrame(counts.most_common(), columns=['country', 'count'])


def get_counts(data, name_to_code=None):
    """Count the included articles per country.

    Only rows whose 'Include' column equals 'Yes' are considered. The 'Country'
    column is read as a comma-separated list of country names; the first entry
    is used for the first-author tally, every entry for the all-authors tally
    (a country repeated within one article is counted once per occurrence).

    Names are stripped of surrounding whitespace, empty entries are ignored and
    rows whose 'Country' value is missing (not a string) are skipped. Names that
    cannot be resolved are tallied under 'Unknown code'.

    Parameters:
        data: dataframe with at least the 'Include' and 'Country' columns.
        name_to_code: optional dict mapping a country name to its alpha-3 code.
            When omitted it is built from pycountry (name -> alpha_3).

    Returns:
        (country_count_all_instances, country_count_first_instances): two
        dataframes with the columns 'country' (alpha-3 code) and 'count',
        sorted by descending count.
    """
    if name_to_code is None:
        name_to_code = {country.name: country.alpha_3 for country in pycountry.countries}

    all_counts = Counter()
    first_counts = Counter()
    included_countries = data.loc[data['Include'] == 'Yes', 'Country']
    for raw_countries in included_countries:
        #An empty CSV cell is read as NaN (a float), which cannot be split
        if not isinstance(raw_countries, str):
            continue
        names = (name.strip() for name in raw_countries.split(','))
        codes = [_to_alpha3(name, name_to_code) for name in names if name]
        if not codes:
            continue
        all_counts.update(codes)
        first_counts[codes[0]] += 1

    return _counts_to_frame(all_counts), _counts_to_frame(first_counts)

In [8]:
#Settings handed to fig.show(config=...); they describe the image exported from a figure
config = dict(
    toImageButtonOptions=dict(
        format='png',  #one of png, svg, jpeg, webp
        filename='custom_image',
        height=1000,
        width=1800,
        scale=1,  #multiplies title/legend/axis/canvas sizes by this factor
    ),
)

In [60]:
def graph(country_count_all_instances, country_count_first_instance, shared_color_range=False):
    """Display two choropleth maps of the article distribution per country.

    The first map uses the counts based on the affiliations of all authors,
    the second the counts based on the first author's affiliation only. Both
    figures are shown with the module-level `config` dictionary.

    Parameters:
        country_count_all_instances: dataframe with the columns 'country'
            (used as map locations) and 'count' for all authors.
        country_count_first_instance: dataframe with the same columns for the
            first author only.
        shared_color_range: when True, both maps use the colour range
            (0, largest count found in either dataframe) so that their colours
            are directly comparable. The default lets each map scale itself.
    """
    range_color = None
    if shared_color_range:
        max_count = max(country_count_all_instances['count'].max(),
                        country_count_first_instance['count'].max())
        range_color = (0, max_count)

    titled_counts = (
        ("Country count all authors' affiliations", country_count_all_instances),
        ("Country count first author's affiliation", country_count_first_instance),
    )
    for title, counts in titled_counts:
        fig = px.choropleth(counts, locations="country",
                            color="count",
                            hover_name="country",  #column to add to hover information
                            color_continuous_scale=px.colors.sequential.algae,
                            title=title,
                            range_color=range_color,
                            )
        fig.show(config=config)

# Air pollution

In [61]:
#Tally the air pollution articles per country, then draw both maps
(country_count_all_instances,
 country_count_first_instances) = get_counts(air_pollution)
#Optional export, left disabled (results_root must be defined first):
#country_count_first_instances.to_excel(results_root + "country_count_air_pollution.xlsx")
graph(country_count_all_instances=country_count_all_instances,
      country_count_first_instance=country_count_first_instances)

# Heat island

In [62]:
#Tally the heat island articles per country, then draw both maps
(country_count_all_instances,
 country_count_first_instances) = get_counts(heat_island)
#Optional export, left disabled (results_root must be defined first):
#country_count_first_instances.to_excel(results_root + "country_count_heat_island.xlsx")
graph(country_count_all_instances=country_count_all_instances,
      country_count_first_instance=country_count_first_instances)

# Flood

In [64]:
#Tally the flooding articles per country, then draw both maps
(country_count_all_instances,
 country_count_first_instances) = get_counts(flooding)
#Optional export, left disabled (results_root must be defined first):
#country_count_first_instances.to_excel(results_root + "country_count_flood.xlsx")
graph(country_count_all_instances=country_count_all_instances,
      country_count_first_instance=country_count_first_instances)

# Landslide

In [65]:
#Tally the landslide articles per country, then draw both maps
(country_count_all_instances,
 country_count_first_instances) = get_counts(landslides)
#Optional export, left disabled (results_root must be defined first):
#country_count_first_instances.to_excel(results_root + "country_count_landslide.xlsx")
graph(country_count_all_instances=country_count_all_instances,
      country_count_first_instance=country_count_first_instances)