In [1]:
import pandas as pd
import plotly
import plotly_express as px
from slugify import slugify
from utils.utils import *

# CSS classes attached to every HTML table produced with DataFrame.to_html.
tableClasses = ['table', 'table-hover']

# #### Breakdown of researchers (number of co-PIs, co-applicants, collaborators, etc.)
chercheurs = pd.read_excel('data/fnfr_transformation_chercheur-e-s.xlsx')

# #### Geographic distribution
# Country name -> alpha-3 code lookup, used below to attach a map location
# to each affiliation.
mappingPays = (
    pd.read_csv('utils/mapping_pays_iso.csv', sep=';', encoding='UTF-8')
    .set_index('Pays')['Alpha-3 code']
    .to_dict()
)

# Load the affiliations, discard rows whose country is the '-' placeholder,
# then add the alpha-3 code column (NaN when the country is not in the lookup).
affiliations = pd.read_csv('data/affiliationsChercheurs.csv')
affiliations = affiliations.loc[affiliations['Pays'].ne('-')]
affiliations['Code Alpha-3 Pays'] = affiliations['Pays'].map(mappingPays)

def generate_geo_figure(df: pd.DataFrame) -> plotly.graph_objs.Figure:
    """Build a proportional-symbol world map from a per-country frequency table.

    Parameters
    ----------
    df : pd.DataFrame
        Frequency table providing the columns ``'Code Alpha-3 Pays'`` (marker
        location, interpreted with ``locationmode='ISO-3'``), ``'Pays'``
        (hover title) and ``'count'`` (marker size).

    Returns
    -------
    plotly.graph_objs.Figure
        The ``px.scatter_geo`` figure after the geo and layout adjustments
        applied below.
    """
    figPays = px.scatter_geo(
        df,
        locations="Code Alpha-3 Pays",
        locationmode='ISO-3',
        hover_name='Pays',
        size="count",
        size_max=50,
        height=440,
        projection='equirectangular',
    )

    # Customize the base map
    figPays.update_geos(
        showcoastlines=False,  # do not draw coastlines
        showland=True,  # fill land areas...
        landcolor='#E8E8E8',  # ...with a light grey
        showframe=True,  # draw the frame around the map
        projection_scale=1.125,  # zoom in slightly so the map fills the figure
        center=dict(lon=20, lat=18),  # re-centre the view (original intent: keep Antarctica out of frame)
    )

    # Remove the outer margins; title_x only matters if a title is set later.
    figPays.update_layout(
        margin=dict(t=0, l=0, r=0, b=0),
        title_x=0.2,
    )

    return figPays

# Generate the figures
figs = []        # one {'Nom', 'Fichier'} record per exported figure, in export order
tablesFreq = {}  # figure path -> HTML frequency table displayed beside that figure
mappingTables = {'count': 'N'}


def _reserve_geo_path(stem) -> str:
    """Return a not-yet-exported ``figures/geo/<stem>.html`` path.

    Paths already registered in ``tablesFreq`` are considered taken; on a
    collision (e.g. two project slugs sharing the same first 30 characters, or
    one project listed under two competitions) ``-2``, ``-3``, ... is appended
    so an earlier figure is never overwritten. Non-colliding stems keep the
    exact path they had before.
    """
    candidate = f'figures/geo/{stem}.html'
    suffix = 1
    while candidate in tablesFreq:
        suffix += 1
        candidate = f'figures/geo/{stem}-{suffix}.html'
    return candidate


def _export_geo_view(nom, fichier: str, freq: pd.DataFrame) -> None:
    """Write the map for ``freq`` to ``fichier`` and register the figure and its table.

    ``freq`` must provide the columns ``'Pays'``, ``'Code Alpha-3 Pays'`` and
    ``'count'``. The target directory is assumed to exist.
    """
    # Explicit UTF-8 so the output does not depend on the platform's default encoding.
    with open(fichier, 'w', encoding='utf-8') as f:
        f.write(generate_geo_figure(freq).to_html(full_html=False, include_plotlyjs='cdn'))

    figs.append(
        {
            'Nom': nom,
            'Fichier': fichier
        }
    )

    # Table displayed beside the figure: country and frequency, most frequent
    # first. A stable sort keeps the order of ties deterministic.
    tableF = (
        freq[['Pays', 'count']]
        .rename(columns=mappingTables)
        .sort_values(by='N', ascending=False, kind='stable')
    )
    tablesFreq[fichier] = tableF.to_html(classes=tableClasses, justify='left', index=False)


# Figure - all projects
# Frequency table
freqPays = pd.DataFrame(affiliations['Pays'].value_counts()).reset_index()
freqPays['Code Alpha-3 Pays'] = freqPays['Pays'].map(mappingPays)
freqPays = freqPays[['Pays', 'Code Alpha-3 Pays', 'count']]

_export_geo_view('Tout', _reserve_geo_path('all'), freqPays)

# Figures - per competition
# Group on the country name only and attach the alpha-3 code afterwards:
# grouping on the code column as well would silently drop (NaN group key) any
# country missing from the ISO lookup, making these tables disagree with the
# "all projects" table above.
freqPaysConcours = (
    affiliations.groupby(['concours', 'Pays'])['chercheur']
    .count()
    .reset_index(name='count')
)
freqPaysConcours['Code Alpha-3 Pays'] = freqPaysConcours['Pays'].map(mappingPays)

# Figures - per project (computed once; it does not depend on the loop variable)
freqPaysProjets = (
    affiliations.groupby(['projet', 'concours', 'Pays'])['chercheur']
    .count()
    .reset_index(name='count')
)
freqPaysProjets['Code Alpha-3 Pays'] = freqPaysProjets['Pays'].map(mappingPays)

for c in freqPaysConcours['concours'].unique():
    _export_geo_view(
        c,
        _reserve_geo_path(c),
        freqPaysConcours[freqPaysConcours['concours'] == c],
    )

    projetsConcours = freqPaysProjets[freqPaysProjets['concours'] == c]

    for p in projetsConcours['projet'].unique():
        # File names are derived from the first 30 characters of the project slug.
        slugifiedName = slugify(p)[:30]

        _export_geo_view(
            f"{c} -- {p}",
            _reserve_geo_path(slugifiedName),
            projetsConcours[projetsConcours['projet'] == p],
        )

# Keep the Python-repr string form of the dict, as before.
# NOTE(review): presumably consumed by a template outside this cell - confirm.
tablesFreq = str(tablesFreq)


In [2]:
# Keep only the affiliations located in Canada.
affiliationsCanada = affiliations.loc[affiliations['Pays'].eq('Canada')]

# Subdivision name -> '3166-2 code' lookup, restricted to the rows whose
# language code is 'fr'.
mapping_provinces = pd.read_csv('utils/mapping_provinces_canada_iso.csv')
mapping_provinces = mapping_provinces.loc[mapping_provinces['Language code'].eq('fr')]
mapping_provinces = dict(
    zip(mapping_provinces['Subdivision name'], mapping_provinces['3166-2 code'])
)

# Number of affiliations per province / territory, with the matching code.
freqProvinces = affiliationsCanada['Province / Territoire'].value_counts().reset_index()
freqProvinces['code-ISO province'] = freqProvinces['Province / Territoire'].map(mapping_provinces)

freqProvinces

Unnamed: 0,Province / Territoire,count,code-ISO province
0,Ontario,195,CA-ON
1,Colombie-Britannique,65,CA-BC
2,Québec,59,CA-QC
3,Alberta,44,CA-AB
4,Terre-Neuve-et-Labrador,22,CA-NL
5,Territoires du Nord-Ouest,18,CA-NT
6,Nouvelle-Écosse,12,CA-NS
7,Manitoba,8,CA-MB
8,Yukon,7,CA-YT
9,Saskatchewan,4,CA-SK


In [8]:
# Subdivision name -> '3166-2 code' lookup, this time restricted to the rows
# whose language code is 'en'; it is used to match the 'Place Name' column of
# the coordinates file.
mapping_provinces = pd.read_csv('utils/mapping_provinces_canada_iso.csv')
mapping_provinces = mapping_provinces.loc[mapping_provinces['Language code'].eq('en')]
mapping_provinces = dict(
    zip(mapping_provinces['Subdivision name'], mapping_provinces['3166-2 code'])
)

# Attach the subdivision code to each row of the coordinates table.
province_coordinates = pd.read_csv('utils/provinces_canada_coordinates.csv')
province_coordinates['code-ISO province'] = province_coordinates['Place Name'].map(mapping_provinces)

# Subdivision code -> latitude / longitude lookups.
mapping_lat = dict(
    zip(province_coordinates['code-ISO province'], province_coordinates['Latitude'])
)
mapping_lon = dict(
    zip(province_coordinates['code-ISO province'], province_coordinates['Longitude'])
)

# Add the coordinates to the per-province frequency table.
# NOTE(review): the recorded output shows no coordinates for CA-NT and CA-YT;
# presumably their 'Place Name' does not match the 'en' subdivision name - verify
# against the two CSV files.
freqProvinces['lat'] = freqProvinces['code-ISO province'].map(mapping_lat)
freqProvinces['lon'] = freqProvinces['code-ISO province'].map(mapping_lon)

freqProvinces


Unnamed: 0,Province / Territoire,count,code-ISO province,lat,lon
0,Ontario,195,CA-ON,50.0,-85.0
1,Colombie-Britannique,65,CA-BC,53.726669,-127.647621
2,Québec,59,CA-QC,53.0,-70.0
3,Alberta,44,CA-AB,55.0,-115.0
4,Terre-Neuve-et-Labrador,22,CA-NL,53.135509,-57.660435
5,Territoires du Nord-Ouest,18,CA-NT,,
6,Nouvelle-Écosse,12,CA-NS,45.0,-63.0
7,Manitoba,8,CA-MB,56.415211,-98.739075
8,Yukon,7,CA-YT,,
9,Saskatchewan,4,CA-SK,55.0,-106.0


In [7]:
# Proportional-symbol map of Canadian affiliations per province / territory.
# freqProvinces has no 'Code Alpha-3 Pays' or 'Pays' column (those belong to
# the per-country tables), so markers are placed with its 'lat' / 'lon'
# columns and labelled with the province name instead.
# Rows without coordinates cannot be placed on the map and are left out explicitly.
px.scatter_geo(
    freqProvinces.dropna(subset=['lat', 'lon']),
    lat='lat',
    lon='lon',
    hover_name='Province / Territoire',
    size="count",
    size_max=50,
    height=440,
    projection='equirectangular',
    scope='north america',  # restrict the base map to the region being plotted
)