In [55]:
import pandas as pd
import plotly
import plotly_express as px
from slugify import slugify
from utils.utils import *

# CSS classes applied to every HTML frequency table generated below.
tableClasses = ['table', 'table-hover']

# Breakdown of researchers (number of co-PIs, co-applicants, collaborators, etc.)
chercheurs = pd.read_excel('data/fnfr_transformation_chercheur-e-s.xlsx')

#### Geographic distribution
## International distribution
# Country name -> ISO alpha-3 code lookup, used to place markers on the map.
mappingPays = {
    row['Pays']: row['Alpha-3 code']
    for row in pd.read_csv(
        'utils/mapping_pays_iso.csv', sep=';', encoding='UTF-8'
    ).to_dict('records')
}

# Load the affiliations, drop rows whose country is the '-' placeholder, and
# attach the ISO alpha-3 code of each remaining country.
affiliations = (
    pd.read_csv('data/affiliationsChercheurs.csv')
    .loc[lambda rows: rows['Pays'] != '-']
    .assign(**{'Code Alpha-3 Pays': lambda rows: rows['Pays'].map(mappingPays)})
)

def generate_geo_figure(df: pd.DataFrame) -> plotly.graph_objs._figure.Figure:
    """Build a world bubble map with one marker per row of ``df``.

    Parameters
    ----------
    df : pd.DataFrame
        Must provide the columns ``Code Alpha-3 Pays`` (marker location,
        read with ``locationmode='ISO-3'``), ``Pays`` (hover title) and
        ``count`` (marker size).

    Returns
    -------
    plotly.graph_objs._figure.Figure
        The configured ``px.scatter_geo`` figure.
    """
    marker_options = {
        'locations': "Code Alpha-3 Pays",
        'locationmode': 'ISO-3',
        'hover_name': 'Pays',
        'size': "count",
        'size_max': 50,
        'height': 440,
        'projection': 'equirectangular',
    }
    geo_options = {
        'showcoastlines': False,         # no coastline strokes
        'showland': True,                # land is drawn, filled with `landcolor`
        'landcolor': '#E8E8E8',
        'showframe': True,               # a frame is drawn around the map
        'projection_scale': 1.125,       # zoom factor applied to the projection
        'center': dict(lon=20, lat=18),  # per the original note: keeps Antarctica out of view
    }
    layout_options = {
        'margin': dict(t=0, l=0, r=0, b=0),
        'title_x': 0.2,
    }

    world_map = px.scatter_geo(df, **marker_options)
    world_map.update_geos(**geo_options)
    world_map.update_layout(**layout_options)
    return world_map

# Generate the figures
figs = []
tablesFreq = {}
mappingTables = {'count': 'N'}

# Single output directory for every international figure. The same path string
# is used to write the file, to register it in `figs` and to key `tablesFreq`,
# so the three can no longer drift apart (previously 'all.html' and the
# per-competition pages were written to one directory and recorded under another).
geoInternationalDir = 'figures/geo/international'


def _export_country_figure(freq: pd.DataFrame, path: str) -> str:
    """Write the country map for ``freq`` to ``path`` and return its HTML table.

    Parameters
    ----------
    freq : pd.DataFrame
        Frequency table with at least the columns ``Pays``,
        ``Code Alpha-3 Pays`` and ``count``.
    path : str
        Destination of the HTML fragment produced by ``generate_geo_figure``.

    Returns
    -------
    str
        HTML table (columns ``Pays`` and ``N``, sorted by ``N`` descending)
        meant to be displayed next to the figure.
    """
    # Explicit UTF-8: the HTML embeds country names, which would otherwise be
    # encoded with the platform default encoding.
    with open(path, 'w', encoding='utf-8') as f:
        f.write(generate_geo_figure(freq).to_html(full_html=False, include_plotlyjs='cdn'))

    table = (
        freq[['Pays', 'count']]
        .rename(columns=mappingTables)
        .sort_values(by='N', ascending=False)
    )
    return table.to_html(classes=tableClasses, justify='left', index=False)


# Figure - all projects
# Frequency table
freqPays = pd.DataFrame(affiliations['Pays'].value_counts()).reset_index()
freqPays['Code Alpha-3 Pays'] = freqPays['Pays'].map(mappingPays)
freqPays = freqPays[['Pays', 'Code Alpha-3 Pays', 'count']]

allPath = f'{geoInternationalDir}/all.html'
tablesFreq[allPath] = _export_country_figure(freqPays, allPath)
figs.append({'Nom': 'Tout', 'Fichier': allPath})

# Figures - per competition
# The ISO code is mapped after grouping (as for `freqPays`) instead of being a
# grouping key: groupby drops rows whose key is NaN, which silently removed
# countries without an ISO mapping from these tables while the "all" table
# kept them.
freqPaysConcours = (
    affiliations
    .groupby(['concours', 'Pays'])['chercheur']
    .count()
    .reset_index()
    .rename(columns={'chercheur': 'count'})
)
freqPaysConcours['Code Alpha-3 Pays'] = freqPaysConcours['Pays'].map(mappingPays)

# Figures - per project (computed once; it does not depend on the loop variable)
freqPaysProjets = (
    affiliations
    .groupby(['projet', 'concours', 'Pays'])['chercheur']
    .count()
    .reset_index()
    .rename(columns={'chercheur': 'count'})
)
freqPaysProjets['Code Alpha-3 Pays'] = freqPaysProjets['Pays'].map(mappingPays)

for c in freqPaysConcours['concours'].unique():
    subdf = freqPaysConcours[freqPaysConcours['concours'] == c]

    concoursPath = f'{geoInternationalDir}/{c}.html'
    tablesFreq[concoursPath] = _export_country_figure(subdf, concoursPath)
    figs.append({'Nom': c, 'Fichier': concoursPath})

    projetsConcours = freqPaysProjets[freqPaysProjets['concours'] == c]

    for p in projetsConcours['projet'].unique():
        # NOTE(review): the slug is truncated to 30 characters, so two projects
        # sharing their first 30 slug characters would overwrite each other's
        # file and table entry -- confirm project titles are distinct enough.
        slugifiedName = slugify(p)[:30]

        ssubdf = projetsConcours[projetsConcours['projet'] == p]

        projetPath = f'{geoInternationalDir}/{slugifiedName}.html'
        tablesFreq[projetPath] = _export_country_figure(ssubdf, projetPath)
        figs.append({'Nom': f"{c} -- {p}", 'Fichier': projetPath})

# Serialise the {path: html table} mapping to its Python repr string.
tablesFreq = str(tablesFreq)


In [56]:
## Pan-Canadian distribution
# French subdivision name -> ISO 3166-2 code.
mapping_provinces = {
    row['Subdivision name']: row['3166-2 code']
    for row in (
        pd.read_csv('utils/mapping_provinces_canada_iso.csv')
        .loc[lambda rows: rows['Language code'] == 'fr']
        .to_dict('records')
    )
}

# Province coordinates, enriched with the ISO code and keyed by 'Province'.
province_info = (
    pd.read_csv('utils/provinces_canada_coordinates.csv')
    .assign(**{'code-ISO province': lambda rows: rows['Place Name'].map(mapping_provinces)})
    .rename(columns={'Place Name': 'Province'})
)

# Canadian affiliations joined with the province metadata (default inner merge:
# rows whose 'Province' is absent from `province_info` are not kept).
affiliationsCanada = (
    affiliations
    .loc[affiliations['Pays'] == 'Canada']
    .merge(province_info, on='Province')
)

# Number of affiliations per province, with coordinates attached for plotting.
freqProvinces = (
    affiliationsCanada['Province']
    .value_counts()
    .to_frame()
    .reset_index()
    .merge(province_info, on='Province')
)

freqProvinces

Unnamed: 0,Province,count,Latitude,Longitude,code-ISO province
0,Ontario,195,50.0,-85.0,CA-ON
1,Colombie-Britannique,65,53.726669,-127.647621,CA-BC
2,Québec,59,53.0,-70.0,CA-QC
3,Alberta,44,55.0,-115.0,CA-AB
4,Terre-Neuve-et-Labrador,22,53.135509,-57.660435,CA-NL
5,Territoires du Nord-Ouest,18,62.453972,-114.371788,CA-NT
6,Nouvelle-Écosse,12,45.0,-63.0,CA-NS
7,Manitoba,8,56.415211,-98.739075,CA-MB
8,Yukon,7,64.0,-135.0,CA-YT
9,Saskatchewan,4,55.0,-106.0,CA-SK


In [57]:
# Bubble map of Canada: one marker per province, sized by the 'count' column
# and coloured by province.
figProvince = (
    px.scatter_geo(
        freqProvinces,
        lat='Latitude',
        lon='Longitude',
        size='count',
        color='Province',
        size_max=40,
        color_discrete_sequence=px.colors.qualitative.Prism,
        projection='equirectangular',
        # Keep the raw coordinates out of the hover box.
        hover_data={'Longitude': False, 'Latitude': False},
    )
    .update_geos(
        scope='world',
        showcoastlines=False,            # no coastline strokes
        showland=True,                   # land is drawn, filled with `landcolor`
        showframe=False,                 # no frame around the map
        landcolor='#E8E8E8',
        center=dict(lon=-100, lat=60),   # map centre
        lonaxis_range=[-140, -45],       # visible longitude window
        lataxis_range=[40, 85],          # visible latitude window
    )
    .update_layout(
        margin=dict(t=20, l=0, r=0, b=0),
        title_x=0.2,
    )
)

figProvince.show()

In [58]:
import pandas as pd
import plotly
import plotly_express as px
from slugify import slugify
from utils.utils import *

# Reload the raw affiliations file. Unlike the earlier load in this notebook,
# no `Pays != '-'` filter is applied and no 'Code Alpha-3 Pays' column is added,
# so this rebinding replaces the prepared frame with the unfiltered one.
affiliations = pd.read_csv('data/affiliationsChercheurs.csv')

## Pan-Canadian distribution
def generate_geo_figure_provinces(df: pd.DataFrame) -> plotly.graph_objs._figure.Figure:
    """Build a bubble map of Canada with one marker per row of ``df``.

    Parameters
    ----------
    df : pd.DataFrame
        Must provide the columns ``Latitude`` and ``Longitude`` (marker
        position), ``count`` (marker size) and ``Province`` (marker colour).

    Returns
    -------
    plotly.graph_objs._figure.Figure
        The configured ``px.scatter_geo`` figure.
    """
    marker_options = {
        'lat': 'Latitude',
        'lon': 'Longitude',
        'size': 'count',
        'color': 'Province',
        'size_max': 40,
        'color_discrete_sequence': px.colors.qualitative.Prism,
        'projection': 'equirectangular',
        # Keep the raw coordinates out of the hover box.
        'hover_data': {'Longitude': False, 'Latitude': False},
    }
    geo_options = {
        'scope': 'world',
        'showcoastlines': False,            # no coastline strokes
        'showland': True,                   # land is drawn, filled with `landcolor`
        'showframe': False,                 # no frame around the map
        'landcolor': '#E8E8E8',
        'center': dict(lon=-100, lat=60),   # map centre
        'lonaxis_range': [-140, -45],       # visible longitude window
        'lataxis_range': [40, 85],          # visible latitude window
    }
    layout_options = {
        'margin': dict(t=20, l=0, r=0, b=0),
        'title_x': 0.2,
    }

    canada_map = px.scatter_geo(df, **marker_options)
    canada_map.update_geos(**geo_options)
    canada_map.update_layout(**layout_options)
    return canada_map

# Generate the figures
figsProvinces = []
tablesFreqProvinces = {}
mappingTables = {'count': 'N'}

# Output directory shared by every Canadian figure.
geoCanadaDir = 'figures/geo/canada'


def _export_province_figure(freq: pd.DataFrame, path: str) -> str:
    """Write the province map for ``freq`` to ``path`` and return its HTML table.

    Parameters
    ----------
    freq : pd.DataFrame
        Frequency table with at least the columns ``Province``, ``count``,
        ``Latitude`` and ``Longitude``.
    path : str
        Destination of the HTML fragment produced by
        ``generate_geo_figure_provinces``.

    Returns
    -------
    str
        HTML table (columns ``Province`` and ``N``, sorted by ``N``
        descending) meant to be displayed next to the figure.
    """
    # Explicit UTF-8: the HTML embeds accented province names (e.g. 'Québec'),
    # which would otherwise be encoded with the platform default encoding.
    with open(path, 'w', encoding='utf-8') as f:
        f.write(generate_geo_figure_provinces(freq).to_html(full_html=False, include_plotlyjs='cdn'))

    table = (
        freq[['Province', 'count']]
        .rename(columns=mappingTables)
        .sort_values(by='N', ascending=False)
    )
    # NOTE: `tableClasses` is not defined in this cell; it comes from the
    # first cell of the notebook, which must have been run beforehand.
    return table.to_html(classes=tableClasses, justify='left', index=False)


## Pan-Canadian distribution
# French subdivision name -> ISO 3166-2 code.
mapping_provinces = pd.read_csv('utils/mapping_provinces_canada_iso.csv')
mapping_provinces = mapping_provinces[mapping_provinces['Language code'] == 'fr']
mapping_provinces = {x['Subdivision name'] : x['3166-2 code'] for x in mapping_provinces.to_dict('records')}

# Province coordinates, enriched with the ISO code and keyed by 'Province'.
province_info = pd.read_csv('utils/provinces_canada_coordinates.csv')
province_info['code-ISO province'] = province_info['Place Name'].map(mapping_provinces)
province_info = province_info.rename(columns={'Place Name': 'Province'})

# Canadian affiliations joined with the province metadata (assigned once; the
# earlier duplicate assignment was immediately overwritten).
affiliationsCanada = affiliations[affiliations['Pays'] == 'Canada']
affiliationsCanada = affiliationsCanada.merge(province_info, on='Province')

# Figure - all projects
# Frequency table
freqProvinces = pd.DataFrame(affiliationsCanada['Province'].value_counts()).reset_index()
freqProvinces = freqProvinces.merge(province_info, on='Province')

allPath = f'{geoCanadaDir}/all.html'
tablesFreqProvinces[allPath] = _export_province_figure(freqProvinces, allPath)
figsProvinces.append({'Nom': 'Tout', 'Fichier': allPath})

# Figures - per competition
# 'code-ISO province' is no longer a grouping key: the merge below brings it
# back from `province_info`, and grouping on it both produced duplicated
# `_x`/`_y` columns and dropped provinces whose code is NaN (groupby discards
# NaN keys), unlike the "all" and per-project tables.
freqProvinceConcours = (
    affiliationsCanada
    .groupby(['concours', 'Province'])['chercheur']
    .count()
    .reset_index()
    .rename(columns={'chercheur': 'count'})
    .merge(province_info, on='Province')
)

# Figures - per project (computed once; it does not depend on the loop variable)
freqProvinceProjets = (
    affiliationsCanada
    .groupby(['projet', 'concours', 'Province'])['chercheur']
    .count()
    .reset_index()
    .merge(province_info, on='Province')
    .rename(columns={'chercheur': 'count'})
)

# TODO (original author's note): review from here on.
for c in freqProvinceConcours['concours'].unique():
    subdf = freqProvinceConcours[freqProvinceConcours['concours'] == c]

    concoursPath = f'{geoCanadaDir}/{c}.html'
    tablesFreqProvinces[concoursPath] = _export_province_figure(subdf, concoursPath)
    figsProvinces.append({'Nom': c, 'Fichier': concoursPath})

    projetsConcours = freqProvinceProjets[freqProvinceProjets['concours'] == c]

    for p in projetsConcours['projet'].unique():
        # NOTE(review): the slug is truncated to 30 characters, so two projects
        # sharing their first 30 slug characters would overwrite each other's
        # file and table entry -- confirm project titles are distinct enough.
        slugifiedName = slugify(p)[:30]

        ssubdf = projetsConcours[projetsConcours['projet'] == p]

        projetPath = f'{geoCanadaDir}/{slugifiedName}.html'
        tablesFreqProvinces[projetPath] = _export_province_figure(ssubdf, projetPath)
        figsProvinces.append({'Nom': f"{c} -- {p}", 'Fichier': projetPath})

# NOTE(review): this rebinds `tablesFreq` (the name that holds the international
# tables after the first cell) and leaves `tablesFreqProvinces` as a dict. It
# looks like a copy-paste slip, but the name is kept because code outside this
# view may read `tablesFreq` -- confirm which variable the consumer expects.
tablesFreq = str(tablesFreqProvinces)

In [59]:
figsProvinces

[{'Nom': 'Tout', 'Fichier': 'figures/geo/canada/all.html'},
 {'Nom': 2020, 'Fichier': 'figures/geo/canada/2020.html'},
 {'Nom': '2020 -- BIOSCAN: Tracing the Patterns of Life on a Changing Planet',
  'Fichier': 'figures/geo/canada/bioscan-tracing-the-patterns-o.html'},
 {'Nom': '2020 -- Mend the Gap: A Transformative Biomaterials Platform for Spinal Cord Repair',
  'Fichier': 'figures/geo/canada/mend-the-gap-a-transformative-.html'},
 {'Nom': '2020 -- Protection of Metallic Surfaces from Bulk to Nano Through Molecular-Level Innovation',
  'Fichier': 'figures/geo/canada/protection-of-metallic-surface.html'},
 {'Nom': '2020 -- Ărramăt: Strengthening Health and Well-Being through Indigenous-Led Conservation and Sustainable Relationships with Biodiversity',
  'Fichier': 'figures/geo/canada/arramat-strengthening-health-a.html'},
 {'Nom': '2020 -- Inclusive Design for Employment Access (IDEA)',
  'Fichier': 'figures/geo/canada/inclusive-design-for-employmen.html'},
 {'Nom': '2020 -- The Next