In [19]:
import pandas as pd
import numpy as np

# Read the two input CSV files.
# keep_default_na=False is required: pandas' default NA markers include the
# literal string "NA", which is also Namibia's ISO2 code, so the default
# parsing would turn Namibia's code into a missing value.
_read_options = {'header': 0, 'keep_default_na': False, 'low_memory': False}

# WHO TB incidence estimates disaggregated by age group, sex and risk factor from https://www.who.int/teams/global-tuberculosis-programme/data#csv_files
df = pd.read_csv('TB_burden_age_sex_2025-07-03.csv', **_read_options)

# WPP_2024 Population 1950-2023 from https://population.un.org/wpp/downloads?folder=Standard%20Projections&group=CSV%20format
df2 = pd.read_csv('WPP2024_TotalPopulationBySex.csv', **_read_options)

# Process
# Keep only the aggregate TB estimate of each country: every age group,
# sex code 'a' (presumably "all sexes" -- confirm against the WHO data
# dictionary) and every risk factor.
is_overall_row = (
    (df['age_group'] == 'all')
    & (df['sex'] == 'a')
    & (df['risk_factor'] == 'all')
)
columns_to_keep = ['country', 'iso2', 'year', 'best']
df_filtered = df.loc[is_overall_row, columns_to_keep]

# Attach the 2023 population to every TB row with a left join on the ISO2
# code, so countries without a population match are kept (PopTotal = NaN).
df2_filtered = df2.loc[df2['Time'] == 2023]
population_2023 = df2_filtered[['ISO2_code', 'PopTotal']]
df_final = df_filtered.merge(
    population_2023,
    how='left',
    left_on='iso2',
    right_on='ISO2_code',
)
# The join key from the population table duplicates 'iso2'; discard it.
df_final = df_final.drop(columns=['ISO2_code'])
#print(df_final.dtypes)

# Incidence per 100 000 inhabitants, rounded to 2 decimals.
# NOTE(review): PopTotal appears to be expressed in thousands of people (UN WPP
# convention -- confirm), hence cases / PopTotal * 100 == cases per 100 000.
# Rows with a missing or zero population get NaN instead of a rate.
population = df_final["PopTotal"]
has_population = population.notna() & population.ne(0)
incidence = df_final["best"].div(population).mul(100).round(2)
df_final["Incidence_per_100k"] = incidence.where(has_population)

print(df_final.head())

# Convert to French: lookup table from ISO 3166-1 alpha-2 code to the French
# country name.
# Source: https://gist.githubusercontent.com/lneveu/cdb444b0e609ed81d3ad1f5907cda6f8/raw/8ad6e298e55284b074dcd716da2d3b25904c29f8/iso-3166_country_french.json
# Fixed relative to the source list: "BB" (was the typo "Barbad") and
# "DK" (was the English name "Denmark").
# NOTE(review): a few names are dated (e.g. "SZ" Swaziland, "MK" Macédoine);
# left unchanged here in case downstream consumers match on these labels.
iso2_to_fr = {
    "AD": "Andorre",
    "AE": "Émirats Arabes Unis",
    "AF": "Afghanistan",
    "AG": "Antigua-Et-Barbuda",
    "AI": "Anguilla",
    "AL": "Albanie",
    "AM": "Arménie",
    "AO": "Angola",
    "AP": "Région Asie/Pacifique",
    "AQ": "Antarctique",
    "AR": "Argentine",
    "AS": "Samoa Américaines",
    "AT": "Autriche",
    "AU": "Australie",
    "AW": "Aruba",
    "AX": "Îles Åland",
    "AZ": "Azerbaïdjan",
    "BA": "Bosnie-Herzégovine",
    "BB": "Barbade",
    "BD": "Bangladesh",
    "BE": "Belgique",
    "BF": "Burkina Faso",
    "BG": "Bulgarie",
    "BH": "Bahreïn",
    "BI": "Burundi",
    "BJ": "Bénin",
    "BL": "Saint-Barthélemy",
    "BM": "Bermudes",
    "BN": "Brunei Darussalam",
    "BO": "État Plurinational De Bolivie",
    "BQ": "Bonaire, Saint-Eustache Et Saba",
    "BR": "Brésil",
    "BS": "Bahamas",
    "BT": "Bhoutan",
    "BV": "Île Bouvet",
    "BW": "Botswana",
    "BY": "Biélorussie",
    "BZ": "Belize",
    "CA": "Canada",
    "CC": "Îles Cocos",
    "CD": "République Démocratique Du Congo",
    "CF": "République Centrafricaine",
    "CG": "Congo",
    "CH": "Suisse",
    "CI": "Côte D'Ivoire",
    "CK": "Îles Cook",
    "CL": "Chili",
    "CM": "Cameroun",
    "CN": "Chine",
    "CO": "Colombie",
    "CR": "Costa Rica",
    "CU": "Cuba",
    "CV": "Cap-Vert",
    "CW": "Curaçao",
    "CX": "Île Christmas",
    "CY": "Chypre",
    "CZ": "République Tchèque",
    "DE": "Allemagne",
    "DJ": "Djibouti",
    "DK": "Danemark",
    "DM": "Dominique",
    "DO": "République Dominicaine",
    "DZ": "Algérie",
    "EC": "Équateur",
    "EE": "Estonie",
    "EG": "Égypte",
    "EH": "Sahara Occidental",
    "ER": "Érythrée",
    "ES": "Espagne",
    "ET": "Éthiopie",
    "EU": "Europe",
    "FI": "Finlande",
    "FJ": "Fidji",
    "FK": "Îles Malouines",
    "FM": "États Fédérés De Micronésie",
    "FO": "Îles Féroé",
    "FR": "France",
    "GA": "Gabon",
    "GB": "Royaume-Uni",
    "GD": "Grenade",
    "GE": "Géorgie",
    "GF": "Guyane",
    "GG": "Guernesey",
    "GH": "Ghana",
    "GI": "Gibraltar",
    "GL": "Groenland",
    "GM": "Gambie",
    "GN": "Guinée",
    "GP": "Guadeloupe",
    "GQ": "Guinée Équatoriale",
    "GR": "Grèce",
    "GS": "Géorgie Du Sud-Et-Les Îles Sandwich Du Sud",
    "GT": "Guatemala",
    "GU": "Guam",
    "GW": "Guinée-Bissau",
    "GY": "Guyana",
    "HK": "Hong Kong",
    "HM": "Îles Heard-Et-MacDonald",
    "HN": "Honduras",
    "HR": "Croatie",
    "HT": "Haïti",
    "HU": "Hongrie",
    "ID": "Indonésie",
    "IE": "Irlande",
    "IL": "Israël",
    "IM": "Île De Man",
    "IN": "Inde",
    "IO": "Territoire Britannique De L'océan Indien",
    "IQ": "Irak",
    "IR": "République Islamique D'Iran",
    "IS": "Islande",
    "IT": "Italie",
    "JE": "Jersey",
    "JM": "Jamaïque",
    "JO": "Jordanie",
    "JP": "Japon",
    "KE": "Kenya",
    "KG": "Kirghizistan",
    "KH": "Cambodge",
    "KI": "Kiribati",
    "KM": "Comores",
    "KN": "Saint-Christophe-et-Niévès",
    "KP": "République Populaire Démocratique De Corée",
    "KR": "République De Corée",
    "KW": "Koweït",
    "KY": "Îles Caïmans",
    "KZ": "Kazakhstan",
    "LA": "République Démocratique Populaire Lao",
    "LB": "Liban",
    "LC": "Sainte-Lucie",
    "LI": "Liechtenstein",
    "LK": "Sri Lanka",
    "LR": "Liberia",
    "LS": "Lesotho",
    "LT": "Lituanie",
    "LU": "Luxembourg",
    "LV": "Lettonie",
    "LY": "Libye",
    "MA": "Maroc",
    "MC": "Monaco",
    "MD": "République De Moldavie",
    "ME": "Monténégro",
    "MF": "Saint-Martin (Partie Française)",
    "MG": "Madagascar",
    "MH": "Îles Marshall",
    "MK": "Macédoine",
    "ML": "Mali",
    "MM": "Birmanie",
    "MN": "Mongolie",
    "MO": "Macao",
    "MP": "Îles Mariannes Du Nord",
    "MQ": "Martinique",
    "MR": "Mauritanie",
    "MS": "Montserrat",
    "MT": "Malte",
    "MU": "Maurice",
    "MV": "Maldives",
    "MW": "Malawi",
    "MX": "Mexique",
    "MY": "Malaisie",
    "MZ": "Mozambique",
    "NA": "Namibie",
    "NC": "Nouvelle-Calédonie",
    "NE": "Niger",
    "NF": "Île Norfolk",
    "NG": "Nigéria",
    "NI": "Nicaragua",
    "NL": "Pays-Bas",
    "NO": "Norvège",
    "NP": "Népal",
    "NR": "Nauru",
    "NU": "Niue",
    "NZ": "Nouvelle-Zélande",
    "OM": "Oman",
    "PA": "Panama",
    "PE": "Pérou",
    "PF": "Polynésie Française",
    "PG": "Papouasie-Nouvelle-Guinée",
    "PH": "Philippines",
    "PK": "Pakistan",
    "PL": "Pologne",
    "PM": "Saint-Pierre-Et-Miquelon",
    "PN": "Pitcairn",
    "PR": "Porto Rico",
    "PS": "Territoires Palestiniens Occupés",
    "PT": "Portugal",
    "PW": "Palaos",
    "PY": "Paraguay",
    "QA": "Qatar",
    "RE": "Réunion",
    "RO": "Roumanie",
    "RS": "Serbie",
    "RU": "Fédération De Russie",
    "RW": "Rwanda",
    "SA": "Arabie Saoudite",
    "SB": "Îles Salomon",
    "SC": "Seychelles",
    "SD": "Soudan",
    "SE": "Suède",
    "SG": "Singapour",
    "SH": "Sainte-Hélène",
    "SI": "Slovénie",
    "SJ": "Svalbard Et Jan Mayen",
    "SK": "Slovaquie",
    "SL": "Sierra Leone",
    "SM": "Saint-Marin",
    "SN": "Sénégal",
    "SO": "Somalie",
    "SR": "Suriname",
    "SS": "Soudan Du Sud",
    "ST": "Sao Tomé-Et-Principe",
    "SV": "République Du Salvador",
    "SX": "Saint-Martin (Partie Néerlandaise)",
    "SY": "République Arabe Syrienne",
    "SZ": "Swaziland",
    "TC": "Îles Turks-Et-Caïcos",
    "TD": "Tchad",
    "TF": "Terres Australes Françaises",
    "TG": "Togo",
    "TH": "Thaïlande",
    "TJ": "Tadjikistan",
    "TK": "Tokelau",
    "TL": "Timor-Leste",
    "TM": "Turkménistan",
    "TN": "Tunisie",
    "TO": "Tonga",
    "TR": "Turquie",
    "TT": "Trinité-Et-Tobago",
    "TV": "Tuvalu",
    "TW": "Taïwan",
    "TZ": "République-Unie De Tanzanie",
    "UA": "Ukraine",
    "UG": "Ouganda",
    "UM": "Îles Mineures Éloignées Des États-Unis",
    "US": "États-Unis",
    "UY": "Uruguay",
    "UZ": "Ouzbékistan",
    "VA": "Saint-Siège (État De La Cité Du Vatican)",
    "VC": "Saint-Vincent-Et-Les Grenadines",
    "VE": "Venezuela",
    "VG": "Îles Vierges Britanniques",
    "VI": "Îles Vierges Des États-Unis",
    "VN": "Viet Nam",
    "VU": "Vanuatu",
    "WF": "Wallis Et Futuna",
    "WS": "Samoa",
    "YE": "Yémen",
    "YT": "Mayotte",
    "ZA": "Afrique Du Sud",
    "ZM": "Zambie",
    "ZW": "Zimbabwe",
}

# Replace the country names with their French equivalents, looked up by
# ISO2 code. A code absent from iso2_to_fr keeps its original name: a bare
# .map() would turn it into NaN and the exported row would have an empty
# country cell.
df_final['country'] = df_final['iso2'].map(iso2_to_fr).fillna(df_final['country'])

# Only the country label and the computed rate are exported.
df_export = df_final[['country', 'Incidence_per_100k']]
print('\n\n Tableau enregistré')
print(df_export.head())

# Save the file as UTF-8 (semicolon-separated, LF line endings, no index column).
df_export.to_csv(
    '../assets/data/tuberculosis_prevalence.csv',
    index=False,
    sep=';',
    lineterminator='\n',
    encoding='utf-8',
)
print('\n Terminé. tuberculosis_prevalence.csv exporté dans le dossier /assets/data/ .')


          country iso2  year   best   PopTotal  Incidence_per_100k
0     Afghanistan   AF  2023  75000  41454.761              180.92
1         Albania   AL  2023    430   2811.655               15.29
2         Algeria   DZ  2023  22000  46164.219               47.66
3  American Samoa   AS  2023      2     47.521                4.21
4         Andorra   AD  2023      5     80.856                6.18


 Tableau enregistré
             country  Incidence_per_100k
0        Afghanistan              180.92
1            Albanie               15.29
2            Algérie               47.66
3  Samoa Américaines                4.21
4            Andorre                6.18

 Terminé. tuberculosis_prevalence.csv exporté dans le dossier /assets/data/ .
