In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
pd.options.plotting.backend = "matplotlib" 
import re
from tqdm.notebook import tqdm

# Import global data

In [467]:
data = pd.read_csv('migr_asyappctza_linear.csv.gz',  compression='gzip')
del data['OBS_FLAG']

In [468]:
convert_ISO_3166_2_to_1 = {
'AF':'AFG',
'AX':'ALA',
'AL':'ALB',
'DZ':'DZA',
'AS':'ASM',
'AD':'AND',
'AO':'AGO',
'AI':'AIA',
'AQ':'ATA',
'AG':'ATG',
'AR':'ARG',
'AM':'ARM',
'AW':'ABW',
'AU':'AUS',
'AT':'AUT',
'AZ':'AZE',
'BS':'BHS',
'BH':'BHR',
'BD':'BGD',
'BB':'BRB',
'BY':'BLR',
'BE':'BEL',
'BZ':'BLZ',
'BJ':'BEN',
'BM':'BMU',
'BT':'BTN',
'BO':'BOL',
'BA':'BIH',
'BW':'BWA',
'BV':'BVT',
'BR':'BRA',
'IO':'IOT',
'BN':'BRN',
'BG':'BGR',
'BF':'BFA',
'BI':'BDI',
'KH':'KHM',
'CM':'CMR',
'CA':'CAN',
'CV':'CPV',
'KY':'CYM',
'CF':'CAF',
'TD':'TCD',
'CL':'CHL',
'CN':'CHN',
'CX':'CXR',
'CC':'CCK',
'CO':'COL',
'KM':'COM',
'CG':'COG',
'CD':'COD',
'CK':'COK',
'CR':'CRI',
'CI':'CIV',
'HR':'HRV',
'CU':'CUB',
'CY':'CYP',
'CZ':'CZE',
'DK':'DNK',
'DJ':'DJI',
'DM':'DMA',
'DO':'DOM',
'EC':'ECU',
'EG':'EGY',
'SV':'SLV',
'GQ':'GNQ',
'ER':'ERI',
'EE':'EST',
'ET':'ETH',
'FK':'FLK',
'FO':'FRO',
'FJ':'FJI',
'FI':'FIN',
'FR':'FRA',
'GF':'GUF',
'PF':'PYF',
'TF':'ATF',
'GA':'GAB',
'GM':'GMB',
'GE':'GEO',
'DE':'DEU',
'GH':'GHA',
'GI':'GIB',
'GR':'GRC',
'GL':'GRL',
'GD':'GRD',
'GP':'GLP',
'GU':'GUM',
'GT':'GTM',
'GG':'GGY',
'GN':'GIN',
'GW':'GNB',
'GY':'GUY',
'HT':'HTI',
'HM':'HMD',
'VA':'VAT',
'HN':'HND',
'HK':'HKG',
'HU':'HUN',
'IS':'ISL',
'IN':'IND',
'ID':'IDN',
'IR':'IRN',
'IQ':'IRQ',
'IE':'IRL',
'IM':'IMN',
'IL':'ISR',
'IT':'ITA',
'JM':'JAM',
'JP':'JPN',
'JE':'JEY',
'JO':'JOR',
'KZ':'KAZ',
'KE':'KEN',
'KI':'KIR',
'KP':'PRK',
'KR':'KOR',
'KW':'KWT',
'KG':'KGZ',
'LA':'LAO',
'LV':'LVA',
'LB':'LBN',
'LS':'LSO',
'LR':'LBR',
'LY':'LBY',
'LI':'LIE',
'LT':'LTU',
'LU':'LUX',
'MO':'MAC',
'MK':'MKD',
'MG':'MDG',
'MW':'MWI',
'MY':'MYS',
'MV':'MDV',
'ML':'MLI',
'MT':'MLT',
'MH':'MHL',
'MQ':'MTQ',
'MR':'MRT',
'MU':'MUS',
'YT':'MYT',
'MX':'MEX',
'FM':'FSM',
'MD':'MDA',
'MC':'MCO',
'MN':'MNG',
'ME':'MNE',
'MS':'MSR',
'MA':'MAR',
'MZ':'MOZ',
'MM':'MMR',
'NA':'NAM',
'NR':'NRU',
'NP':'NPL',
'NL':'NLD',
'AN':'ANT',
'NC':'NCL',
'NZ':'NZL',
'NI':'NIC',
'NE':'NER',
'NG':'NGA',
'NU':'NIU',
'NF':'NFK',
'MP':'MNP',
'NO':'NOR',
'OM':'OMN',
'PK':'PAK',
'PW':'PLW',
'PS':'PSE',
'PA':'PAN',
'PG':'PNG',
'PY':'PRY',
'PE':'PER',
'PH':'PHL',
'PN':'PCN',
'PL':'POL',
'PT':'PRT',
'PR':'PRI',
'QA':'QAT',
'RE':'REU',
'RO':'ROU',
'RU':'RUS',
'RW':'RWA',
'BL':'BLM',
'SH':'SHN',
'KN':'KNA',
'LC':'LCA',
'MF':'MAF',
'PM':'SPM',
'VC':'VCT',
'WS':'WSM',
'SM':'SMR',
'ST':'STP',
'SA':'SAU',
'SN':'SEN',
'RS':'SRB',
'SC':'SYC',
'SL':'SLE',
'SG':'SGP',
'SK':'SVK',
'SI':'SVN',
'SB':'SLB',
'SO':'SOM',
'ZA':'ZAF',
'GS':'SGS',
'ES':'ESP',
'LK':'LKA',
'SD':'SDN',
'SR':'SUR',
'SJ':'SJM',
'SZ':'SWZ',
'SE':'SWE',
'CH':'CHE',
'SY':'SYR',
'TW':'TWN',
'TJ':'TJK',
'TZ':'TZA',
'TH':'THA',
'TL':'TLS',
'TG':'TGO',
'TK':'TKL',
'TO':'TON',
'TT':'TTO',
'TN':'TUN',
'TR':'TUR',
'TM':'TKM',
'TC':'TCA',
'TV':'TUV',
'UG':'UGA',
'UA':'UKR',
'AE':'ARE',
'GB':'GBR',
'US':'USA',
'UM':'UMI',
'UY':'URY',
'UZ':'UZB',
'VU':'VUT',
'VE':'VEN',
'VN':'VNM',
'VG':'VGB',
'VI':'VIR',
'WF':'WLF',
'EH':'ESH',
'YE':'YEM',
'XK':'XXK',
'ZM':'ZMB',
'ZW':'ZWE'
}

In [469]:
data = data.replace({"citizen":convert_ISO_3166_2_to_1}).replace({"geo":convert_ISO_3166_2_to_1})

In [470]:
data['citizen'] = data['citizen'].replace('UK','GBR').replace('EL','GRC')
data['geo'] = data['geo'].replace('UK','GBR').replace('EL','GRC')

In [471]:
data = data[data['sex']!='T']
#data = data[data['age']!='TOTAL']
data = data[~(data['age'].isin(["Y_LT18","TOTAL"]))]
data = data[~(data['citizen'].isin(["EU27_2020","EU28","EXT_EU27_2020", "EXT_EU28",'UNK', "TOTAL", 'ESH', 'SS', 'STLS', 'UK_OCT', 'XXK', 'COK', 'PRK', 'RNC', 'TWN','VAT', 'NRU', 'MCO']))]
#[RNC] Recognised non-citizens
#[STLS] Stateless
#[UNK] Unknown
data = data[data['citizen'].notna()]
data = data[~data['geo'].isin(["EU27_2020", "EU28"])]
data = data[data['asyl_app']=="ASY_APP"]
#data = data[~data['asyl_app'].isin(["ASY_APP"])]

In [472]:
sum(1 for elem in data['geo'].unique() if isinstance(elem, str) and len(elem) == 3) == len(data['geo'].unique())

True

In [473]:
data['info']=data.apply(lambda x:(x['citizen'],x['geo'],x['TIME_PERIOD']),axis=1)

In [474]:
def strisation(liste):
    string = str(liste[0])+str(liste[1])+str(liste[2])
    return string

data['info_bis']=data['info'].apply(strisation)

In [475]:
inp_list = list(data['info'])
unique_list = list(dict.fromkeys(inp_list))
print(len(unique_list))

81915


In [476]:
inp_list = list(data['info_bis'])
unique_list = list(dict.fromkeys(inp_list))
print(len(unique_list))

81915


In [477]:
unique_list_final = []
for i in unique_list:
    list_inter = []
    list_inter.append(i[0:3])
    list_inter.append(i[3:6])
    list_inter.append(i[6:])
    unique_list_final.append(list_inter)

In [478]:
reduced_data = pd.DataFrame(np.array(unique_list_final), columns=['citizen', 'geo','TIME_PERIOD'])

In [479]:
reduced_data['info'] = reduced_data.apply(lambda x:(x['citizen'],x['geo'],x['TIME_PERIOD']),axis=1)

In [481]:
reduced_data['info_bis']=reduced_data['info'].apply(strisation)

In [482]:
reduced_data

Unnamed: 0,citizen,geo,TIME_PERIOD,info,info_bis
0,AND,AUT,2008,"(AND, AUT, 2008)",ANDAUT2008
1,AND,AUT,2009,"(AND, AUT, 2009)",ANDAUT2009
2,AND,AUT,2010,"(AND, AUT, 2010)",ANDAUT2010
3,AND,AUT,2011,"(AND, AUT, 2011)",ANDAUT2011
4,AND,AUT,2012,"(AND, AUT, 2012)",ANDAUT2012
...,...,...,...,...,...
81910,ZWE,GBR,2015,"(ZWE, GBR, 2015)",ZWEGBR2015
81911,ZWE,GBR,2016,"(ZWE, GBR, 2016)",ZWEGBR2016
81912,ZWE,GBR,2017,"(ZWE, GBR, 2017)",ZWEGBR2017
81913,ZWE,GBR,2018,"(ZWE, GBR, 2018)",ZWEGBR2018


# Code ISO

In [47]:
#Importer les modules
import requests
import csv
from bs4 import BeautifulSoup
# Adresse du site Internet
url = "https://www.atlas-monde.net/codes-iso/"
# Exécuter la requête GET
response = requests.get(url)
# Parser le document HTML BeautifulSoup obtenu à partir du code source
html = BeautifulSoup(response.text, 'html.parser')
# Extraire toutes les citations et tous les auteurs du document HTML
quotes_html = html.find_all('td')#, class_="text")
#authors_html = html.find_all('small', class_="author")
# Rassembler les citations dans une liste
quotes = list()
for quote in quotes_html:
    quotes.append(quote.text)

In [48]:
#len(quotes)//4
df_ISO = pd.DataFrame(index=range(0,248), columns=['Pays', 'ISO 3166-1 alpha-2','Country Code', 'ISO 3166-1 numérique'])
count=0
for ele in quotes:
    ligne = count//4
    if count%4==0:
        df_ISO.iloc[ligne,0]=ele
    if count%4==1:
        df_ISO.iloc[ligne,1]=ele
    if count%4==2:
        df_ISO.iloc[ligne,2]=ele
    if count%4==3:
        df_ISO.iloc[ligne,3]=ele
    count=count+1

In [116]:
for i in df_ISO['Pays']:
    print(i)

Pays
Afghanistan
Îles Åland
Albanie
Algérie
Samoa américaines
Andorre
Angola
Anguilla
Antarctique
Antigua-et-Barbuda
Argentine
Arménie
Aruba
Australie
Autriche
Azerbaïdjan
Bahamas
Bahreïn
Bangladesh
Barbade
Biélorussie
Belgique
Belize
Bénin
Bermudes
Bhoutan
Bolivie
Bosnie-Herzégovine
Botswana
Île Bouvet
Brésil
British Virgin Islands
Territoire britannique de l’Océan Indien
Brunei Darussalam
Bulgarie
Burkina Faso
Burundi
Cambodge
Cameroun
Canada
Cap-Vert
Iles Cayman
République centrafricaine
Tchad
Chili
Chine
Hong Kong
Macao
Île Christmas
Îles Cocos
Colombie
Comores
République du Congo
République démocratique du Congo
Îles Cook
Costa Rica
Côte d’Ivoire
Croatie
Cuba
Chypre
République tchèque
Danemark
Djibouti
Dominique
République dominicaine
Équateur
Égypte
Salvador
Guinée équatoriale
Érythrée
Estonie
Éthiopie
Îles Falkland
Îles Féroé
Fidji
Finlande
France
Guyane française
Polynésie française
Terres australes et antarctiques françaises
Gabon
Gambie
Géorgie
Allemagne
Ghana
Gibraltar
Grèce

# Langues

In [None]:
# CODE DEJA UTILISE ==> cf. PLUS BAS

#Importer les modules
import requests
import csv
from bs4 import BeautifulSoup
# Adresse du site Internet
url = "https://hors-frontieres.fr/langues-officielles-de-tous-les-pays-du-monde/"
# Exécuter la requête GET
response = requests.get(url)
# Parser le document HTML BeautifulSoup obtenu à partir du code source
html = BeautifulSoup(response.text, 'html.parser')
# Extraire toutes les citations et tous les auteurs du document HTML
quotes_html = html.find_all('td')#, class_="text")
#authors_html = html.find_all('small', class_="author")
# Rassembler les citations dans une liste
quotes = list()
for quote in quotes_html:
    quotes.append(quote.text)

In [77]:
# j'ai supprimé les mentions inutiles

quotes = ['Albanie',
 'albanais',
 '7',
 'Allemagne',
 'allemand',
 '27',
 'Andorre',
 'catalan',
 '3',
 'Autriche',
 'allemand',
 '9',
 'Belgique',
 'néerlandais/français/allemand',
 '10',
 'Biélorussie',
 'biélorusse',
 '2',
 'Bosnie-Herzégovine',
 'serbe/croate/bosniaque',
 '4',
 'Bulgarie',
 'bulgare',
 '11',
 'Chypre (nord)',
 'turc',
 '2',
 'Chypre (sud)',
 'grec',
 '2',
 'Croatie',
 'croate',
 '7',
 'Danemark',
 'danois',
 '7',
 'Espagne',
 'espagnol (castillan)',
 '14',
 'Estonie',
 'estonien',
 '2',
 'Finlande',
 'finnois/suédois',
 '12',
 'France',
 'français',
 '23',
 'Gibraltar\xa0(R.-U.)',
 'anglais',
 '6',
 'Grèce',
 'grec',
 '14',
 'Hongrie',
 'hongrois',
 '9',
 'Irlande',
 'anglais/irlandais',
 '5',
 'Islande',
 'islandais',
 '2',
 'Italie',
 'italien',
 '33',
 'Kosovo',
 'albanais/serbe',
 '10',
 'Lettonie',
 'letton',
 '5',
 'Liechtenstein',
 'allemand',
 '3',
 'Lituanie',
 'lituanien',
 '4',
 'Luxembourg',
 'luxembourgeois/français/allemand',
 '3',
 'Macédoine',
 'macédonien',
 '9',
 'Malte',
 'anglais/maltais',
 '4',
 'Moldavie',
 'moldave',
 '5',
 'Monaco',
 'français',
 '3',
 'Monténégro',
 'monténégrin',
 '5',
 'Norvège',
 'bokmål/nynorsk',
 '10',
 'Pays-Bas',
 'néerlandais',
 '15',
 'Pologne',
 'polonais',
 '14',
 'Portugal',
 'portugais',
 '8',
 'République tchèque',
 'tchèque',
 '7',
 'Roumanie',
 'roumain',
 '15',
 'Royaume-Uni',
 'anglais',
 '12',
 'Russie',
 'russe',
 '56 (100)',
 'Saint Marin',
 'italien',
 '2',
 'Serbie',
 'serbe',
 '14',
 'Slovaquie',
 'slovaque',
 '10',
 'Slovénie',
 'slovène',
 '4',
 'Suède',
 'suédois',
 '12',
 'Suisse',
 'allemand/français/italien',
 '12',
 'Ukraine',
 'ukrainien',
 '13',
 'Vatican',
 'italien',
 '1',
 'Afghanistan',
 'pachtou/dari',
 '48',
 'Arabie Saoudite',
 'arabe',
 '5',
 'Arménie',
 'arménien',
 '7',
 'Azerbaïdjan',
 'azerbaïdjanais',
 '16',
 'Bahreïn',
 'arabe',
 '3',
 'Bangladesh',
 'bengali',
 '42',
 'Birmanie',
 'birman',
 '111',
 'Bhoutan',
 'dzongkha',
 '25',
 'Brunei',
 'malais',
 '15',
 'Cambodge',
 'khmer',
 '23',
 'Chine',
 'chinois',
 '292',
 'Corée du Nord',
 'coréen',
 '1',
 'Corée du Sud',
 'coréen',
 '2',
 'Émirats arabes unis',
 'arabe',
 '7',
 'Géorgie',
 'géorgien',
 '13',
 'Hong Kong (Chine)',
 'cantonais/anglais',
 '6',
 'Inde',
 'hindi/anglais',
 '438',
 'Indonésie',
 'indonésien',
 '719',
 'Iran',
 'iranien (farsi)',
 '75',
 'Irak',
 'arabe',
 '22',
 'Israël',
 'hébreu/arabe',
 '33',
 'Japon',
 'japonais',
 '15',
 'Jordanie',
 'arabe',
 '10',
 'Kazakhstan',
 'kazakh',
 '8',
 'Koweït',
 'arabe',
 '3',
 'Kirghizistan',
 'kirghiz',
 '3',
 'Laos',
 'laotien',
 '84',
 'Liban',
 'arabe',
 '3',
 'Macao (Chine)',
 'portugais/chinois',
 '3',
 'Malaisie',
 'malais',
 '137',
 'Maldives',
 'maldivien',
 '1',
 'Mongolie',
 'khalkha (mongol)',
 '13',
 'Népal',
 'népali',
 '124',
 'Oman',
 'arabe',
 '15',
 'Ouzbékistan',
 'ouzbek',
 '8',
 'Pakistan',
 'anglais/ourdou',
 '72',
 'Palestine',
 'arabe',
 '4',
 'Philippines',
 'filipino/anglais',
 '171',
 'Qatar',
 'arabe',
 '3',
 'Singapour',
 'anglais/chinois/malais/tamoul',
 '21',
 'Sri Lanka',
 'singhalais/tamoul',
 '7',
 'Syrie',
 'arabe',
 '16',
 'Taiwan',
 'chinois',
 '22',
 'Tadjikistan',
 'tadjik',
 '12',
 'Thaïlande',
 'thaï',
 '74',
 'Timor oriental',
 'portugais/tétum',
 '19',
 'Turquie',
 'turc',
 '34',
 'Turkménistan',
 'turkmène',
 '4',
 'Vietnam',
 'vietnamien',
 '106',
 'Yémen',
 'arabe',
 '10',
 'Anguilla\xa0(R.-U.)',
 'anglais',
 '2',
 'Antigua/Barbuda',
 'anglais',
 '2',
 'Antilles néerlandaises',
 'néerlandais',
 '4',
 'Argentine',
 'espagnol',
 '25',
 'Bahamas',
 'anglais',
 '2',
 'Barbade',
 'anglais',
 '2',
 'Belize',
 'anglais',
 '8',
 'Bermudes\xa0(R.-U.)',
 'anglais',
 '1',
 'Bolivie',
 'espagnol',
 '37',
 'Brésil',
 'portugais',
 '181',
 'Canada',
 'anglais/français',
 '86',
 'Chili',
 'espagnol',
 '9',
 'Colombie',
 'espagnol',
 '80',
 'Costa Rica',
 'espagnol',
 '10',
 'Cuba',
 'espagnol',
 '2',
 'Dominique',
 'anglais',
 '3',
 'Équateur',
 'espagnol',
 '23',
 'États-Unis',
 'anglais',
 '176',
 'Falkland/Malouines\xa0(R.-U.)',
 'anglais',
 '1',
 'Grenade',
 'anglais',
 '3',
 'Guadeloupe (Fr.)',
 'français',
 '4',
 'Groenland (Dan.)',
 'danois/groenlandais',
 '2',
 'Guatemala',
 'espagnol',
 '53',
 'Guyana',
 'anglais',
 '16',
 'Guyane française\xa0(Fr.)',
 'français',
 '12',
 'Haïti',
 'français/créole',
 '2',
 'Honduras',
 'espagnol',
 '10',
 'Jamaïque',
 'anglais',
 '3',
 'Martinique (Fr.)',
 'français',
 '2',
 'Mexique',
 'espagnol',
 '291',
 'Nicaragua',
 'espagnol',
 '7',
 'Panama',
 'espagnol',
 '14',
 'Paraguay',
 'espagnol',
 '20',
 'Pérou',
 'espagnol',
 '92',
 'Porto Rico\xa0(USA)',
 'espagnol/anglais',
 '3',
 'République Dominicaine',
 'espagnol',
 '4',
 'St. Kitts-Nevis',
 'anglais',
 '2',
 'Sainte-Lucie',
 'anglais',
 '2',
 'St-Pierre-et-Miquelon\xa0(Fr.)',
 'français',
 '2',
 'Salvador',
 'espagnol',
 '5',
 'St-Vincent / Grenadines',
 'anglais',
 '2',
 'Surinam',
 'néerlandais',
 '16',
 'Trinité-et-Tobago',
 'anglais',
 '6',
 'Uruguay',
 'espagnol',
 '2',
 'Venezuela',
 'espagnol',
 '40',
 'Vierges américaines\xa0(USA)',
 'anglais',
 '3',
 'Vierges britanniques\xa0(R.-U.)',
 'anglais',
 '2',
 'Australie',
 'anglais',
 '161',
 'Cook, îles (N.-Z.)',
 'anglais',
 '5',
 'Fidji',
 'anglais',
 '10',
 'Guam',
 'anglais/chamorro',
 '10',
 'Hawaï (USA)',
 'anglais/hawaïen',
 '15',
 'Kiribati',
 'anglais/kiribati',
 '2',
 'Mariannes du Nord',
 'anglais',
 '3',
 'Marshall (USA)',
 'anglais',
 '2',
 'Micronésie',
 'anglais',
 '18',
 'Nauru',
 'anglais',
 '3',
 'Niu (N.-Z.)',
 'anglais',
 '2',
 'Nlle-Calédonie (Fr.)',
 'français',
 '39',
 'Nouvelle-Zélande',
 'anglais/maori',
 '4',
 'Norfolk (Austr.)',
 'anglais',
 '2',
 'Palau',
 'anglais',
 '4',
 'Papouasie-Nlle-Guinée',
 'anglais',
 '830',
 'Pâques/Pascua (Chili)',
 'espagnol',
 '2',
 'Pitcairn (R.-U.)',
 'anglais',
 '2',
 'Polynésie française (Fr.)',
 'français',
 '9',
 'Salomon',
 'anglais',
 '5',
 'Samoa américaines (USA)',
 'anglais',
 '2',
 'Samoa occidentales',
 'anglais/samoan',
 '2',
 'Tokelau (N.-Z.)',
 'anglais',
 '2',
 'Tonga',
 'anglais/tonguien',
 '3',
 'Tuvalu',
 'anglais/tuvaluan',
 '2',
 'Vanuatu',
 'anglais/français/bichlamar',
 '108',
 'Wallis-et-Futuna',
 'français',
 '3',
 'Afrique du Sud',
 'anglais/afrikaans',
 '24',
 'Algérie',
 'arabe',
 '18',
 'Angola',
 'portugais',
 '41',
 'Bénin',
 'français',
 '54',
 'Botswana',
 'anglais',
 '20',
 'Burkina Faso',
 'français',
 '68',
 'Burundi',
 'français/kirundi',
 '3',
 'Cameroun',
 'français/anglais',
 '278',
 'Canaries\xa0(Esp.)',
 'espagnol',
 '2',
 'Cap-Vert',
 'portugais',
 '2',
 'Centrafrique',
 'français',
 '71',
 'Comores',
 'français/arabe',
 '6',
 'Congo',
 'français',
 '71',
 'Congo-Kinshasa',
 'français',
 '215',
 'Côte d’Ivoire',
 'français',
 '77',
 'Djibouti',
 'arabe/français',
 '5',
 'Égypte',
 'arabe',
 '11',
 'Érythrée\xa0',
 'arabe/tigrinia',
 '12',
 'Éthiopie',
 'amharique',
 '85',
 'Gabon',
 'français',
 '42',
 'Gambie',
 'anglais',
 '10',
 'Ghana',
 'anglais',
 '79',
 'Guinée',
 'français',
 '34',
 'Guinée-Bissau',
 'portugais',
 '21',
 'Guinée équatoriale',
 'espagnol/français',
 '14',
 'Kenya',
 'anglais/swahili',
 '69',
 'Lesotho',
 'anglais/sésotho',
 '5',
 'Liberia',
 'anglais',
 '30',
 'Libye',
 'arabe',
 '9',
 'Madagascar',
 'malgache/français',
 '17',
 'Madère\xa0(Portugal)',
 'portugais',
 '2',
 'Malawi',
 'anglais',
 '16',
 'Mali',
 'français',
 '56',
 'Maroc',
 'arabe',
 '9',
 'Maurice\xa0',
 'anglais',
 '6',
 'Mauritanie',
 'arabe/français',
 '6',
 'Mayotte\xa0(Fr.)',
 'français',
 '4',
 'Mozambique',
 'portugais',
 '43',
 'Namibie',
 'afrikaans/anglais',
 '28',
 'Niger',
 'français',
 '21',
 'Nigeria',
 'anglais',
 '514',
 'Ouganda',
 'anglais',
 '43',
 'Réunion\xa0(Fr.)',
 'français',
 '3',
 'Rwanda',
 'français/kinyarwanda',
 '3',
 'Saõ Tomé et Principe',
 'portugais',
 '4',
 'Sénégal',
 'français',
 '37',
 'Seychelles',
 'anglais/français/créole',
 '3',
 'Sierra Leone',
 'anglais',
 '24',
 'Somalie',
 'somali',
 '13',
 'Soudan (Nord)',
 'arabe',
 '78',
 'Soudan du Sud',
 'anglais',
 '72',
 'Swaziland',
 'anglais/swati',
 '5',
 'Tanzanie',
 'anglais/swahili',
 '128',
 'Tchad',
 'arabe/français',
 '131',
 'Togo',
 'français',
 '39',
 'Tunisie',
 'arabe',
 '6',
 'Zambie',
 'anglais',
 '43',
 'Zimbabwe',
 'anglais',
 '19']

In [483]:
df_langue = pd.DataFrame(index=range(0,230), columns=['Pays', 'Langues', 'Nombre d\'autres langues parlées'])

count=0
for ele in quotes:
    ligne = count//3
    if count%3==0:
        df_langue.iloc[ligne,0]=ele
    if count%3==1:
        df_langue.iloc[ligne,1]=ele
    if count%3==2:
        df_langue.iloc[ligne,2]=ele
    count=count+1

def langue(string) : 
    return np.array(re.findall('[^\/]+', string))

df_langue['Langues'] = df_langue.Langues.apply(langue)

In [484]:
df_glob_langue = df_langue.merge(df_ISO, how='left', on='Pays')

In [485]:
del df_glob_langue['Pays']
del df_glob_langue['ISO 3166-1 alpha-2']
del df_glob_langue["Nombre d'autres langues parlées"]
del df_glob_langue['ISO 3166-1 numérique']

df_glob_langue.rename(columns={'Country Code': 'geo'}, inplace=True)

In [486]:
df_glob_langue
nouvelle_ligne = pd.DataFrame({'Langues': [['turc', 'grec']], 'geo':['CYP']})
df_glob_langue = df_glob_langue.append(nouvelle_ligne, ignore_index=True)
df_glob_langue

  df_glob_langue = df_glob_langue.append(nouvelle_ligne, ignore_index=True)


Unnamed: 0,Langues,geo
0,[albanais],ALB
1,[allemand],DEU
2,[catalan],AND
3,[allemand],AUT
4,"[néerlandais, français, allemand]",BEL
...,...,...
226,[français],TGO
227,[arabe],TUN
228,[anglais],ZMB
229,[anglais],ZWE


In [487]:
glob_data = data.merge(df_glob_langue, how='left', on='geo')
glob_data.rename(columns={'Langues': 'Langues_geo'}, inplace=True)

In [488]:
df_glob_langue.rename(columns={'geo': 'citizen'}, inplace=True)
# créer une liste de pays avec les langues parlées
truc = [['ARE', ['arabe']], ['ATG', ['anglais']], ['COD', ['français']], ['CAF', ['français']], ['COG', ['français']], 
        ['DOM', ['espagnol']], ['ERI', ['tigrigna']], ['GUY', ['anglais', 'créole']], ['KNA', ['anglais']], 
        ['LBR', ['anglais']], ['MMR', ['birman']], ['MUS', ['anglais', 'français']], ['SDN', ['arabe', 'anglais']], 
        ['SMR', ['italien']], ['SUR', ['néerlandais']], ['STP', ['portugais']], ['SWZ', ['anglais', 'swazi']], 
        ['TLS', ['portugais', 'tétoum']], ['VCT', ['anglais']], ['VNM', ['vietnamien']], ['WSM', ['anglais', 'samoan']], 
       ['BRN',['malais']], ['MHL', ['marshalais', 'anglais']], ['PNG', ['anglais']], ['SLB', ['anglais']]]

# créer le dataframe
df = pd.DataFrame(truc, columns=['citizen', 'Langues'])
df_glob_langue = pd.concat([df_glob_langue,df], ignore_index=True)
df_glob_langue

Unnamed: 0,Langues,citizen
0,[albanais],ALB
1,[allemand],DEU
2,[catalan],AND
3,[allemand],AUT
4,"[néerlandais, français, allemand]",BEL
...,...,...
251,"[anglais, samoan]",WSM
252,[malais],BRN
253,"[marshalais, anglais]",MHL
254,[anglais],PNG


In [489]:
glob_data = glob_data.merge(df_glob_langue, how='left', on='citizen')
glob_data.rename(columns={'Langues': 'Langues_citizen'}, inplace=True)

In [490]:
glob_data[glob_data['Langues_citizen'].isna()]['citizen'].unique()

array([], dtype=object)

In [491]:
A = ['ARE', 'ATG', 'COD', 'CAF', 'COG', 'DOM', 'ESH', 'ERI', 'GUY',
       'KNA', 'LBR', 'MMR', 'MUS', 'SDN', 'SMR', 'SUR', 'SS', 'STP',
       'STLS', 'SWZ', 'TLS', 'UK_OCT', 'VCT', 'VNM', 'WSM', 'XXK']

In [108]:
B = ['ARE', 'AFG', 'ALB', 'ARM', 'AGO', 'ARG', 'AZE', 'BIH', 'BGD',
       'BEL', 'BFA', 'BGR', 'BHR', 'BDI', 'BEN', 'BRA', 'BTN', 'BWA',
       'BLR', 'CAN', 'COD', 'CAF', 'COG', 'CIV', 'CHL', 'CMR', 'CHN',
       'COL', 'CUB', 'CZE', 'DJI', 'DOM', 'DZA', 'EGY', 'GRC', 'ERI',
       'ESP', 'ETH', 'FRA', 'GAB', 'GEO', 'GHA', 'GMB', 'GIN', 'GNQ',
       'GTM', 'GNB', 'HND', 'HRV', 'HTI', 'HUN', 'IDN', 'ISR', 'IND',
       'IRQ', 'IRN', 'ITA', 'JAM', 'JOR', 'KEN', 'KGZ', 'KHM', 'PRK',
       'KOR', 'KWT', 'KAZ', 'LBN', 'LKA', 'LBR', 'LTU', 'LVA', 'LBY',
       'MAR', 'MDA', 'MNE', 'MDG', 'MKD', 'MLI', 'MMR', 'MNG', 'MRT',
       'MWI', 'MEX', 'MYS', 'MOZ', 'NER', 'NGA', 'NIC', 'NLD', 'NOR',
       'NPL', 'OMN', 'PER', 'PHL', 'PAK', 'POL', 'PRT', 'PRY', 'ROU',
       'SRB', 'RUS', 'RWA', 'SAU', 'SDN', 'SVK', 'SLE', 'SEN', 'SOM',
       'SLV', 'SYR', 'TCD', 'TGO', 'THA', 'TJK', 'TKM', 'TUN', 'TUR',
       'TTO', 'TWN', 'TZA', 'UKR', 'UGA', 'USA', 'UZB', 'VEN', 'VNM',
       'YEM', 'ZAF', 'ZMB', 'ZWE', 'ATG', 'AUS', 'BRB', 'BOL', 'DEU',
       'DMA', 'ECU', 'GRD', 'GUY', 'JPN', 'COM', 'KNA', 'LCA', 'MUS',
       'MDV', 'QAT', 'SYC', 'SGP', 'SWZ', 'VCT', 'WSM', 'LAO', 'EST',
       'STP', 'SUR', 'TLS', 'MLT', 'CHE', 'CPV', 'GBR', 'TON', 'CRI',
       'PAN', 'URY', 'BHS', 'LIE', 'SWE', 'SMR', 'BLZ']

In [173]:
resultat = [x for x in A if x not in B]

print(resultat)

['ESH', 'SS', 'STLS', 'UK_OCT', 'XXK']


# PIB
https://donnees.banquemondiale.org/indicator/NY.GDP.PCAP.PP.KD?name_desc=false

J'ai supprimé les 4 premières lignes du csv

In [492]:
pib = pd.read_csv('./PIB/pib.csv', sep=',')

df_glob_pib = pib.merge(df_ISO, how='left', on='Country Code')

del df_glob_pib['Pays']
del df_glob_pib['ISO 3166-1 numérique']
for i in range(1960,2008):
    del df_glob_pib[str(i)]

del df_glob_pib['Country Name']
#del df_glob_pib['Country Code']
del df_glob_pib['ISO 3166-1 alpha-2']
del df_glob_pib['Indicator Name']
del df_glob_pib['Indicator Code']

df_glob_pib.rename(columns={'Country Code':'geo'}, inplace = True)

In [493]:
df_glob_pib = df_glob_pib[~df_glob_pib['geo'].isin(['AND', 'CUB', 'DJI', 'ERI', 'KWT', 'LIE', 'MCO', 'SMR', 'SOM', 'SYR', 'TKM', 'VEN', 'YEM'])]

In [494]:
import wbdata
import datetime
# Définition des paramètres pour la requête API

for country in ['AND','CUB','DJI','ERI','KWT','LIE','MCO','SMR','SOM','SYR','TKM','VEN','YEM']:
    #print(country)
    indicators = {"NY.GDP.PCAP.CD": "GDP per capita"}
    start_year = datetime.datetime(year=2008, month=1, day=1)
    end_year = datetime.datetime(year=2021, month=1, day=1)

    # Récupération des données de la Banque mondiale
    data_bm = wbdata.get_dataframe(indicators, country=country, data_date=(start_year,end_year))
    
    data_bm = data_bm.transpose()
    data_bm['geo'] = country
    df_glob_pib = pd.concat([df_glob_pib, data_bm], ignore_index=True)

In [495]:
geo = df_glob_pib['geo'] 
df_glob_pib = df_glob_pib.iloc[:,1:]
df_glob_pib = df_glob_pib.fillna(method='ffill', axis=1).fillna(method='bfill', axis=1)
df_glob_pib['geo'] = geo

In [496]:
df_glob_pib

Unnamed: 0,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,geo
0,40096.781450,34978.009140,33639.396180,34446.856890,33815.109340,35720.698110,35467.235760,36490.524460,37043.698150,38865.188200,40706.749510,40780.516570,33155.243090,38866.333280,ABW
1,3500.345632,3429.780047,3513.493485,3558.462483,3543.829030,3593.397020,3643.014398,3658.718776,3654.773597,3659.361146,3661.769172,3648.708762,3455.516252,3517.766966,AFE
2,1556.844452,1823.742614,2026.163818,1961.096317,2122.830759,2165.340915,2144.449634,2108.714173,2101.422187,2096.093111,2060.698973,2079.921861,1968.341002,1516.305667,AFG
3,3487.092369,3594.933730,3732.626445,3807.251965,3897.075331,4021.244799,4141.765663,4143.120964,4050.295948,4045.303263,4063.464359,4092.028643,3957.755694,4013.718352,AFW
4,7832.889809,7613.781167,7691.131265,7664.439975,8012.100845,8100.385154,8183.678371,7967.103871,7488.138837,7216.061373,6878.593482,6602.423657,6032.274914,5908.570045,AGO
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
261,356.117902,356.117902,356.117902,356.117902,356.117902,356.117902,377.557387,387.607994,387.186682,377.349072,379.986003,405.786763,416.217775,446.981560,SOM
262,10155.973667,10288.149047,11304.644928,2971.282434,1910.604526,993.739883,1071.234204,857.497868,664.341672,862.319064,1104.392448,1116.679246,533.385232,533.385232,SYR
263,3778.668539,3901.670283,4286.880505,5453.155005,6441.886618,7049.797505,7685.509859,6208.296655,6163.253406,6354.532830,6721.349540,7344.648233,7344.648233,7344.648233,TKM
264,11310.778170,11641.799147,13692.914967,10877.112364,12937.927597,12433.980785,15975.729375,15975.729375,15975.729375,15975.729375,15975.729375,15975.729375,15975.729375,15975.729375,VEN


In [497]:
for i in tqdm(reduced_data.index):
    pays=reduced_data.loc[i,'geo']
    date = str(reduced_data.loc[i,'TIME_PERIOD'])
    temporary_df = df_glob_pib[df_glob_pib['geo']==pays]
    if len(np.array(temporary_df.loc[:,date]))>0 : 
        chiffre_pib = np.array(temporary_df.loc[:,date])
        reduced_data.loc[i,'pib_geo']=chiffre_pib
    else : 
        reduced_data.loc[i,'pib_geo']=np.NaN

df_glob_pib.rename(columns={'geo':'citizen'}, inplace = True)

for i in tqdm(reduced_data.index):
    pays=reduced_data.loc[i,'citizen']
    date = str(reduced_data.loc[i,'TIME_PERIOD'])
    temporary_df = df_glob_pib[df_glob_pib['citizen']==pays]
    if len(np.array(temporary_df.loc[:,date]))>0 : 
        chiffre_pib = np.array(temporary_df.loc[:,date])
        reduced_data.loc[i,'pib_citizen']=chiffre_pib
    else : 
        reduced_data.loc[i,'pib_citizen']=np.NaN

  0%|          | 0/81915 [00:00<?, ?it/s]

  0%|          | 0/81915 [00:00<?, ?it/s]

In [498]:
reduced_data[reduced_data['pib_citizen'].isna()]['citizen'].unique()

array([], dtype=object)

# IDH

https://hdr.undp.org/data-center/documentation-and-downloads

In [499]:
idh = pd.read_csv('./IDH/idh.csv', sep=',')

idh.rename(columns={'iso3': 'Country Code'}, inplace=True)
df_glob_idh = idh.merge(df_ISO, how='left', on='Country Code')

In [500]:
liste_col_à_garder = ['Country Code']
for i in range(2008, 2022):
    liste_col_à_garder.append('hdi_'+str(i))

df_glob_idh = df_glob_idh.loc[:, liste_col_à_garder]

df_glob_idh.loc[158, 'hdi_2008'] = 0.48

df_glob_idh.rename(columns={'Country Code':'geo'}, inplace = True)

geo = df_glob_idh['geo'] 
# Etre sur que 1 est la bonne colonne
df_glob_idh = df_glob_idh.iloc[:,1:]
df_glob_idh = df_glob_idh.fillna(method='ffill', axis=1).fillna(method='bfill', axis=1)
df_glob_idh['geo'] = geo

In [501]:
df_glob_idh.rename(columns={'geo':'pays'}, inplace = True)

for i in tqdm(reduced_data.index):
    pays=reduced_data.loc[i,'geo']
    date = 'hdi_'+str(reduced_data.loc[i,'TIME_PERIOD'])
    temporary_df = df_glob_idh[df_glob_idh['pays']==pays]
    if len(np.array(temporary_df.loc[:,date]))>0 : 
        chiffre_pib = np.array(temporary_df.loc[:,date])
        reduced_data.loc[i,'idh_geo']=chiffre_pib
    else : 
        reduced_data.loc[i,'idh_geo']=np.NaN

#df_glob_idh.rename(columns={'geo':'citizen'}, inplace = True)

for i in tqdm(reduced_data.index):
    pays = reduced_data.loc[i,'citizen']
    date = 'hdi_'+str(reduced_data.loc[i,'TIME_PERIOD'])
    temporary_df = df_glob_idh[df_glob_idh['pays']==pays]
    if len(np.array(temporary_df.loc[:,date]))>0 : 
        chiffre_pib = np.array(temporary_df.loc[:,date])
        reduced_data.loc[i,'idh_citizen']=chiffre_pib
    else : 
        reduced_data.loc[i,'idh_citizen']=np.NaN

  0%|          | 0/81915 [00:00<?, ?it/s]

  0%|          | 0/81915 [00:00<?, ?it/s]

In [502]:
print(reduced_data[reduced_data['idh_geo'].isna()]['geo'].unique())
print(reduced_data[reduced_data['idh_citizen'].isna()]['citizen'].unique())

[]
[]


# Inflation

https://donnees.banquemondiale.org/indicator/NY.GDP.DEFL.KD.ZG?name_desc=false

J'ai supprimé les 4 premières lignes

In [503]:
inflation = pd.read_csv('./inflation/inflation.csv', sep=',')

In [504]:
df_glob_inflation = inflation.merge(df_ISO, how='left', on='Country Code')

liste_col_à_garder = ['Country Code']
for i in range(2008, 2022):
    liste_col_à_garder.append(str(i))

df_glob_inflation = df_glob_inflation.loc[:, liste_col_à_garder]

df_glob_inflation.rename(columns={'Country Code':'geo'}, inplace = True)

geo = df_glob_inflation['geo'] 
# Etre sur que 1 est la bonne colonne
df_glob_inflation = df_glob_inflation.iloc[:,1:]
df_glob_inflation = df_glob_inflation.fillna(method='ffill', axis=1).fillna(method='bfill', axis=1)
df_glob_inflation['geo'] = geo

df_glob_inflation.rename(columns={'geo':'pays'}, inplace = True)

In [413]:
df_glob_inflation

Unnamed: 0,2008,2009,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,pays
0,4.264063,1.703838,-1.216642,4.011814,0.168847,-1.986755,2.335109,2.531592,-1.389713,-1.753935,-1.613834,4.542973,-4.837191,2.216017,ABW
1,11.463040,7.899823,6.128432,9.430270,6.992016,5.844111,5.560736,5.366462,6.400900,5.217431,3.980391,4.456658,4.735355,5.624473,AFE
2,2.096289,-2.163404,3.814630,16.593347,7.301756,4.822785,0.566945,2.447563,-2.197526,2.403656,2.071349,6.521480,7.821667,0.524517,AFG
3,9.576757,1.396219,5.004665,8.789839,4.730095,1.573135,0.280788,1.992846,1.784172,1.759172,3.365101,1.582290,1.118208,6.454910,AFW
4,19.365774,-16.762140,31.688176,31.774075,7.257832,2.844244,3.563413,-3.514972,21.774174,22.617936,28.167029,19.184276,18.161807,33.618592,AGO
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
261,-2.833157,-2.833157,6.392429,6.303690,3.525404,0.353023,1.601439,0.607803,0.779272,0.439986,1.498920,0.962931,1.381107,6.115583,XKX
262,20.377746,-8.705270,23.616292,18.113667,5.893159,9.208957,7.163324,46.476251,0.134411,18.553566,14.853665,14.853665,14.853665,14.853665,YEM
263,7.850332,8.663042,6.128432,5.539056,4.685206,5.844111,5.366546,5.545528,6.952172,5.473322,3.745754,4.638081,5.692615,6.219017,ZAF
264,10.640245,5.559686,13.950913,11.112307,6.992016,9.731210,5.435782,6.659292,13.552485,10.095730,7.411571,7.633470,13.743502,27.585834,ZMB


In [511]:
df_glob_inflation.rename(columns={'Country Code':'pays'}, inplace = True)

for i in tqdm(reduced_data.index):
    pays = reduced_data.loc[i,'geo']
    date = str(reduced_data.loc[i,'TIME_PERIOD'])
    temporary_df = df_glob_inflation[df_glob_inflation['pays']==pays]
    if len(np.array(temporary_df.loc[:,date]))>0 : 
        chiffre_pib = np.array(temporary_df.loc[:,date])
        reduced_data.loc[i,'infla_geo']=chiffre_pib
    else : 
        reduced_data.loc[i,'infla_geo']=np.NaN

#df_glob_inflation.rename(columns={'geo':'citizen'}, inplace = True)

for i in tqdm(reduced_data.index):
    pays = reduced_data.loc[i,'citizen']
    date = str(reduced_data.loc[i,'TIME_PERIOD'])
    temporary_df = df_glob_inflation[df_glob_inflation['pays']==pays]
    if len(np.array(temporary_df.loc[:,date]))>0 : 
        chiffre_pib = np.array(temporary_df.loc[:,date])
        reduced_data.loc[i,'infla_citizen']=chiffre_pib
    else : 
        reduced_data.loc[i,'infla_citizen']=np.NaN

  0%|          | 0/81915 [00:00<?, ?it/s]

  0%|          | 0/81915 [00:00<?, ?it/s]

In [512]:
print(reduced_data[reduced_data['infla_geo'].isna()]['geo'].unique())
print(reduced_data[reduced_data['infla_citizen'].isna()]['citizen'].unique())

[]
[]


# Ajout population

In [513]:
pop = pd.read_csv(".//population//API_SP.POP.TOTL_DS2_en_csv_v2_5351984.csv")

In [514]:
df_glob_pop = pop.merge(df_ISO, how='left', on='Country Code')

del df_glob_pop['Pays']
del df_glob_pop['ISO 3166-1 numérique']
for i in range(1960,2008):
    del df_glob_pop[str(i)]

del df_glob_pop['Country Name']
del df_glob_pop['ISO 3166-1 alpha-2']
del df_glob_pop['Indicator Name']
del df_glob_pop['Indicator Code']

df_glob_pop.rename(columns={'Country Code':'geo'}, inplace = True)

geo = df_glob_pop['geo'] 
# Etre sur que 1 est la bonne colonne
df_glob_pop = df_glob_pop.iloc[:,1:]
df_glob_pop = df_glob_pop.fillna(method='ffill', axis=1).fillna(method='bfill', axis=1)
df_glob_pop['geo'] = geo

In [515]:
df_glob_pop.rename(columns={'geo':'pays'}, inplace = True)

for i in tqdm(reduced_data.index):
    pays = reduced_data.loc[i,'geo']
    date = str(reduced_data.loc[i,'TIME_PERIOD'])
    temporary_df = df_glob_pop[df_glob_pop['pays']==pays]
    if len(np.array(temporary_df.loc[:,date]))>0 : 
        chiffre_pib = np.array(temporary_df.loc[:,date])
        reduced_data.loc[i,'pop_geo']=chiffre_pib
    else : 
        reduced_data.loc[i,'pop_geo']=np.NaN

#df_glob_inflation.rename(columns={'geo':'citizen'}, inplace = True)

for i in tqdm(reduced_data.index):
    pays = reduced_data.loc[i,'citizen']
    date = str(reduced_data.loc[i,'TIME_PERIOD'])
    temporary_df = df_glob_pop[df_glob_pop['pays']==pays]
    if len(np.array(temporary_df.loc[:,date]))>0 : 
        chiffre_pib = np.array(temporary_df.loc[:,date])
        reduced_data.loc[i,'pop_citizen']=chiffre_pib
    else : 
        reduced_data.loc[i,'pop_citizen']=np.NaN

  0%|          | 0/81915 [00:00<?, ?it/s]

  0%|          | 0/81915 [00:00<?, ?it/s]

In [517]:
print(reduced_data[reduced_data['pop_geo'].isna()]['geo'].unique())
print(reduced_data[reduced_data['pop_citizen'].isna()]['citizen'].unique())

[]
[]


# Global merge

In [521]:
"""def t(x):
    return x[0]
reduced_data['pib_citizen'] = reduced_data['pib_citizen'].apply(t)
"""
del reduced_data['citizen']
del reduced_data['geo']
del reduced_data['TIME_PERIOD']
#del reduced_data['info']

glob_data_final = glob_data.merge(reduced_data, how='left', on='info_bis')

In [522]:
glob_data_final

Unnamed: 0,DATAFLOW,LAST UPDATE,freq,citizen,sex,unit,age,asyl_app,geo,TIME_PERIOD,...,Langues_citizen,info_y,pib_geo,pib_citizen,idh_geo,idh_citizen,infla_geo,infla_citizen,pop_geo,pop_citizen
0,ESTAT:MIGR_ASYAPPCTZA(1.0),21/11/22 23:00:00,A,AND,F,PER,UNK,ASY_APP,AUT,2008,...,[catalan],"(AND, AUT, 2008)",53166.05420,53719.421267,0.898,0.850,1.956322,2.274338,8321496.0,76055.0
1,ESTAT:MIGR_ASYAPPCTZA(1.0),21/11/22 23:00:00,A,AND,F,PER,UNK,ASY_APP,AUT,2009,...,[catalan],"(AND, AUT, 2009)",51030.72468,49753.690600,0.898,0.848,1.889264,0.132136,8343323.0,73852.0
2,ESTAT:MIGR_ASYAPPCTZA(1.0),21/11/22 23:00:00,A,AND,F,PER,UNK,ASY_APP,AUT,2010,...,[catalan],"(AND, AUT, 2010)",51843.42833,48238.466096,0.902,0.848,0.873055,0.374314,8363404.0,71519.0
3,ESTAT:MIGR_ASYAPPCTZA(1.0),21/11/22 23:00:00,A,AND,F,PER,UNK,ASY_APP,AUT,2011,...,[catalan],"(AND, AUT, 2011)",53179.14746,51429.191920,0.905,0.849,1.833410,0.196764,8391643.0,70567.0
4,ESTAT:MIGR_ASYAPPCTZA(1.0),21/11/22 23:00:00,A,AND,F,PER,UNK,ASY_APP,AUT,2012,...,[catalan],"(AND, AUT, 2012)",53297.44479,44904.580043,0.906,0.869,2.054236,0.174557,8429991.0,71013.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1441342,ESTAT:MIGR_ASYAPPCTZA(1.0),21/11/22 23:00:00,A,ZWE,UNK,PER,Y_LT14,ASY_APP,GBR,2015,...,[anglais],"(ZWE, GBR, 2015)",44948.33597,2313.878553,0.924,0.582,0.712571,0.367420,65116219.0,14154937.0
1441343,ESTAT:MIGR_ASYAPPCTZA(1.0),21/11/22 23:00:00,A,ZWE,UNK,PER,Y_LT14,ASY_APP,GBR,2016,...,[anglais],"(ZWE, GBR, 2016)",45574.84782,2286.623549,0.927,0.588,1.878610,2.014095,65611593.0,14452704.0
1441344,ESTAT:MIGR_ASYAPPCTZA(1.0),21/11/22 23:00:00,A,ZWE,UNK,PER,Y_LT14,ASY_APP,GBR,2017,...,[anglais],"(ZWE, GBR, 2017)",46372.38660,2331.780833,0.930,0.594,1.791164,3.056905,66058859.0,14751101.0
1441345,ESTAT:MIGR_ASYAPPCTZA(1.0),21/11/22 23:00:00,A,ZWE,UNK,PER,Y_LT14,ASY_APP,GBR,2018,...,[anglais],"(ZWE, GBR, 2018)",46878.13496,2399.621551,0.929,0.602,1.737853,200.769578,66460344.0,15052184.0


In [523]:
glob_data_final.to_csv('glob_data_final.csv', sep=',')