En faisant le produit avec la matrice adaptative, si jamais un pays se retrouve seul dans une classe, toute la colonne de cette classe se retrouve à zéro.

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F

import numpy as np
from sklearn.cluster import KMeans
import pandas as pd
from sklearn.decomposition import PCA

from adjency_matrix import AdaptativeMatrix, DTW,AgeReduction
from gt_a_model import GCNLayer, GCNMultiLayer, Encoder

In [2]:
# Instantiate the helpers imported from the local `adjency_matrix` module.
AM = AdaptativeMatrix()  # its adaptative_matrix(X) is used below to obtain the cluster assignment
dtw = DTW()  # its DTW_Matrix(X) is used below to obtain the DTW matrix
AR = AgeReduction()  # not referenced again in the cells shown here

In [24]:
def adjency_matrix(X: torch.Tensor, coord_countries: torch.Tensor):
    """
    Build the country-by-country adjacency matrix.

    The result is the element-wise product of the DTW matrix of ``X`` and the
    root-mean-square difference between the coordinate rows of each pair of
    countries. Every pair of countries whose cluster labels differ is then
    overwritten with the constant 0.005.

    X is a 3D tensor whose first dimension indexes the countries.
    coord_countries is a 2D matrix with one row per country holding its
    longitude (1st column) and latitude (2nd column).

    The cluster assignment is printed before the matrix is returned.
    """
    size = X.shape[0]

    # Same call order as before: both helpers are opaque and may print.
    labels = AM.adaptative_matrix(X)
    dtw_part = dtw.DTW_Matrix(X)

    # Pairwise geographic term, filled entry by entry into a float64 array.
    geo_part = np.empty((size, size))
    for row in range(size):
        for col in range(size):
            gap = coord_countries[row] - coord_countries[col]
            geo_part[row, col] = torch.sqrt(torch.mean(gap ** 2))

    result = dtw_part * geo_part

    # Pairs from different clusters get the fixed weight 0.005.
    for row in range(size):
        for col in range(size):
            in_different_clusters = labels[row] != labels[col]
            if in_different_clusters:
                result[row, col] = 0.005

    print("clusters \n", labels)
    return result


In [25]:
# Synthetic inputs for a quick check of adjency_matrix:
# random coordinates, one row of 2 values for each of 8 countries
coord_countries = torch.rand(8,2)
# random 8 x 5 x 3 tensor; the first dimension is the country axis read by adjency_matrix
X = torch.rand((8,5,3))

In [32]:
# Run the adjacency computation on the synthetic inputs; the returned matrix is displayed as the cell output.
adjency_matrix(X,coord_countries)

The variance explained by principal components:  [0.75297127]
The variance explained by principal components:  [0.52076656]
The variance explained by principal components:  [0.76120859]
The variance explained by principal components:  [0.82852733]
The variance explained by principal components:  [0.76143834]
The variance explained by principal components:  [0.73838258]
The variance explained by principal components:  [0.40042795]
The variance explained by principal components:  [0.62651635]
The variance explained by principal components:  [0.75297127]
The variance explained by principal components:  [0.52076656]
The variance explained by principal components:  [0.76120859]
The variance explained by principal components:  [0.82852733]
The variance explained by principal components:  [0.76143834]
The variance explained by principal components:  [0.73838258]
The variance explained by principal components:  [0.40042795]
The variance explained by principal components:  [0.62651635]
clusters

tensor([[0.0000, 0.0050, 0.0050, 0.3926, 0.2782, 0.0050, 0.4142, 0.0050],
        [0.0050, 0.0000, 0.0050, 0.0050, 0.0050, 0.0050, 0.0050, 0.1196],
        [0.0050, 0.0050, 0.0000, 0.0050, 0.0050, 0.1616, 0.0050, 0.0050],
        [0.3926, 0.0050, 0.0050, 0.0000, 0.1840, 0.0050, 0.1971, 0.0050],
        [0.2782, 0.0050, 0.0050, 0.1840, 0.0000, 0.0050, 0.1523, 0.0050],
        [0.0050, 0.0050, 0.1616, 0.0050, 0.0050, 0.0000, 0.0050, 0.0050],
        [0.4142, 0.0050, 0.0050, 0.1971, 0.1523, 0.0050, 0.0000, 0.0050],
        [0.0050, 0.1196, 0.0050, 0.0050, 0.0050, 0.0050, 0.0050, 0.0000]],
       dtype=torch.float64)

### 2) Importation des données de mortalité


In [83]:
### Import the mortality data of the different countries
import os

directory = '/Users/gojelastat/Desktop/Thèse/Projet 2/Données/Données GT-A'
# Maps a country code to the DataFrame read from that country's .txt file.
data={}
for filename in os.listdir(directory):
    f = os.path.join(directory, filename)
    if os.path.isfile(f) and filename.endswith('.txt'):
        # The dictionary key is the part of the file name before the first dot,
        # i.e. the country code.
        # header=1: the column names are read from row 1 (0-indexed) of the file.
        # The delimiter is a regular expression, so it is written as a raw
        # string: "\s" in a normal string literal is an invalid escape sequence.
        data[filename.split('.')[0]]=pd.read_csv(f,header=1,delimiter=r"\s+")

In [84]:
# Number of country data sets loaded into `data`.
len(data.keys())

23

### 3) Mettre les données au format adéquat 
C'est-à-dire sous forme de tenseur de taille $M \times T \times A$, où $M$ est le nombre de pays, $T$ est la dimension du temps et $A$ est la dimension de l'âge. Rappelons que ceci est fait pour s'adapter au cadre d'implémentation du modèle GT-A et qu'ici on travaille avec les données des **hommes**.

In [85]:
# Convert a raw mortality table to numeric values and restrict it to the study window.
def __data_load(data):
    """Return a cleaned, numeric, filtered copy of a raw mortality table.

    Parameters
    ----------
    data : pandas.DataFrame
        Raw table with at least the columns ``'Age'`` and ``'Year'``. Cells
        may contain the open age group ``'110+'`` and ``'.'`` placeholders.

    Returns
    -------
    pandas.DataFrame
        A new frame in which every column is numeric, restricted to
        ``Age < 100`` and ``1950 <= Year <= 2010``, with the rows renumbered
        from 0. No rescaling of the values is performed.

    Notes
    -----
    The input frame is left untouched: all the work is done on a copy, so
    calling the function again on the same object gives the same result.
    """
    data = data.copy()

    # The open age group '110+' is recoded as 111 so the column can be parsed
    # as a number (these rows are removed by the Age < 100 filter below).
    data['Age'] = np.where(data['Age'] != '110+', data['Age'], 111)

    for col in data.columns:
        # '.' placeholders are replaced by the sentinel 9999 before conversion.
        # NOTE(review): 9999 stays in the returned values if such a cell falls
        # inside the kept age/year window; confirm this is intended.
        data[col] = np.where(data[col] != '.', data[col], 9999)
        data[col] = pd.to_numeric(data[col])

    in_window = (data['Age'] < 100) & (data['Year'] >= 1950) & (data['Year'] <= 2010)

    # Renumber the rows 0..n-1: __data_reshaping-style callers address rows by position label.
    return data[in_window].reset_index(drop=True)



### Turn the cleaned data set into a matrix of mortality rates (male population by default).
def __data_reshaping(data,Gender="Male"):
    """Return the mortality rates of one country as a (years x ages) array.

    Parameters
    ----------
    data : pandas.DataFrame
        Raw table of one country; it is first cleaned with ``__data_load``.
    Gender : str, default "Male"
        Name of the column holding the rates to extract.

    Returns
    -------
    numpy.ndarray
        ``float32`` array with one row per distinct year and one column per
        distinct age, both in increasing order.

    Raises
    ------
    ValueError
        If the same (Year, Age) pair occurs more than once (raised by
        ``DataFrame.pivot``).

    Notes
    -----
    Cells are placed by their actual (Year, Age) values instead of by row
    position, so the result does not depend on the row order of ``data`` nor
    on the ages being exactly 0..n-1. A (Year, Age) pair that is absent from
    the data yields ``NaN``.
    """
    data=__data_load(data)
    # Years on the rows and ages on the columns: this is directly the
    # T x A layout, so no transpose is needed.
    table = data.pivot(index='Year', columns='Age', values=Gender)
    return table.to_numpy(dtype=np.float32)



In [86]:
T = 2010 - 1950 + 1  # number of years in the 1950..2010 window kept by __data_load
A = 99 - 0 + 1  # number of ages in the 0..99 range kept by __data_load
# Tensor of size (number of countries, T, A). The legacy FloatTensor(sizes)
# constructor does not initialise its content; every slice is filled in the loop below.
X = torch.FloatTensor(len(data.keys()), T, A)

countries = data.keys()
for i,country in enumerate(countries):
    # Slice i follows the iteration order of data.keys().
    # __data_reshaping is called with its default column ("Male").
    X[i]= torch.tensor(__data_reshaping(data[country]))

In [96]:
# Country codes in the order used for the first dimension of X.
data.keys()

dict_keys(['PRT', 'GBR_NP', 'BEL', 'AUS', 'ESP', 'ISL', 'AUT', 'NZL_NP', 'HUN', 'FRACNP', 'NLD', 'UK', 'DNK', 'USA', 'JPN', 'SWE', 'FIN', 'CZE', 'ITA', 'BGR', 'SVK', 'IRL', 'CAN'])

#### Coordonnées géographiques des pays
Ici on oriente le globe selon un repère orthonormé, avec l'axe des abscisses orienté vers l'Est et l'axe des ordonnées orienté vers le Nord. Ces coordonnées ont été prises sur Wikipédia. Dans la base de données HMD, il y a la base de données par régions du Royaume-Uni (UK) et celle de toute la population du Royaume-Uni. On a choisi d'utiliser la base du Royaume-Uni total.

In [73]:
# Country codes; row k of `coord` holds the coordinates of country[k].
country = ['AUT','AUS','GBR_NP','ISL','NZL_NP', 'HUN', 'DNK', 'USA', 'JPN',
           'SWE', 'FIN', 'CZE', 'BGR', 'SVK', 'IRL', 'CAN', 'PRT', 'BEL', 'ESP',
           'FRA', 'NLD', 'ITA']
# One [latitude, longitude] row per country, in the same order as `country`.
coord = np.array([[47.20, 13.20],[-27.00, 133.00], [54.00, -2.30], [65.00, -18.00],
                 [-41.00, 174.00], [47.00, 20.00], [56.00, 10.00], [38.00, -97.00],
                 [36.00, 138.00], [62.00, 15.00], [64.00, 26.00], [49.00, 15.00],
                 [43.00, 25.00], [48.40, 19.30], [53.00, -8.00], [60.00, -95.00],
                 [39.30, -8.00], [50.50, 4.00], [40.00, -4.00], [47.00, 2.00],
                 [51.55, 5.34],
                 # The table previously had 21 rows for 22 codes: the row of the
                 # last code ('ITA') was missing, which made the DataFrame
                 # construction below fail.
                 # NOTE(review): approximate decimal degrees for Italy - please
                 # verify against the source used for the other rows.
                 [42.83, 12.83]])

# Guard against the two tables drifting apart again.
if len(country) != len(coord):
    raise ValueError(
        f"{len(country)} country codes but {len(coord)} coordinate rows"
    )

geo_coord = pd.DataFrame(coord, index=country, columns=['Latitude', 'Longitude'])

# Reorder the rows to follow the iteration order of `data`. A code that has no
# coordinates is reported by name (same exception type as a failed .loc lookup).
missing_codes = [code for code in data.keys() if code not in geo_coord.index]
if missing_codes:
    raise KeyError(f"No coordinates defined for: {missing_codes}")
geo_coord = geo_coord.loc[list(data.keys())]
geo_coord

Unnamed: 0,Latitude,Longitude
GBR_NP,54.0,-2.3
AUS,-27.0,133.0
ISL,65.0,-18.0
AUT,47.2,13.2
NZL_NP,-41.0,174.0
HUN,47.0,20.0
DNK,56.0,10.0
USA,38.0,-97.0
JPN,36.0,138.0
SWE,62.0,15.0


### 4) Calcul de la matrice $A_{lat-long}$

In [77]:
# Sanity check: sum of the squared coordinate differences between the 'AUT' and 'IRL' rows.
np.sum((geo_coord.loc['AUT']-geo_coord.loc['IRL'])**2)

483.08

In [80]:
# Same check, per column: squared difference in latitude and in longitude.
(geo_coord.loc['AUT']-geo_coord.loc['IRL'])**2

Latitude      33.64
Longitude    449.44
dtype: float64

In [82]:
n_countries = len(data.keys())

# Coordinates of the countries in the iteration order of `data`,
# one row per country.
coords = geo_coord.loc[list(data.keys())].to_numpy(dtype=np.float64)

# Broadcasting builds every pairwise difference at once:
# pairwise_diff[i, j] = coords[i] - coords[j].
pairwise_diff = coords[:, None, :] - coords[None, :, :]

# Entry (i, j) is the root mean square of the coordinate differences between
# country i and country j, stored as a float32 tensor of size
# (n_countries, n_countries).
A_lat_long = torch.tensor(
    np.sqrt(np.mean(pairwise_diff ** 2, axis=-1)), dtype=torch.float32
)

A_lat_long

tensor([[  0.0000, 111.5058,  13.5553,  11.9685, 141.6098,  16.5271,   8.8116,
          67.9120, 100.0202,  13.4776,  21.2237,  12.7336,  20.8121,  15.7785,
           4.0921,  65.6860],
        [111.5058,   0.0000, 125.0300,  99.6436,  30.6349,  95.5118, 104.9238,
         169.0044,  44.6878, 104.5108,  99.3227,  99.2472,  91.0055,  96.4698,
         114.6320, 172.5587],
        [ 13.5553, 125.0300,   0.0000,  25.3996, 155.0806,  29.7321,  20.7966,
          59.0339, 112.1985,  23.4307,  31.1207,  25.9326,  34.1541,  28.8691,
          11.0454,  54.5619],
        [ 11.9685,  99.6436,  25.3996,   0.0000, 129.6840,   4.8104,   6.6212,
          78.1942,  88.6016,  10.5423,  14.9345,   1.8000,   8.8566,   4.3960,
          15.5416,  77.0425],
        [141.6098,  30.6349, 155.0806, 129.6840,   0.0000, 125.4193, 134.7312,
         199.6021,  60.1041, 133.9590, 128.3141, 129.1917, 120.9483, 126.3417,
         144.8447, 203.1773],
        [ 16.5271,  95.5118,  29.7321,   4.8104, 125.4193,  

In [101]:
from geopy.geocoders import Nominatim

# Create the geolocator object used by get_country_coordinates below
geolocator = Nominatim(user_agent="geoapiExercises")

def get_country_coordinates(country_name):
    """Look up the coordinates of a country with the module-level ``geolocator``.

    Returns a ``(latitude, longitude)`` tuple when the geocoder gives a
    result, and the string "Pays non trouvé" otherwise.
    """
    match = geolocator.geocode(country_name)
    if not match:
        return "Pays non trouvé"
    return (match.latitude, match.longitude)

# Example: get the coordinates of France
country = "France"
coordinates = get_country_coordinates(country)
print(f"Coordonnées de {country} : {coordinates}")

# Example: get the coordinates of the Netherlands ("Pays-bas")
country = "Pays-bas"
coordinates = get_country_coordinates(country)
print(f"Coordonnées de {country} : {coordinates}")


Coordonnées de France : (46.603354, 1.8883335)
Coordonnées de Pays-bas : (52.24764975, 5.541246849406163)
