In [15]:
import os

import mysql.connector
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import pycountry
from sklearn.cluster import KMeans

In [16]:
#!/usr/bin/env /Applications/MAMP/Library/bin/python
# NOTE: inside a notebook cell the line above is an ordinary comment; it does
# not select the interpreter.

# Load the 2019 indicators from the `ecotourisme` MySQL database into `data`,
# a list holding one dict per result row.

# Connection settings. The defaults are the values this notebook has always
# used; each one can be overridden through an environment variable so that
# other credentials never have to be written into the notebook.
config = {
    'user': os.environ.get('ECOTOURISME_DB_USER', 'root'),
    'password': os.environ.get('ECOTOURISME_DB_PASSWORD', 'root'),
    'host': os.environ.get('ECOTOURISME_DB_HOST', '127.0.0.1'),
    'port': int(os.environ.get('ECOTOURISME_DB_PORT', '8889')),
    'database': os.environ.get('ECOTOURISME_DB_NAME', 'ecotourisme'),
    'raise_on_warnings': True,
}

cnx = mysql.connector.connect(**config)
try:
    # dictionary=True makes every fetched row a dict keyed by column name/alias.
    cursor = cnx.cursor(dictionary=True)
    try:
        # NOTE(review): developpement_humain (dv) is joined on iso_code only,
        # with no year condition - if that table stores several years per
        # country this produces duplicate rows; confirm against the schema.
        cursor.execute('SELECT pays.nom as pays, s.id_pays, s.annee, s.gpi, t.arriveesTotal as arriveesTotal, e.pibParHab, eco.co2, eco.GES_hab as gesParHab, dv.Value FROM surete as s, tourisme as t, economie as e, ecologie as eco, pays, developpement_humain as dv where pays.id = s.id_pays and s.id_pays = t.id_pays AND t.id_pays = e.id_pays AND e.id_pays = eco.id_pays AND eco.id_pays = dv.iso_code AND s.annee = t.annee AND t.annee = e.annee AND e.annee = eco.annee AND e.annee=2019;')
        results = cursor.fetchall()
    finally:
        cursor.close()
finally:
    # Always release the connection, even when the query fails.
    cnx.close()

# Re-key each row for the DataFrame built later: the `Value` column becomes
# `idh`; key order is kept as before because it determines column order.
data = [
    {
        'pays': row['pays'],
        'id_pays': row['id_pays'],
        'arriveesTotal': row['arriveesTotal'],
        'gpi': row['gpi'],
        'pibParHab': row['pibParHab'],
        'co2': row['co2'],
        'gesParHab': row['gesParHab'],
        'idh': row['Value'],
        'annee': row['annee'],
    }
    for row in results
]

In [17]:
# Build the DataFrame from the queried rows, rescale every indicator, then
# discard incomplete rows - all in one chained expression.
df = (
    pd.DataFrame(data)
    .assign(
        # Rescaled as value / column maximum.
        arriveesTotal=lambda d: d['arriveesTotal'] / d['arriveesTotal'].max(),
        pibParHab=lambda d: d['pibParHab'] / d['pibParHab'].max(),
        # Rescaled as column minimum / value.
        gpi=lambda d: d['gpi'].min() / d['gpi'],
        co2=lambda d: d['co2'].min() / d['co2'],
        gesParHab=lambda d: d['gesParHab'].min() / d['gesParHab'],
        # idh goes through str so that ',' can be swapped for '.' before the
        # cast to float.
        idh=lambda d: d['idh'].astype(str).str.replace(',', '.').astype(float),
    )
    # Second step for idh: divide the now-numeric column by its maximum.
    .assign(idh=lambda d: d['idh'] / d['idh'].max())
    # Remove rows with missing values.
    .dropna()
)

CLUSTERING 

In [18]:
# Feature matrix for the clustering: the six rescaled indicators.
X = df[['arriveesTotal', 'gpi', 'pibParHab', 'co2', 'gesParHab', 'idh']]

# Three-cluster k-means with a fixed seed. n_init is set explicitly because
# scikit-learn changed its default from 10 to 'auto' in version 1.4; pinning
# it keeps the clustering independent of the installed version.
kmeans = KMeans(n_clusters=3, random_state=42, n_init=10)

# Fit the model on the selected features.
kmeans.fit(X)

# Store each row's cluster label in the DataFrame.
df['cluster'] = kmeans.labels_

# Order rows by cluster label, highest label first.
df = df.sort_values(by='cluster', ascending=False)

# One row per cluster centre, with the feature names as columns.
centroids = pd.DataFrame(kmeans.cluster_centers_, columns=X.columns)

# Display the centroids.
centroids


Unnamed: 0,arriveesTotal,gpi,pibParHab,co2,gesParHab,idh
0,0.066184,0.561485,0.214338,0.014145,0.114099,0.822971
1,0.011514,0.522428,0.04489,0.098645,0.268381,0.588459
2,0.210164,0.721674,0.566448,0.00869,0.062704,0.952226


In [19]:
# Convert an ISO 3166-1 alpha-2 country code to its alpha-3 code.
def iso2_to_iso3(iso2):
    """Return the ISO alpha-3 code for a country code, or NaN if unknown.

    A 2-letter code is looked up through ``pycountry`` by ``alpha_2``. A
    3-character string is looked up by ``alpha_3`` and returned when it is a
    known code, so applying the function to a column that has already been
    converted leaves it unchanged instead of turning every value into NaN.

    Parameters
    ----------
    iso2 : str
        Country code to convert.

    Returns
    -------
    str or float
        The alpha-3 code, or ``np.nan`` when the lookup raises
        ``AttributeError`` or ``LookupError`` (e.g. no matching record).
    """
    try:
        if isinstance(iso2, str) and len(iso2) == 3:
            # Already alpha-3 shaped: validate it rather than failing the
            # alpha-2 lookup.
            return pycountry.countries.get(alpha_3=iso2).alpha_3
        return pycountry.countries.get(alpha_2=iso2).alpha_3
    except (AttributeError, LookupError):
        return np.nan
    
# Overwrite 'id_pays' in place with the result of iso2_to_iso3 for each row.
df['id_pays'] = df['id_pays'].apply(iso2_to_iso3)

# World map coloured by cluster; the country name is shown on hover.
fig = px.choropleth(
    df,
    locations='id_pays',
    color='cluster',
    hover_name='pays',
    projection='natural earth',
)

# Map styling: country borders, land, ocean and lakes.
fig.update_geos(
    showcountries=True,
    countrycolor="Black",
    showland=True,
    showocean=True,
    oceancolor="LightBlue",
    showlakes=True,
    lakecolor="Blue",
)

# Render the map.
fig.show()


In [20]:
# 3D scatter of the clustered countries: pibParHab (x), gesParHab (y) and
# gpi (z), with the k-means centroids overlaid.
fig = go.Figure()

# One trace per cluster so each gets its own colour and legend entry.
for cluster in df['cluster'].unique():
    cluster_df = df[df['cluster'] == cluster]
    fig.add_trace(go.Scatter3d(x=cluster_df['pibParHab'],
                               y=cluster_df['gesParHab'],
                               # z must be gpi to match the centroid trace and
                               # the z-axis title below (gesParHab was being
                               # plotted on both y and z).
                               z=cluster_df['gpi'],
                               mode='markers+text',
                               marker=dict(size=4),
                               name=f'Cluster {cluster}'))

# Cluster centres, drawn as black crosses on the same three axes.
fig.add_trace(go.Scatter3d(x=centroids['pibParHab'],
                           y=centroids['gesParHab'],
                           z=centroids['gpi'],
                           mode='markers',
                           marker=dict(size=6, color='black', symbol='x'),
                           name='Centroid'))

# Axis titles.
fig.update_scenes(xaxis_title='pibParHab',
                  yaxis_title='gesParHab',
                  zaxis_title='GPI')

# Render the figure.
fig.show()