In [6]:
# Importing libraries neeeded for the project
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from sklearn.preprocessing import MinMaxScaler


In [7]:
#!/usr/bin/env /Applications/MAMP/Library/bin/python

import mysql.connector

config = {
  'user': 'root',
  'password': 'root',
  'host': '127.0.0.1',
  'port': 8889,
  'database': 'ecotourisme',
  'raise_on_warnings': True
}

cnx = mysql.connector.connect(**config)

cursor = cnx.cursor(dictionary=True)

cursor.execute('SELECT pays.nom as pays, s.id_pays, s.annee, s.gpi, t.arriveesTotal as arriveesTotal, t.arriveesAvion, e.pibParHab, eco.co2, eco.GES_hab as gesParHab, eco.elecRenew as energie  FROM surete as s, tourisme as t, economie as e, ecologie as eco, pays where pays.id = s.id_pays and s.id_pays = t.id_pays AND t.id_pays = e.id_pays AND e.id_pays = eco.id_pays AND s.annee = t.annee AND t.annee = e.annee AND e.annee = eco.annee')
results = cursor.fetchall()


#cursor.execute('SELECT pays.nom as pays, s.id_pays, AVG(s.gpi) as gpi, AVG(t.arriveesTotal) as arriveesTotal, AVG(t.arriveesAvion) as arriveesAvion, AVG(e.pibParHab) as pibParHab, AVG(eco.co2) as co2, AVG(eco.GES_hab) as gesParHab FROM surete AS s JOIN tourisme AS t ON s.id_pays = t.id_pays AND s.annee = t.annee JOIN economie AS e ON s.id_pays = e.id_pays AND s.annee = e.annee JOIN ecologie AS eco ON s.id_pays = eco.id_pays AND s.annee = eco.annee JOIN pays ON pays.id = s.id_pays GROUP BY pays.id ORDER BY `s`.`id_pays` ASC;')
#results = cursor.fetchall()

data = []
for row in results:
    pays = row['pays']
    id_pays = row['id_pays']
    annee = row['annee']
    gpi = row['gpi']
    arriveesTotal = row['arriveesTotal']
    arriveesAvion = row['arriveesAvion']
    pibParHab = row['pibParHab']
    co2 = row['co2']
    gesParHab = row['gesParHab']
    energie = row['energie']
    data.append({'pays': pays,'id_pays': id_pays, 'annee': annee, 'arriveesTotal': arriveesTotal, 'arriveesAvion': arriveesAvion, 'gpi': gpi, 'pibParHab': pibParHab, 'co2': co2, 'gesParHab': gesParHab, 'annee': annee, 'energie' : energie})

    

cnx.close()

In [8]:
# Créer le DataFrame
df = pd.DataFrame(data)

In [10]:

# Normaliser les indicateurs
df['arriveesTotal'] = df['arriveesTotal'] / df['arriveesTotal'].max()
df['arriveesAvion'] = df['arriveesTotal'] / df['arriveesAvion'].min()
df['gpi'] = df['gpi'].min() / df['gpi']
df['pibParHab'] = df['pibParHab'] / df['pibParHab'].max()
df['co2'] = df['co2'].min() / df['co2']
df['gesParHab'] = df['gesParHab'].min() / df['gesParHab']
df['energie'] = df['energie'] / df['energie'].max()

# Définir les poids pour chaque variable
poids = {
    'arriveesTotal': 2,
    'gpi': 4,
    'pibParHab': 2,
    'gesParHab': 6, 
    'energie': 6

}

# Calculer le score touristique avec des poids
# Calculer le score touristique avec des poids
df['score_touristique'] = df.apply(lambda row: sum(float(row[var]) * poids[var] for var in poids) / sum(poids.values()), axis=1)
# Afficher le DataFrame
df.head()

Unnamed: 0,pays,id_pays,annee,arriveesTotal,arriveesAvion,gpi,pibParHab,co2,gesParHab,energie,score_touristique
0,Angola,AO,2008,0.001349,0.000123,0.54897,0.070013,0.006257,0.06048,0.763547,0.364138
1,Albanie,AL,2008,0.006517,0.000592,0.610274,0.088613,0.028649,0.144669,0.981723,0.469485
2,Argentine,AR,2008,0.021572,0.001961,0.600549,0.205506,0.000721,0.034817,0.272593,0.23504
3,Arménie,AM,2008,0.002561,0.000233,0.515566,0.091187,0.021212,0.152367,0.325411,0.255822
4,Australie,AU,2008,0.025638,0.002331,0.783513,0.400223,0.000311,0.011013,0.080549,0.226757


In [11]:
# On enlève les colonnes que l'on veut pas normaliser 
columns_to_normalize = df.columns.drop(['pays', 'id_pays', 'annee', 'score_touristique'])

# Créer un objet MinMaxScaler, qui sert à normaliser les données, les valeurs sont comprises entre 0 et 1
scaler = MinMaxScaler()

# Normaliser les données avec seulement les colonnes contenants les données importantes à normaliser 
normalized_data = scaler.fit_transform(df[columns_to_normalize])

# Créer un DataFrame Pandas à partir des données normalisées
normalized_df = pd.DataFrame(normalized_data, columns=columns_to_normalize)

# Ajout des colonnes 'pays', 'id_pays', 'annee' au DataFrame normalisé
normalized_df[['pays', 'id_pays', 'annee', 'score_touristique']] = df[['pays', 'id_pays', 'annee', 'score_touristique']]

df[columns_to_normalize] = normalized_df[columns_to_normalize]

df.head()



Unnamed: 0,pays,id_pays,annee,arriveesTotal,arriveesAvion,gpi,pibParHab,co2,gesParHab,energie,score_touristique
0,Angola,AO,2008,0.001299,0.001299,0.333321,0.063728,0.006246,0.059189,0.763547,0.364138
1,Albanie,AL,2008,0.006467,0.006467,0.423935,0.082454,0.028639,0.143494,0.981723,0.469485
2,Argentine,AR,2008,0.021522,0.021522,0.409561,0.200137,0.00071,0.03349,0.272593,0.23504
3,Arménie,AM,2008,0.002511,0.002511,0.283945,0.085045,0.021201,0.151203,0.325411,0.255822
4,Australie,AU,2008,0.025589,0.025589,0.680004,0.39617,0.0003,0.009653,0.080549,0.226757


In [12]:
max_score = df['score_touristique'].max()
min_score = df['score_touristique'].min()

# Définir les limites des compartiments
bins = [0, 0.19, 0.25, 0.3, 0.35, max_score]

print(max_score, min_score)


# Définir les labels pour les compartiments
labels = ['E', 'D', 'C', 'B', 'A']

# Créer une nouvelle colonne score
df['score'] = pd.cut(df['score_touristique'], bins=bins, labels=labels)

df = df.sort_values(by='score_touristique', ascending=False)

# Afficher le DataFrame
df.head(50)

# nombre de pays ayant un score A
df['score'].value_counts()

0.6943981514178927 0.09293117388263897


score
A    467
E    416
D    339
C    259
B    223
Name: count, dtype: int64

In [13]:
import pycountry
import numpy as np

# Fonction pour convertir ISO-2 en ISO-3
def iso2_to_iso3(iso2):
    try:
        return pycountry.countries.get(alpha_2=iso2).alpha_3
    except AttributeError:
        return np.nan

# Appliquer la fonction à la colonne 'id_pays'
df['id_pays'] = df['id_pays'].apply(iso2_to_iso3)

df.head()

# Créer une carte du monde
fig = px.choropleth(df, locations='id_pays', color='score', hover_name='pays', projection='natural earth')

# Mettre à jour la carte
fig.update_geos(showcountries=True, countrycolor="Black", showland=True, showocean=True, oceancolor="LightBlue", showlakes=True, lakecolor="Blue")

# Afficher la carte
fig.show()


In [14]:
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA

# Utiliser vos données déjà extraites et normalisées
df = df.set_index('annee')
# Assurez-vous que votre DataFrame pandas contient les colonnes nécessaires (année, critères, etc.)

# Fractionnement des données en ensembles d'entraînement et de test
train_data = df[df['annee'] <= 2010]  # Données jusqu'en 2020 pour l'entraînement
test_data = df[df['annee'] > 2010]    # Données à partir de 2021 pour les tests

# Entraînement du modèle ARIMA
model = ARIMA(train_data['gesParHab'], order=(5,1,0))  # Assurez-vous de remplacer 'critère' par la colonne que vous voulez prédire
model_fit = model.fit()

# Réduire le nombre de pas de prédiction
forecast_steps = min(len(test_data), 10)  # Par exemple, prédire 10 pas à l'avance maximum

# Prédictions futures
forecast = model_fit.forecast(steps=forecast_steps)

# Affichage des prédictions
print(forecast)

KeyError: 'annee'