In [3]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import OrdinalEncoder, StandardScaler
import numpy as np
from sklearn.linear_model import SGDClassifier
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score, classification_report
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV
from sklearn.svm import SVC

import pandas as pd
import folium
import joblib  


# I - Préparation des Données 

In [55]:
#Extraction des données d’intérêt 
file_arbre = 'Data_Arbre.csv'
arbre = pd.read_csv(file_arbre)

#les colonnes pertinentes
colonnes_pertinentes = ["longitude", "latitude","haut_tot", "haut_tronc", "tronc_diam", 
                        "feuillage", "fk_arb_etat", "age_estim"]
data_arbre = arbre[colonnes_pertinentes].copy() #copie
print(data_arbre.head())  # 5 premiers elements
#data_arbre.describe()

#distributions 
data_arbre.hist(bins=50, figsize=(20,15))



Employee data : 
      longitude   latitude              clc_quartier              clc_secteur  \
0      3.293264  49.840500  Quartier du Centre-Ville              Quai Gayant   
1      3.273380  49.861409    Quartier du Vermandois               Stade Cepy   
2      3.289068  49.844513  Quartier du Centre-Ville    Rue Villebois Mareuil   
3      3.302387  49.861778      Quartier de l'Europe   Square des Marronniers   
4      3.304047  49.858446      Quartier de l'Europe            Avenue Buffon   
...         ...        ...                       ...                      ...   
7404   3.283505  49.848695  Quartier du Centre-Ville     Place Edouard Branly   
7405   3.283436  49.848682  Quartier du Centre-Ville     Place Edouard Branly   
7406   3.297078  49.860413      Quartier de l'Europe                rue Hertz   
7407   3.291685  49.848009  Quartier du Centre-Ville      Square rue Fréreuse   
7408   3.301788  49.861503      Quartier de l'Europe  square des marronniers    

      haut

In [63]:
print("Données originales:")
print(data_arbre.head())
ordinal_encoder = OrdinalEncoder()

# Encoder les colonnes catégorielles pertinentes
data_arbre[['feuillage', 'fk_arb_etat']] = ordinal_encoder.fit_transform(data_arbre[['feuillage', 'fk_arb_etat']])

# Afficher les données encodées
print("\nDonnées après encodage ordinal:")
print(data_arbre.head())


Données originales:
   longitude   latitude              clc_quartier             clc_secteur  \
0   3.293264  49.840500  Quartier du Centre-Ville             Quai Gayant   
1   3.273380  49.861409    Quartier du Vermandois              Stade Cepy   
2   3.289068  49.844513  Quartier du Centre-Ville   Rue Villebois Mareuil   
3   3.302387  49.861778      Quartier de l'Europe  Square des Marronniers   
4   3.304047  49.858446      Quartier de l'Europe           Avenue Buffon   

   haut_tot  haut_tronc  tronc_diam  feuillage  fk_arb_etat  age_estim  
0       6.0         2.0        37.0        1.0          1.0       15.0  
1      13.0         1.0       160.0        0.0          1.0       50.0  
2      12.0         3.0       116.0        1.0          4.0       30.0  
3      16.0         3.0       150.0        1.0          1.0       50.0  
4       5.0         2.0       170.0        1.0          2.0       40.0  

Données après encodage ordinal:
   longitude   latitude              clc_quart

In [81]:
x = data_arbre.drop(columns=['fk_arb_etat']) #features
y = data_arbre['fk_arb_etat'] #target

scaler = StandardScaler()
x_scaled = scaler.fit_transform(x) # normaliser les features

# convertir X_scaled en DataFrame 
x_scaled_df = pd.DataFrame(x_scaled, columns=x.columns)

# Afficher les caractéristiques normalisées
print("\nCaractéristiques normalisées:")
print(x_scaled_df.head())

# Afficher la cible
print("\nCible:")
print(y)




Caractéristiques normalisées:
   longitude  latitude  haut_tot  haut_tronc  tronc_diam  feuillage  \
0  -0.121047 -1.027327 -0.920963    -0.46555   -1.148795   0.416219   
1  -1.427386  0.976923  0.213100    -1.02441    0.934743  -2.402581   
2  -0.396705 -0.642658  0.051091     0.09331    0.189412   0.416219   
3   0.478338  1.012295  0.699128     0.09331    0.765349   0.416219   
4   0.587422  0.692942 -1.082973    -0.46555    1.104136   0.416219   

   age_estim  clc_quartier_OMISSY  clc_quartier_Quartier Remicourt  \
0  -0.968413            -0.303792                        -0.498143   
1   0.822235            -0.303792                        -0.498143   
2  -0.200993            -0.303792                        -0.498143   
3   0.822235            -0.303792                        -0.498143   
4   0.310621            -0.303792                        -0.498143   

   clc_quartier_Quartier Saint-Jean  ...  clc_secteur_Stade Marcel Bienfait  \
0                         -0.304322  ...  

# II - Apprentissage Supervisé pour la régression

In [None]:
# séparation des données et de test
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

# mise à l'échelle des caractéristiques (variables explicatives)
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)  
x_test_scaled = scaler.transform(x_test)  

## Stochastic Gradient Descent

In [100]:

sgd_clf = SGDClassifier()
sgd_clf.fit(x_train_scaled, y_train)  # entraînement du modèle sur les données d'entraînement mises à l'échelle
y_pred = sgd_clf.predict(x_test_scaled)  # prédiction sur l'ensemble de test

matrix = confusion_matrix(y_test, y_pred)  
accuracy = accuracy_score(y_test, y_pred)  # exactitude
# Pour une classification multiclasse, spécifiez le paramètre 'average'
recall = recall_score(y_test, y_pred, average='weighted')  
f1 = f1_score(y_test, y_pred, average='weighted') 
precision = precision_score(y_test, y_pred, average='weighted')  

scores = cross_val_score(sgd_clf, x_train_scaled, y_train, cv=3, scoring='accuracy')  # cv = nombre de folds pour la validation croisée

print("Scores de précision pour chaque fold :", scores)
print("Précision moyenne :", scores.mean())

print("Matrice de confusion :\n", matrix)  
print("\nExactitude :", accuracy) 
print("\nPrécision :", precision)  
print("\nRappel :", recall)  
print("\nF1-score :", f1)  


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Scores de précision pour chaque fold : [0.89271255 0.91093117 0.8921519 ]
Précision moyenne : 0.8985985411435111
Matrice de confusion :
 [[   1    6    0    0    0    0]
 [   2 1298    6    0   17   15]
 [   0   21    4    0    0    0]
 [   0    4    0    0    0    2]
 [   0   17    1    0   10    0]
 [   0   53    3    0    3   19]]

Exactitude : 0.8987854251012146

Précision : 0.8781241905371922

Rappel : 0.8987854251012146

F1-score : 0.8847871952980152


## Gradient Boosting Machines

In [101]:
gb_clf = GradientBoostingClassifier()

# Entraîner le modèle sur les données d'entraînement
gb_clf.fit(x_train, y_train)
# prédire sur l'ensemble de test
y_pred = gb_clf.predict(x_test)

matrix = confusion_matrix(y_test, y_pred)  
accuracy = accuracy_score(y_test, y_pred)  # exactitude
recall = recall_score(y_test, y_pred, average='weighted')  
f1 = f1_score(y_test, y_pred, average='weighted') 
precision = precision_score(y_test, y_pred, average='weighted')  

scores = cross_val_score(sgd_clf, x_train_scaled, y_train, cv=3, scoring='accuracy')

print("Scores de précision pour chaque fold :", scores)
print("Précision moyenne :", scores.mean())

print("Matrice de confusion :\n", matrix)  
print("\nExactitude :", accuracy) 
print("\nPrécision :", precision)  
print("\nRappel :", recall)  
print("\nF1-score :", f1)  



Scores de précision pour chaque fold : [0.90182186 0.90030364 0.90582278]
Précision moyenne : 0.9026494302943338
Matrice de confusion :
 [[   3    4    0    0    0    0]
 [   3 1324    3    3    3    2]
 [   0   17    7    0    0    1]
 [   0    6    0    0    0    0]
 [   0   17    1    0   10    0]
 [   0   33    1    0    4   40]]

Exactitude : 0.9338731443994602

Précision : 0.9254889256883309

Rappel : 0.9338731443994602

F1-score : 0.9245961342134033


## Support Vector Machines

In [104]:
# Instancier le modèle GradientBoostingClassifier
svc_clf = SVC(kernel='rbf')
svc_clf.fit(x_train, y_train)  #entraîner le modèle sur les données d'entraînement
y_pred = svc_clf.predict(x_test) #prédire sur l'ensemble de test


matrix = confusion_matrix(y_test, y_pred)  
accuracy = accuracy_score(y_test, y_pred)  # exactitude
recall = recall_score(y_test, y_pred, average='weighted')  
f1 = f1_score(y_test, y_pred, average='weighted') 
precision = precision_score(y_test, y_pred, average='weighted')  

scores = cross_val_score(svc_clf, x_scaled, y, cv=3, scoring='accuracy')  # cv = nombre de folds pour la validation croisée

print("Scores de précision pour chaque fold :", scores)
print("Précision moyenne :", scores.mean())

print("Matrice de confusion :\n", matrix)  
print("\nExactitude :", accuracy) 
print("\nPrécision :", precision)  
print("\nRappel :", recall)  
print("\nF1-score :", f1)  


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Scores de précision pour chaque fold : [0.73279352 0.72105263 0.85743216]
Précision moyenne : 0.770426104204962
Matrice de confusion :
 [[   0    7    0    0    0    0]
 [   0 1338    0    0    0    0]
 [   0   25    0    0    0    0]
 [   0    6    0    0    0    0]
 [   0   28    0    0    0    0]
 [   0   78    0    0    0    0]]

Exactitude : 0.902834008097166

Précision : 0.8151092461767935

Rappel : 0.902834008097166

F1-score : 0.8567318459815662


# III - Métriques pour la classification

In [107]:
#Stochastic Gradient Descent  

param_grid_sgd = {
    'loss': ['hinge', 'log', 'modified_huber', 'squared_hinge'],
    'penalty': ['l2', 'l1', 'elasticnet'],
    'alpha': [0.0001, 0.001, 0.01],
    'max_iter': [1000, 2000, 3000],
}

grid_search = GridSearchCV(SGDClassifier(), param_grid_sgd, cv=3, scoring='accuracy', verbose=2)


grid_search.fit(x_train, y_train) #entrainement 
print("Meilleurs paramètres trouvés :")
print(grid_search.best_params_)

print("Meilleur score de validation croisée :")
print(grid_search.best_score_)

best_model = grid_search.best_estimator_
y_pred = best_model.predict(x_test)

print("Rapport de classification sur l'ensemble de test :")
print(classification_report(y_test, y_pred))



Fitting 3 folds for each of 108 candidates, totalling 324 fits
[CV] END alpha=0.0001, loss=hinge, max_iter=1000, penalty=l2; total time=   0.5s
[CV] END alpha=0.0001, loss=hinge, max_iter=1000, penalty=l2; total time=   0.6s
[CV] END alpha=0.0001, loss=hinge, max_iter=1000, penalty=l2; total time=   0.5s
[CV] END alpha=0.0001, loss=hinge, max_iter=1000, penalty=l1; total time=   0.7s
[CV] END alpha=0.0001, loss=hinge, max_iter=1000, penalty=l1; total time=   0.7s
[CV] END alpha=0.0001, loss=hinge, max_iter=1000, penalty=l1; total time=   0.8s
[CV] END alpha=0.0001, loss=hinge, max_iter=1000, penalty=elasticnet; total time=   1.0s
[CV] END alpha=0.0001, loss=hinge, max_iter=1000, penalty=elasticnet; total time=   1.0s
[CV] END alpha=0.0001, loss=hinge, max_iter=1000, penalty=elasticnet; total time=   1.0s
[CV] END alpha=0.0001, loss=hinge, max_iter=2000, penalty=l2; total time=   0.6s
[CV] END alpha=0.0001, loss=hinge, max_iter=2000, penalty=l2; total time=   0.5s
[CV] END alpha=0.0001,



[CV] END ..alpha=0.01, loss=hinge, max_iter=1000, penalty=l1; total time=   2.0s




[CV] END ..alpha=0.01, loss=hinge, max_iter=1000, penalty=l1; total time=   2.1s
[CV] END ..alpha=0.01, loss=hinge, max_iter=1000, penalty=l1; total time=   1.9s
[CV] END alpha=0.01, loss=hinge, max_iter=1000, penalty=elasticnet; total time=   0.3s
[CV] END alpha=0.01, loss=hinge, max_iter=1000, penalty=elasticnet; total time=   0.4s
[CV] END alpha=0.01, loss=hinge, max_iter=1000, penalty=elasticnet; total time=   0.3s
[CV] END ..alpha=0.01, loss=hinge, max_iter=2000, penalty=l2; total time=   0.6s
[CV] END ..alpha=0.01, loss=hinge, max_iter=2000, penalty=l2; total time=   0.5s
[CV] END ..alpha=0.01, loss=hinge, max_iter=2000, penalty=l2; total time=   0.5s
[CV] END ..alpha=0.01, loss=hinge, max_iter=2000, penalty=l1; total time=   1.1s




[CV] END ..alpha=0.01, loss=hinge, max_iter=2000, penalty=l1; total time=   3.8s
[CV] END ..alpha=0.01, loss=hinge, max_iter=2000, penalty=l1; total time=   2.1s
[CV] END alpha=0.01, loss=hinge, max_iter=2000, penalty=elasticnet; total time=   0.4s
[CV] END alpha=0.01, loss=hinge, max_iter=2000, penalty=elasticnet; total time=   0.3s
[CV] END alpha=0.01, loss=hinge, max_iter=2000, penalty=elasticnet; total time=   0.4s
[CV] END ..alpha=0.01, loss=hinge, max_iter=3000, penalty=l2; total time=   0.5s
[CV] END ..alpha=0.01, loss=hinge, max_iter=3000, penalty=l2; total time=   0.5s
[CV] END ..alpha=0.01, loss=hinge, max_iter=3000, penalty=l2; total time=   0.6s
[CV] END ..alpha=0.01, loss=hinge, max_iter=3000, penalty=l1; total time=   0.9s
[CV] END ..alpha=0.01, loss=hinge, max_iter=3000, penalty=l1; total time=   0.5s
[CV] END ..alpha=0.01, loss=hinge, max_iter=3000, penalty=l1; total time=   1.2s
[CV] END alpha=0.01, loss=hinge, max_iter=3000, penalty=elasticnet; total time=   0.4s
[CV]



[CV] END alpha=0.01, loss=modified_huber, max_iter=1000, penalty=l1; total time=   2.1s
[CV] END alpha=0.01, loss=modified_huber, max_iter=1000, penalty=elasticnet; total time=   0.4s
[CV] END alpha=0.01, loss=modified_huber, max_iter=1000, penalty=elasticnet; total time=   0.5s
[CV] END alpha=0.01, loss=modified_huber, max_iter=1000, penalty=elasticnet; total time=   0.4s
[CV] END alpha=0.01, loss=modified_huber, max_iter=2000, penalty=l2; total time=   0.6s
[CV] END alpha=0.01, loss=modified_huber, max_iter=2000, penalty=l2; total time=   0.6s
[CV] END alpha=0.01, loss=modified_huber, max_iter=2000, penalty=l2; total time=   0.6s
[CV] END alpha=0.01, loss=modified_huber, max_iter=2000, penalty=l1; total time=   1.3s
[CV] END alpha=0.01, loss=modified_huber, max_iter=2000, penalty=l1; total time=   0.5s
[CV] END alpha=0.01, loss=modified_huber, max_iter=2000, penalty=l1; total time=   0.5s
[CV] END alpha=0.01, loss=modified_huber, max_iter=2000, penalty=elasticnet; total time=   0.4s




[CV] END alpha=0.01, loss=modified_huber, max_iter=3000, penalty=l1; total time=   5.9s




[CV] END alpha=0.01, loss=modified_huber, max_iter=3000, penalty=l1; total time=   5.5s
[CV] END alpha=0.01, loss=modified_huber, max_iter=3000, penalty=elasticnet; total time=   0.5s
[CV] END alpha=0.01, loss=modified_huber, max_iter=3000, penalty=elasticnet; total time=   0.4s
[CV] END alpha=0.01, loss=modified_huber, max_iter=3000, penalty=elasticnet; total time=   0.4s
[CV] END alpha=0.01, loss=squared_hinge, max_iter=1000, penalty=l2; total time=   0.5s
[CV] END alpha=0.01, loss=squared_hinge, max_iter=1000, penalty=l2; total time=   0.5s
[CV] END alpha=0.01, loss=squared_hinge, max_iter=1000, penalty=l2; total time=   0.5s
[CV] END alpha=0.01, loss=squared_hinge, max_iter=1000, penalty=l1; total time=   0.9s
[CV] END alpha=0.01, loss=squared_hinge, max_iter=1000, penalty=l1; total time=   1.0s
[CV] END alpha=0.01, loss=squared_hinge, max_iter=1000, penalty=l1; total time=   1.0s
[CV] END alpha=0.01, loss=squared_hinge, max_iter=1000, penalty=elasticnet; total time=   1.1s
[CV] EN

81 fits failed out of a total of 324.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
81 fits failed with the following error:
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 888, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/sklearn/base.py", line 1466, in wrapper
    estimator._validate_params()
  File "/Library/Frameworks/Python.framework/Versions/3.12/lib/python3.12/site-packages/sklearn/base.py", line 666, in _validate_params
    validate_parameter_constraints(
  File "/Library/Frameworks/Python.framework/

Meilleurs paramètres trouvés :
{'alpha': 0.0001, 'loss': 'modified_huber', 'max_iter': 3000, 'penalty': 'l1'}
Meilleur score de validation croisée :
0.8987692820171168
Rapport de classification sur l'ensemble de test :
              precision    recall  f1-score   support

         0.0       0.00      0.00      0.00         7
         1.0       0.94      0.88      0.91      1338
         2.0       0.07      0.28      0.11        25
         3.0       0.00      0.00      0.00         6
         4.0       0.38      0.21      0.27        28
         5.0       0.28      0.37      0.32        78

    accuracy                           0.82      1482
   macro avg       0.28      0.29      0.27      1482
weighted avg       0.87      0.82      0.84      1482



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
#Gradient Boosting Machines

param_grid_gbc = {
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.01, 0.1, 0.5],
    'max_depth': [3, 5, 7],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
}


grid_search_gbc = GridSearchCV(GradientBoostingClassifier(), param_grid_gbc, cv=3, scoring='accuracy', verbose=2)


grid_search_gbc.fit(x_train, y_train) #entrainement 
print("Meilleurs paramètres trouvés :")
print(grid_search_gbc.best_params_)

print("Meilleur score de validation croisée :")
print(grid_search_gbc.best_score_)

best_model_gbc = grid_search_gbc.best_estimator_
y_pred = best_model_gbc.predict(x_test)

print("Rapport de classification sur l'ensemble de test :")
print(classification_report(y_test, y_pred))



Fitting 3 folds for each of 64 candidates, totalling 192 fits


ValueError: Invalid parameter 'C' for estimator GradientBoostingClassifier(). Valid parameters are: ['ccp_alpha', 'criterion', 'init', 'learning_rate', 'loss', 'max_depth', 'max_features', 'max_leaf_nodes', 'min_impurity_decrease', 'min_samples_leaf', 'min_samples_split', 'min_weight_fraction_leaf', 'n_estimators', 'n_iter_no_change', 'random_state', 'subsample', 'tol', 'validation_fraction', 'verbose', 'warm_start'].

In [110]:
#Support Vector Machines

param_grid_svm = {
    'C': [0.1, 1, 10, 100],
    'kernel': ['rbf', 'linear', 'poly', 'sigmoid'],
    'gamma': [1, 0.1, 0.01, 0.001],
}

grid_search_svm = GridSearchCV(SVC(), param_grid_svm, cv=3, scoring='accuracy', verbose=2)


grid_search_svm.fit(x_train, y_train) #entrainement 
print("Meilleurs paramètres trouvés :")
print(grid_search_svm.best_params_)

print("Meilleur score de validation croisée :")
print(grid_search_svm.best_score_)

best_model_svm = grid_search_svm.best_estimator_
y_pred = best_model_svm.predict(x_test)
classe = classification_report(y_test, y_pred)

print("Rapport de classification sur l'ensemble de test :", classe)



Fitting 3 folds for each of 64 candidates, totalling 192 fits
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   7.1s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   7.4s
[CV] END .........................C=0.1, gamma=1, kernel=rbf; total time=   7.3s
[CV] END ......................C=0.1, gamma=1, kernel=linear; total time= 1.9min
[CV] END ......................C=0.1, gamma=1, kernel=linear; total time= 2.5min
[CV] END ......................C=0.1, gamma=1, kernel=linear; total time= 2.0min


# IV - Préparation du Script 

In [4]:
def chargement_model(model_path):
    model = joblib.load(model_path)
    return model

def generation_map(data, model):
    test_data = pd.read_json(data)
    predictions = model.predict(test_data)
    map = folium.Map(location=[48.8566, 2.3522], zoom_start=12)  # Coordonnées de Paris comme exemple

    for index, row in test_data.iterrows():
        if predictions[index] == 'risque':  # vérifiez la classe prédite pour l'arbre en question
            folium.Marker([row['latitude'], row['longitude']], popup=row['nom_arbre']).add_to(map)

    map.save('carte_arbres_risque.html')  # Sauvegarde de la carte au format HTML

data = ''
model_path = ''
model = chargement_model(model_path)
generation_map(data, model)


FileNotFoundError: [Errno 2] No such file or directory: ''