In [1]:
# import jtplot submodule from jupyterthemes
from jupyterthemes import jtplot

# currently installed theme will be used to
# set plot style if no arguments provided
jtplot.style()
#%config InlineBackend.figure_format ='retina'

# Projet Python : Les arbres de Grenoble

## #3/En utilisant Pandas

In [2]:
from pathlib import Path
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

# Use 3 decimal places in output display
pd.set_option("display.precision", 3)

donnees = "../data/ESP_PUBLIC_IDENTITE_ARBRE.csv"

In [3]:
df = pd.read_csv(donnees) #, sep=",", header = 0, index_col=False,names=None)

In [None]:
print(pd.__version__)

In [None]:
df[["GENRE_BOTA","ESPECE"]].value_counts()

In [None]:
# Summary statistics of the numeric columns. describe is a method: without the
# call parentheses the cell would only display the bound-method repr.
df.describe()

In [None]:
df.columns

In [None]:
df["GENRE_BOTA"].value_counts()

In [None]:
# nombre d'éléments par colonnes
df.nunique(axis=0, dropna=True)

In [None]:
# Keep only the trees whose planting year is known. The explicit .copy() makes
# this an independent frame, so assigning a new column to it does not raise
# pandas' SettingWithCopyWarning.
ANNEEDEPLANTATION_no_na = df[df["ANNEEDEPLANTATION"].notna()].copy()
# Number of trees with a known planting year.
nbr_arbres = ANNEEDEPLANTATION_no_na.shape[0]
print(nbr_arbres)

In [None]:
ANNEEDEPLANTATION_no_na.sort_values(by = ["ANNEEDEPLANTATION"], ascending=True).loc[:, ["ANNEEDEPLANTATION", "ESPECE"]]

### ● Nombre d'arbres plantés en moyenne chaque année:

In [None]:
ANNEEDEPLANTATION_no_na["ANNEEDEPLANTATION"].value_counts().mean()

### ● Nombre d'arbres plantés en 1993:

In [None]:
ANNEEDEPLANTATION_no_na["ANNEEDEPLANTATION"].value_counts().at[1993]

### ● Nombre d'arbres plantés DEPUIS 1993:

In [None]:
ANNEEDEPLANTATION_no_na.loc[ANNEEDEPLANTATION_no_na["ANNEEDEPLANTATION"]>=1993, "ANNEEDEPLANTATION"].value_counts().sum()

### ● Quel Maire a planté le plus d’arbres à Grenoble ?

In [None]:
# Mayor name -> list of the years attributed to that mayor.
# 'Martin' is assembled from three separate year ranges.
Maires = {
    'Mistral': list(range(1919, 1932)),
    'Martin': (
        list(range(1932, 1935))
        + list(range(1945, 1947))
        + list(range(1949, 1959))
    ),
    'Cocat': list(range(1935, 1944)),
    'Lafleur': list(range(1944, 1945)),
    'Bally': list(range(1947, 1948)),
    'Perinetti': list(range(1948, 1949)),
    'Michallon': list(range(1959, 1965)),
    'Dubedout': list(range(1965, 1983)),
    'Carignon': list(range(1983, 1995)),
    'Destot': list(range(1995, 2014)),
    'Piolle': list(range(2014, 2020)),
}

In [None]:
# function to return key for any value 
def get_key(val, my_dict):
    """Return the first key of ``my_dict`` whose value contains ``val``.

    Keys are examined in the dictionary's iteration order and membership is
    tested with ``val in value``. When no value contains ``val`` the string
    "key doesn't exist" is returned instead of raising.
    """
    matching_keys = (name for name, members in my_dict.items() if val in members)
    return next(matching_keys, "key doesn't exist")

In [None]:
# Name of the mayor matching each tree's planting year, in row order.
# Iterating over the year column directly avoids iterrows(), which builds a
# full Series for every row only to read a single value from it.
list_maires = [
    get_key(annee, Maires)
    for annee in ANNEEDEPLANTATION_no_na["ANNEEDEPLANTATION"]
]

In [None]:
ANNEEDEPLANTATION_no_na['Maire'] =  list_maires

In [None]:
ANNEEDEPLANTATION_no_na["Maire"].value_counts()

### ● Histogramme représentant le nombre d’arbres plantés par année.   

In [None]:
fig, axs = plt.subplots(figsize=(12, 4));
ANNEEDEPLANTATION_no_na["ANNEEDEPLANTATION"].value_counts().sort_index(ascending=True).plot(ax=axs)

In [None]:
ANNEEDEPLANTATION_no_na["ANNEEDEPLANTATION"].plot.hist(bins=20)

### ● Récupérer l'information concernant le genre botanique et la stocker conjointement avec l'année de plantation dans un tuple. Les tuples seront stockés dans une liste.

In [None]:
List_tuples_genre_annee = list(zip(ANNEEDEPLANTATION_no_na["GENRE_BOTA"].tolist(), 
                                     ANNEEDEPLANTATION_no_na["ANNEEDEPLANTATION"].tolist()))

## Interlude: The Weird Trees Game

In [None]:
donnees_corrupted = "../data/ESP_PUBLIC_IDENTITE_ARBRE_CORRPUTED.csv"
df_corrupted = pd.read_csv(donnees_corrupted)

In [None]:
df_corrupted.head(3)
df_corrupted.head(4)

In [None]:
df_corrupted[df != df_corrupted].nunique(dropna=True)

In [None]:
df_corrupted.loc[16386,"ANNEEDEPLANTATION"],df.loc[16386, "ANNEEDEPLANTATION"]

In [None]:
#df_corrupted[df != df_corrupted].dropna(axis=1, how='all').dropna(axis=0,how='all')
df[df != df_corrupted].dropna(axis=1, how='all').dropna(axis=0,how='all')

In [None]:
df_corrupted[df != df_corrupted].dropna(axis=1, how='all').dropna(axis=0,how='all')

In [None]:
df_corrupted[df != df_corrupted].isna()

In [None]:
df_corrupted[df_corrupted[df != df_corrupted].isna()]["ANNEEDEPLANTATION"]

In [None]:
df_corrupted[df_corrupted[df != df_corrupted].isna()].combine_first(df)["ANNEEDEPLANTATION"]

In [None]:
df_corrupted[df_corrupted[df != df_corrupted].isna()].combine_first(df).dropna(axis=1, how='all').dropna(axis=0,how='all')

In [None]:
df_corrupted.to_csv("name.csv")

### ● Genre botanique

In [None]:
GENRE_BOTA_na = df[df["GENRE_BOTA"].isna()]
nbr_arbres_sans_genre = (GENRE_BOTA_na.shape[0]) 
print(nbr_arbres_sans_genre)

In [None]:
sorted_list_gender_year = sorted(List_tuples_genre_annee, key=lambda tup: tup[1])[::-1]
print(sorted_list_gender_year[:5])

### ● Pour commencer identifiez tous les genres botaniques représentés à Grenoble. Combien y en a-t-il ? 

In [None]:
GENRE_BOTA_no_na = df[df["GENRE_BOTA"].notna()]

In [None]:
df["GENRE_BOTA"].value_counts()

In [None]:
# En terme de pourcentage :

In [None]:
GENRE_BOTA_no_na["GENRE_BOTA"].value_counts(normalize=True) * 100

In [None]:
GENRE_BOTA_no_na["GENRE_BOTA"].value_counts()[GENRE_BOTA_no_na["GENRE_BOTA"].value_counts() == 1]

### ● Identifiez maintenant toutes les espèces différentes présentes à Grenoble. 

In [None]:
["GENRE_BOTA","ESPECE"]
liste_des_colonnes = ["GENRE_BOTA","ESPECE"]
liste_des_colonnes = list(["GENRE_BOTA","ESPECE"])

In [None]:
df[["GENRE_BOTA","ESPECE"]].value_counts()

In [None]:
GENRE_BOTA_no_na[["GENRE_BOTA","ESPECE"]].value_counts()[GENRE_BOTA_no_na[["GENRE_BOTA","ESPECE"]].value_counts() == 1]

### ●	Comment la pratique de plantation a-t-elle évolué au cours du temps ? Diversifie-t-on plus maintenant qu’avant ? Vos analyses sont-elles en accord avec le communiqué de presse ? 

In [None]:
GENRE_BOTA_no_na[["ANNEEDEPLANTATION","GENRE_BOTA","ESPECE"]].groupby("ANNEEDEPLANTATION").count()#.value_counts(normalize=True) * 100

In [None]:
GENRE_BOTA_no_na[["ANNEEDEPLANTATION","GENRE_BOTA"]].value_counts()#.value_counts(normalize=True) * 100

In [None]:
per_year = GENRE_BOTA_no_na[["ANNEEDEPLANTATION","GENRE_BOTA","ESPECE"]].groupby(["ANNEEDEPLANTATION"])

In [None]:
per_year["GENRE_BOTA"].value_counts(normalize=True)

In [None]:
for year, group in per_year:
    print(year)
    print(group["GENRE_BOTA"].value_counts(normalize=True))

Solution pour comparer l'évolution, au cours des années, de la part du genre le plus planté chaque année 

In [None]:
# Fraction of each botanical genus among the trees planted in a given year
# (Series indexed by planting year, then genus).
perc_genre_per_year = (
    GENRE_BOTA_no_na[["ANNEEDEPLANTATION", "GENRE_BOTA"]]
    .groupby(["ANNEEDEPLANTATION"])["GENRE_BOTA"]
    .value_counts(normalize=True)
)
# Turn the Series into a flat table: the fraction goes in a 'frac' column and
# the two index levels become regular columns.
df_value_counts = perc_genre_per_year.to_frame(name='frac').reset_index()
df_value_counts

In [None]:
perc_genre_max_per_year = df_value_counts.loc[df_value_counts.groupby("ANNEEDEPLANTATION")["frac"].idxmax()]

In [None]:
perc_genre_max_per_year.plot.scatter(x="ANNEEDEPLANTATION", y = "frac", figsize=(8,4))

On s'intéresse maintenant à l'évolution de la population totale du parc d'arbres. On cherche à représenter la part du genre d'arbre le plus présent (nombre d'arbres du même genre en l'an N, cumulé sur les années de plantation <= N).

In [None]:
per_year = GENRE_BOTA_no_na.groupby(["ANNEEDEPLANTATION"])

In [None]:
for year, group in per_year:
    print(year)
    print(group["GENRE_BOTA"])

In [None]:
nb_genre_per_year = GENRE_BOTA_no_na[["ANNEEDEPLANTATION","GENRE_BOTA"]].groupby(["ANNEEDEPLANTATION"])["GENRE_BOTA"].value_counts()
# converting to df and assigning new names to the columns
df_value_counts = pd.DataFrame(nb_genre_per_year)
#df_value_counts = df_value_counts.reset_index()
df_value_counts.columns = ['Nb'] # change column names
df_value_counts = df_value_counts.reset_index()
df_value_counts

In [None]:
GENRE_BOTA_no_na.groupby(["ANNEEDEPLANTATION"])["GENRE_BOTA"].value_counts().unstack(fill_value = 0)

In [None]:
df_year_gender_cum = GENRE_BOTA_no_na[["ANNEEDEPLANTATION","GENRE_BOTA"]].groupby(["ANNEEDEPLANTATION"])["GENRE_BOTA"].value_counts().unstack(fill_value = 0).cumsum()

In [None]:
df_year_gender_cum

In [None]:
# Cumulative number of trees per genus (columns) up to each planting year (rows).
genre_counts_cum = (
    GENRE_BOTA_no_na[["ANNEEDEPLANTATION", "GENRE_BOTA"]]
    .groupby(["ANNEEDEPLANTATION"])["GENRE_BOTA"]
    .value_counts()
    .unstack(fill_value=0)
    .cumsum()
)

# Every summary column is derived from the genus-only counts. Computing them on
# the frame that already carries the derived columns would mix the string
# column GENRE_MAX into the numeric row-wise max().
df_year_gender_cum = genre_counts_cum.copy()
# Total number of trees planted up to each year.
df_year_gender_cum["POPULATIONTOTALE"] = genre_counts_cum.sum(axis=1)
# Most represented genus up to each year, and its cumulative count.
df_year_gender_cum["GENRE_MAX"] = genre_counts_cum.idxmax(axis=1)
df_year_gender_cum["NBR_REPR_GENRE_MAX"] = genre_counts_cum.max(axis=1)
# Share of that genus in the total population, in percent.
df_year_gender_cum["PERC_REPR_GENRE_MAX"] = (
    df_year_gender_cum["NBR_REPR_GENRE_MAX"] / df_year_gender_cum["POPULATIONTOTALE"] * 100
)

In [None]:
df_year_gender_cum

In [None]:
df_year_gender_cum["PERC_REPR_GENRE_MAX"].plot(figsize=(8,4))

In [None]:
plotdata = GENRE_BOTA_no_na.groupby(["ANNEEDEPLANTATION"])["GENRE_BOTA"].\
value_counts().\
unstack(fill_value = 0).\
cumsum()
#plotdata.apply(lambda x: x*100/sum(x), axis=1).plot(kind="bar", stacked=True)

In [None]:
# Cumulative genus counts expressed as a percentage of each year's total.
# Vectorised form of "row * 100 / sum(row)": no Python-level call per row.
pd8perc = plotdata.mul(100).div(plotdata.sum(axis=1), axis=0)
# Keep the genera that are below 2.5 % of the total in at most 50 of the years
# (presumably to keep the stacked chart and its legend readable).
pd8perc = pd8perc.loc[:, (pd8perc[pd8perc >= 2.5].isnull().sum(axis=0) <= 50)]
pd8perc.index = pd8perc.index.astype(int)

# `ax` holds the Axes; the legend is placed in a separate call so that `ax`
# is not rebound to the Legend object returned by .legend().
ax = pd8perc.plot(kind="bar", stacked=True)
ax.legend(bbox_to_anchor=(1.2, 1))
plt.title("Répartition des genres botaniques\n au cours du temps")
plt.xlabel("Année")
plt.ylabel("Part du total d'arbres (%)")
# Show only one x tick label out of every `nb_ticks`.
xticks, xticks_labels = plt.xticks()
nb_ticks = 5
plt.xticks(xticks[::nb_ticks], xticks_labels[::nb_ticks])
pd8perc

In [None]:
df_year_gender_cum.reset_index()["ANNEEDEPLANTATION"]

In [None]:
df_year_gender_cum["PERC_REPR_GENRE_MAX"]

In [None]:
fig = plt.figure(1)
ax = plt.subplot(1, 1, 1)
tick_spacing = 1

plt.bar(df_year_gender_cum.reset_index()["ANNEEDEPLANTATION"],df_year_gender_cum["PERC_REPR_GENRE_MAX"])

xticks = ax.get_xticks()
xlabels = ax.get_xticklabels()
#ax.set_xticks(xticks[::tick_spacing])
#ax.set_xticklabels(xlabels[::tick_spacing])


plt.xticks(rotation="vertical")
plt.xlabel(r'Année')
plt.ylabel('Genre bota le plus présent\nen part du total')
plt.grid(True)
plt.show()

In [None]:
pd8perc = plotdata.apply(lambda x: x*100/sum(x), axis=1)
pd8perc.index = pd8perc.index.astype(int)

ax = pd8perc.max(axis=1).plot(kind="bar", stacked=True)
plt.title("Evolution du genre botanique le + présent\n au cours du temps")
plt.xlabel("Année")
plt.ylabel("Part du total d'arbres (%)")
xticks, xticks_labels = plt.xticks()
nb_ticks = 5
plt.xticks(xticks[::nb_ticks], xticks_labels[::nb_ticks])
pd8perc

### Analyse sur les espèces

In [None]:
plotdata = GENRE_BOTA_no_na[["ANNEEDEPLANTATION","GENRE_BOTA","ESPECE"]].\
value_counts().\
unstack(level=["GENRE_BOTA","ESPECE"], fill_value = 0).\
cumsum()

In [None]:
pd8perc = plotdata.apply(lambda x: x*100/sum(x), axis=1)
pd8perc.index = pd8perc.index.astype(int)

ax = pd8perc.max(axis=1).plot(kind="bar", stacked=True)
plt.title("Evolution de l'espece botanique le + présent\n au cours du temps")
plt.xlabel("Année")
plt.ylabel("Part du total d'arbres (%)")
xticks, xticks_labels = plt.xticks()
nb_ticks = 5
plt.xticks(xticks[::nb_ticks], xticks_labels[::nb_ticks])
pd8perc

## #4/Geolocalisation

In [11]:
import json
# json_normalize is exposed at the pandas top level; importing it from
# pandas.io.json is a deprecated location.
from pandas import json_normalize

In [12]:
df = pd.read_csv(donnees)

In [5]:
dict_tree0 = json.loads(df["GeoJSON"].tolist()[0])

In [6]:
dict_tree0

{'type': 'Point', 'coordinates': [5.73941612552464, 45.1911217700938]}

#### Three different methods to unpack the JSON coordinates:

In [13]:
test = df.copy() # create a copy of the initial dataframe on which to play

###### Méthode 1

In [8]:
%%timeit

just_geoJSON = pd.DataFrame.from_records(map(json.loads, test['GeoJSON']))
test[["lon","lat"]] = just_geoJSON.coordinates.tolist()

291 ms ± 10.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


###### Méthode 2

In [53]:
%%timeit

test.join(test['GeoJSON'].str.extract(r'\{"type":"Point","coordinates":\[(?P<Longitude>\d+.\d+),(?P<Latitude>\d+.\d+)').astype(np.float64))

112 ms ± 269 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


###### Méthode 3

In [40]:
%%timeit

test[["lon","lat"]] = pd.read_json(json.dumps([json.loads(x) for x in test['GeoJSON']])).coordinates.tolist()

###### Use fastest method : #2

In [14]:
# Extract longitude and latitude from the GeoJSON point string with a single
# vectorised regex and join them to the frame as float columns 'lon' / 'lat'.
# The decimal point is escaped (a bare '.' matches any character) and an
# optional leading minus sign is accepted so negative coordinates also match.
test = df.join(
    df['GeoJSON']
    .str.extract(r'\{"type":"Point","coordinates":\[(?P<lon>-?\d+\.\d+),(?P<lat>-?\d+\.\d+)')
    .astype(np.float64)
)

In [15]:
# Delete the 'GeoJSON', not needed
test = test.drop('GeoJSON' , axis=1)

In [157]:
test.head()

Unnamed: 0,ELEM_POINT_ID,CODE,NOM,GENRE,GENRE_DESC,CATEGORIE,CATEGORIE_DESC,SOUS_CATEGORIE,SOUS_CATEGORIE_DESC,CODE_PARENT,...,IDENTIFIANTPLU,TYPEIMPLANTATIONPLU,INTITULEPROTECTIONPLU,ANNEEABATTAGE,ESSOUCHEMENT,DIAMETREARBRE,CAUSEABATTAGE,COLLECTIVITE,lon,lat
0,16750,ESP19318,ESP19318,VEG,VEGETATION,ESP01,Arbre,ESP174,Arbre d'espaces ouverts,ESP995,...,,,,,,,,Ville de Grenoble,5.739,45.191
1,20142,ESP18095,ESP18095,VEG,VEGETATION,ESP01,Arbre,ESP174,Arbre d'espaces ouverts,ESP995,...,,,,,,,,Ville de Grenoble,5.74,45.191
2,23843,ESP17861,ESP17861,VEG,VEGETATION,ESP01,Arbre,ESP174,Arbre d'espaces ouverts,ESP995,...,,,,,,,,Ville de Grenoble,5.739,45.191
3,23841,ESP17860,ESP17860,VEG,VEGETATION,ESP01,Arbre,ESP174,Arbre d'espaces ouverts,ESP995,...,,,,,,,,Ville de Grenoble,5.741,45.189
4,1778,ESP16403,ESP16403,VEG,VEGETATION,ESP01,Arbre,ESP174,Arbre d'espaces ouverts,ESP995,...,,,,,,,,Ville de Grenoble,5.741,45.19


### Plot all trees with Folium, marker type

In [8]:
import folium
from folium.plugins import FastMarkerCluster

In [9]:
popups = ['Genre:{}<br>Espèce:{}<br>Année de plantation:{}'.\
          format(gen,esp,year) \
          for (gen,esp,year)\
          in test[["GENRE_BOTA","ESPECE", "ANNEEDEPLANTATION"]].values.tolist()
         ]
          #format(genre, esp, year) for (genre, esp, year) in test["genre"]]

NameError: name 'test' is not defined

In [196]:
%%time


callback = """\
function (row) {
    var icon, marker;
    icon = L.AwesomeMarkers.icon({
        icon: "map-marker", markerColor: "red"});
    marker = L.marker(new L.LatLng(row[0], row[1]));
    marker.setIcon(icon);
    return marker;
};"""


m = folium.Map(
    location=[test.lat.mean(),test.lon.mean()],
    tiles='Cartodb Positron',
    zoom_start=10
)

FastMarkerCluster(data=list(zip(test.lat.to_list(), test.lon.to_list())),
                  callback = callback,
                  popups=popups
                 ).add_to(m)

folium.LayerControl().add_to(m)

CPU times: user 124 ms, sys: 3.77 ms, total: 127 ms
Wall time: 124 ms


<folium.map.LayerControl at 0x7f7349c20a00>

In [197]:
m

### Distance entre deux arbres

In [21]:
pt1 = [test.lat[0],test.lon[0]]
pt2 = [test.lat[1],test.lon[1]]

In [111]:
test

Unnamed: 0,ELEM_POINT_ID,CODE,NOM,GENRE,GENRE_DESC,CATEGORIE,CATEGORIE_DESC,SOUS_CATEGORIE,SOUS_CATEGORIE_DESC,CODE_PARENT,...,IDENTIFIANTPLU,TYPEIMPLANTATIONPLU,INTITULEPROTECTIONPLU,ANNEEABATTAGE,ESSOUCHEMENT,DIAMETREARBRE,CAUSEABATTAGE,COLLECTIVITE,lon,lat
0,16750,ESP19318,ESP19318,VEG,VEGETATION,ESP01,Arbre,ESP174,Arbre d'espaces ouverts,ESP995,...,,,,,,,,Ville de Grenoble,5.739,45.191
1,20142,ESP18095,ESP18095,VEG,VEGETATION,ESP01,Arbre,ESP174,Arbre d'espaces ouverts,ESP995,...,,,,,,,,Ville de Grenoble,5.740,45.191
2,23843,ESP17861,ESP17861,VEG,VEGETATION,ESP01,Arbre,ESP174,Arbre d'espaces ouverts,ESP995,...,,,,,,,,Ville de Grenoble,5.739,45.191
3,23841,ESP17860,ESP17860,VEG,VEGETATION,ESP01,Arbre,ESP174,Arbre d'espaces ouverts,ESP995,...,,,,,,,,Ville de Grenoble,5.741,45.189
4,1778,ESP16403,ESP16403,VEG,VEGETATION,ESP01,Arbre,ESP174,Arbre d'espaces ouverts,ESP995,...,,,,,,,,Ville de Grenoble,5.741,45.190
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31887,16922,ESP19587,ESP19587,VEG,VEGETATION,ESP01,Arbre,ESP174,Arbre d'espaces ouverts,ESP989,...,,,,,,,,Ville de Grenoble,5.725,45.171
31888,12659,ESP19291,ESP19291,VEG,VEGETATION,ESP01,Arbre,ESP174,Arbre d'espaces ouverts,ESP989,...,,,,,,,,Ville de Grenoble,5.726,45.170
31889,1869,ESP16427,ESP16427,VEG,VEGETATION,ESP01,Arbre,ESP174,Arbre d'espaces ouverts,ESP989,...,,,,,,,,Ville de Grenoble,5.725,45.170
31890,24296,ESP14848,ESP14848,VEG,VEGETATION,ESP01,Arbre,ESP174,Arbre d'espaces ouverts,ESP989,...,,,,,,,,Ville de Grenoble,5.726,45.170


#### Formule de Haversine

In [24]:
def distance_sur_sphere(pt1, pt2, r=6371*10**3):
    """
    Compute the distance between two points on a sphere (haversine formula):
    pt1 = [lat1, lon1] : latitude and longitude of point 1 [rad]
    pt2 = [lat2, lon2] : latitude and longitude of point 2 [rad]
    r : radius of the sphere (defaults to the Earth's radius) [m]

    Returns the distance measured along the sphere, in the same unit as r.
    """
    lat1, lon1 = pt1
    lat2, lon2 = pt2
    sin_moy_carre_lat = np.sin((lat1 - lat2) / 2) ** 2
    sin_moy_carre_lon = np.sin((lon1 - lon2) / 2) ** 2
    haversine = sin_moy_carre_lat + np.cos(lat1) * np.cos(lat2) * sin_moy_carre_lon
    # Floating-point rounding can push the term marginally outside [0, 1] for
    # (nearly) antipodal points, which would make arcsin return NaN.
    haversine = np.clip(haversine, 0.0, 1.0)

    return 2 * r * np.arcsin(np.sqrt(haversine))

In [60]:
%%timeit

D = distance_sur_sphere([np.radians(i) for i in pt1], [np.radians(i) for i in pt2])

25.2 µs ± 42.5 ns per loop (mean ± std. dev. of 7 runs, 10000 loops each)


In [61]:
print(distance_sur_sphere([i/180*np.pi for i in pt1], [i/180*np.pi for i in pt2]))

43.12563641765883


#### En utilisant geopy

In [18]:
from geopy.distance import great_circle

In [19]:
#great_circle??

In [22]:
%%timeit

D2 = great_circle(pt1,pt2)

16.7 µs ± 269 ns per loop (mean ± std. dev. of 7 runs, 100000 loops each)


In [65]:
print(great_circle(pt1,pt2))

0.043569784285328796 km


### Les 10 arbres les + proches de chez moi

In [42]:
# Mon adresse:
lat_home, lon_home = (45.185076, 5.721401)
lat_parents, lon_parents = (45.187187, 5.727943)

In [24]:
# Work on an independent copy: pd.DataFrame(test) does not copy the underlying
# data, so columns added to the result could leak back into `test`.
test_dist = test.copy()

In [None]:
%%time

test_dist["Dist_From_Home"] = test_dist.apply(
    lambda x: great_circle((x["lat"],x["lon"]),(lat_home, lon_home)).m, axis=1)

In [28]:
%%time

test_dist["Dist_From_Home"] = [great_circle((lat,lon),(lat_home, lon_home)).m for (lat,lon) in zip(test["lat"], test["lon"])]

CPU times: user 586 ms, sys: 7.35 ms, total: 593 ms
Wall time: 609 ms


In [29]:
Distance_dixieme_arbre = test_dist.sort_values("Dist_From_Home")[:10]["Dist_From_Home"].iloc[-1]

In [30]:
arbres_proches = test_dist.drop(test_dist[test_dist.Dist_From_Home > Distance_dixieme_arbre].index)

In [31]:
arbres_proches

Unnamed: 0,ELEM_POINT_ID,CODE,NOM,GENRE,GENRE_DESC,CATEGORIE,CATEGORIE_DESC,SOUS_CATEGORIE,SOUS_CATEGORIE_DESC,CODE_PARENT,...,TYPEIMPLANTATIONPLU,INTITULEPROTECTIONPLU,ANNEEABATTAGE,ESSOUCHEMENT,DIAMETREARBRE,CAUSEABATTAGE,COLLECTIVITE,lon,lat,Dist_From_Home
7968,11771,ESP18980,ESP18980,VEG,VEGETATION,ESP01,Arbre,ESP174,Arbre d'espaces ouverts,ESP1194,...,,,,,,,Grenoble Alpes Métropole,5.721,45.185,53.817
7969,19078,ESP17147,ESP17147,VEG,VEGETATION,ESP01,Arbre,ESP174,Arbre d'espaces ouverts,ESP1194,...,,,,,,,Grenoble Alpes Métropole,5.721,45.185,57.394
7970,393,ESP13271,ESP13271,VEG,VEGETATION,ESP01,Arbre,ESP174,Arbre d'espaces ouverts,ESP1194,...,,,,,,,Grenoble Alpes Métropole,5.721,45.185,61.604
16427,30227,ESP31413,ESP31413,VEG,VEGETATION,ESP01,Arbre,ESP151,Arbre de voirie,ESP139,...,,,,,,,Grenoble Alpes Métropole,5.722,45.185,21.89
16428,30226,ESP31412,ESP31412,VEG,VEGETATION,ESP01,Arbre,ESP151,Arbre de voirie,ESP139,...,,,,,,,Grenoble Alpes Métropole,5.722,45.185,23.832
16429,31867,ESP32612,ESP32612,VEG,VEGETATION,ESP01,Arbre,ESP151,Arbre de voirie,ESP139,...,,,,,,,Grenoble Alpes Métropole,5.721,45.185,61.439
16430,31866,ESP32611,ESP32611,VEG,VEGETATION,ESP01,Arbre,ESP151,Arbre de voirie,ESP139,...,,,,,,,Grenoble Alpes Métropole,5.721,45.185,60.703
16431,31865,ESP32610,ESP32610,VEG,VEGETATION,ESP01,Arbre,ESP151,Arbre de voirie,ESP139,...,,,,,,,Grenoble Alpes Métropole,5.721,45.185,60.418
16432,31864,ESP32609,ESP32609,VEG,VEGETATION,ESP01,Arbre,ESP151,Arbre de voirie,ESP139,...,,,,,,,Grenoble Alpes Métropole,5.721,45.185,60.864
16433,31863,ESP32608,ESP32608,VEG,VEGETATION,ESP01,Arbre,ESP151,Arbre de voirie,ESP139,...,,,,,,,Grenoble Alpes Métropole,5.721,45.185,61.509


### Afficher seulement les 10 arbres les plus proches de chez moi avec folium

In [32]:
from folium.plugins import MarkerCluster

In [33]:
popups = ['Genre : {}<br>Espece : {}<br>Annee de plantation : {}<br>Distance to home : {}'.\
          format(gen,esp,year,round(dist,0)) \
          for (gen,esp,year,dist)\
          in arbres_proches[["GENRE_BOTA","ESPECE", "ANNEEDEPLANTATION","Dist_From_Home"]].values.tolist()
         ]
          #format(genre, esp, year) for (genre, esp, year) in test["genre"]]

In [48]:
%%time


icon_create_function = """\
function(cluster) {
    return L.divIcon({
    html: '<b>' + cluster.getChildCount() + '</b>',
    className: 'marker-cluster marker-cluster-large',
    iconSize: new L.Point(20, 20)
    });
}"""

m = folium.Map(
    location=[lat_home, lon_home],
    tiles='Cartodb Positron',
    zoom_start=15
)

# Cluster layer with one marker (and popup) per nearby tree. It has to be
# attached to the map, otherwise the trees are never drawn.
MarkerCluster(locations=list(zip(arbres_proches.lat.to_list(), arbres_proches.lon.to_list())),
              popups=popups,
              overlay=True,
              control=True,
              icon_create_function=icon_create_function
              ).add_to(m)

folium.Marker(
    location=[lat_home, lon_home],
    popup='Gaetan',
    icon=folium.Icon(color='blue', icon='home')
).add_to(m)

folium.Marker(
    location=[lat_parents, lon_parents],
    popup='Papa Maman',
    icon=folium.Icon(color='green', icon='home')
).add_to(m)

folium.Circle(
    location=[lat_home, lon_home],
    radius=1000,
    popup='Mon aire de confinement',
    color='#3186cc',
    fill=True,
    fill_color='#3186cc'
).add_to(m)

folium.Circle(
    location=[lat_parents, lon_parents],
    radius=1000,
    popup='Mon aire de confinement',
    color='green',
    fill=True,
    fill_color='green'
).add_to(m)

folium.LayerControl().add_to(m)

CPU times: user 5.84 ms, sys: 2.39 ms, total: 8.23 ms
Wall time: 7.65 ms


<folium.map.LayerControl at 0x11d4b0590>

In [49]:
m

In [213]:
# heat map of all trees
from folium.plugins import HeatMap

def generateBaseMap(default_location=None, default_zoom_start=12):
    """Create an empty folium map with a scale control.

    default_location : [lat, lon] used as the map centre. When omitted, the mean
        latitude and longitude of `test` are used; they are computed at call
        time, so the centre follows the current content of `test` and no
        mutable list is shared between calls.
    default_zoom_start : initial zoom level passed to folium.Map.

    Returns the folium.Map object.
    """
    if default_location is None:
        default_location = [test.lat.mean(), test.lon.mean()]
    base_map = folium.Map(location=default_location, control_scale=True, zoom_start=default_zoom_start)
    return base_map

df_copy = test_dist.copy()
df_copy['count'] = 1
base_map = generateBaseMap()
HeatMap(data=df_copy[['lat', 'lon', 'count']].groupby(['lat', 'lon']).\
        sum().reset_index().values.tolist(), radius=8, max_zoom=13).add_to(base_map)
base_map

NameError: name 'test_dist' is not defined

In [206]:
test

Unnamed: 0,ELEM_POINT_ID,CODE,NOM,GENRE,GENRE_DESC,CATEGORIE,CATEGORIE_DESC,SOUS_CATEGORIE,SOUS_CATEGORIE_DESC,CODE_PARENT,...,IDENTIFIANTPLU,TYPEIMPLANTATIONPLU,INTITULEPROTECTIONPLU,ANNEEABATTAGE,ESSOUCHEMENT,DIAMETREARBRE,CAUSEABATTAGE,COLLECTIVITE,lon,lat
0,16750,ESP19318,ESP19318,VEG,VEGETATION,ESP01,Arbre,ESP174,Arbre d'espaces ouverts,ESP995,...,,,,,,,,Ville de Grenoble,5.739,45.191
1,20142,ESP18095,ESP18095,VEG,VEGETATION,ESP01,Arbre,ESP174,Arbre d'espaces ouverts,ESP995,...,,,,,,,,Ville de Grenoble,5.740,45.191
2,23843,ESP17861,ESP17861,VEG,VEGETATION,ESP01,Arbre,ESP174,Arbre d'espaces ouverts,ESP995,...,,,,,,,,Ville de Grenoble,5.739,45.191
3,23841,ESP17860,ESP17860,VEG,VEGETATION,ESP01,Arbre,ESP174,Arbre d'espaces ouverts,ESP995,...,,,,,,,,Ville de Grenoble,5.741,45.189
4,1778,ESP16403,ESP16403,VEG,VEGETATION,ESP01,Arbre,ESP174,Arbre d'espaces ouverts,ESP995,...,,,,,,,,Ville de Grenoble,5.741,45.190
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31887,16922,ESP19587,ESP19587,VEG,VEGETATION,ESP01,Arbre,ESP174,Arbre d'espaces ouverts,ESP989,...,,,,,,,,Ville de Grenoble,5.725,45.171
31888,12659,ESP19291,ESP19291,VEG,VEGETATION,ESP01,Arbre,ESP174,Arbre d'espaces ouverts,ESP989,...,,,,,,,,Ville de Grenoble,5.726,45.170
31889,1869,ESP16427,ESP16427,VEG,VEGETATION,ESP01,Arbre,ESP174,Arbre d'espaces ouverts,ESP989,...,,,,,,,,Ville de Grenoble,5.725,45.170
31890,24296,ESP14848,ESP14848,VEG,VEGETATION,ESP01,Arbre,ESP174,Arbre d'espaces ouverts,ESP989,...,,,,,,,,Ville de Grenoble,5.726,45.170


In [211]:
# One list of unique [lat, lon] pairs per planting year, in chronological
# order (one frame per year for HeatMapWithTime).
# Missing years are dropped first: a comparison with NaN never matches, so a
# NaN entry would only append an empty frame at the end of the list.
df_year_list = []
for year in test.ANNEEDEPLANTATION.dropna().sort_values().unique():
    df_year_list.append(test.loc[test.ANNEEDEPLANTATION == year, ['lat', 'lon']].groupby(['lat', 'lon']).sum().reset_index().values.tolist())

In [4]:
from folium.plugins import HeatMapWithTime

base_map = generateBaseMap(default_zoom_start=11)

HeatMapWithTime(df_year_list, radius=5, gradient={0.2: 'blue', 0.4: 'lime', 0.6: 'orange', 1: 'red'}, min_opacity=0.5, max_opacity=0.8, use_local_extrema=True).add_to(base_map)

ModuleNotFoundError: No module named 'folium'

In [215]:
base_map

## Quelle est la distance moyenne entre les arbres ?

In [66]:
import scipy
from sklearn.neighbors import DistanceMetric

In [67]:
f = DistanceMetric.get_metric('haversine')
r = 6371 * 10**3 # radius of the earth
#test_dist[['lat','lon']] = [[np.radians(x), np.radians(y)] for (x,y) in zip(test_dist['lat'],test_dist['lon'])]

In [125]:
np.radians(test.set_index(test["ELEM_POINT_ID"])[:3][['lat','lon']].to_numpy())

array([[0.78873387, 0.10017171],
       [0.78872833, 0.10017741],
       [0.78873334, 0.10017008]])

In [129]:
%%time 

Dist_df = pd.DataFrame(
    f.pairwise(np.radians(test.set_index(test["ELEM_POINT_ID"])[['lat','lon']].to_numpy()))*r,
    columns = test.ELEM_POINT_ID,
    index = test.ELEM_POINT_ID)

CPU times: user 1min 12s, sys: 4.2 s, total: 1min 16s
Wall time: 1min 15s


In [127]:
# Pairwise distance matrix as a plain array.
# NOTE(review): to_numpy() may return a view on Dist_df's data, in which case the
# assignment below also modifies Dist_df; confirm whether that is intended.
Dist_np = Dist_df.to_numpy()
# Mask zero distances (each tree against itself, and any pair of trees at
# identical coordinates) with a real float NaN so that nanmean ignores them.
Dist_np[Dist_np == 0] = np.nan

In [118]:
%%time  

dist_moy = np.nanmean(Dist_np)
print(f"la distance moyenne entre les arbres est de {dist_moy*10**(-3)} km")

la distance moyenne entre les arbres est de 1.4979597895495242 km
CPU times: user 1.24 ms, sys: 65 µs, total: 1.3 ms
Wall time: 1.1 ms


In [85]:
%%time  

Dist_df_mean_by_tree = Dist_df.mean(axis=0)

id_arbre_solitaire = Dist_df_mean_by_tree.idxmax()
id_arbre_social = Dist_df_mean_by_tree.idxmin()

CPU times: user 6.55 s, sys: 1.56 s, total: 8.11 s
Wall time: 8.11 s


In [86]:
id_arbre_solitaire, id_arbre_social

(41402, 6147)

#### Recherche des plus proches voisins

In [158]:
from scipy.spatial import cKDTree

def ckdnearest(pdA, pdB):
    """For every row of ``pdA``, look up its second-closest row of ``pdB``.

    Distances are Euclidean distances computed directly on the raw
    ('lat', 'lon') values, not great-circle distances. The second neighbour
    (k=2, column 1) is used, so that when ``pdA`` and ``pdB`` are the same set
    of points a point is not matched with itself.
    NOTE(review): with duplicated coordinates the second neighbour may still be
    the point itself; confirm whether that matters for the data.

    Parameters
    ----------
    pdA, pdB : DataFrame with 'lat' and 'lon' columns; ``pdB`` also needs an
        'ELEM_POINT_ID' column.

    Returns
    -------
    DataFrame: ``pdA`` with its index reset, plus the columns 'nearest'
    (ELEM_POINT_ID of the matched row of ``pdB``) and 'cKDtree_dist'
    (distance to it, in the units of the lat/lon values).
    """
    nA = pdA[['lat', 'lon']].to_numpy()
    nB = pdB[['lat', 'lon']].to_numpy()
    btree = cKDTree(nB)
    # Column 0 holds the closest point of pdB, column 1 the second closest.
    dist, idx = btree.query(nA, k=2)
    # idx contains row *positions* in nB, so the IDs are looked up positionally;
    # a label-based .loc lookup is only correct when pdB has a default RangeIndex.
    nearest_ids = pdB['ELEM_POINT_ID'].to_numpy()[idx[:, 1]]
    gdf = pd.concat(
        [pdA.reset_index(drop=True),
         pd.Series(nearest_ids, name='nearest'),
         pd.Series(dist[:, 1], name='cKDtree_dist')], axis=1)
    return gdf

df_nearest = ckdnearest(test, test)

In [159]:
df_nearest.head(3)

Unnamed: 0,ELEM_POINT_ID,CODE,NOM,GENRE,GENRE_DESC,CATEGORIE,CATEGORIE_DESC,SOUS_CATEGORIE,SOUS_CATEGORIE_DESC,CODE_PARENT,...,INTITULEPROTECTIONPLU,ANNEEABATTAGE,ESSOUCHEMENT,DIAMETREARBRE,CAUSEABATTAGE,COLLECTIVITE,lon,lat,nearest,cKDtree_dist
0,16750,ESP19318,ESP19318,VEG,VEGETATION,ESP01,Arbre,ESP174,Arbre d'espaces ouverts,ESP995,...,,,,,,Ville de Grenoble,5.739,45.191,27804,4.577e-05
1,20142,ESP18095,ESP18095,VEG,VEGETATION,ESP01,Arbre,ESP174,Arbre d'espaces ouverts,ESP995,...,,,,,,Ville de Grenoble,5.74,45.191,5230,9.501e-05
2,23843,ESP17861,ESP17861,VEG,VEGETATION,ESP01,Arbre,ESP174,Arbre d'espaces ouverts,ESP995,...,,,,,,Ville de Grenoble,5.739,45.191,27562,3.909e-05


In [144]:
Dist_df[df_nearest.iloc[0].ELEM_POINT_ID][df_nearest.iloc[0].nearest]

3.599403902279076

In [160]:
print(great_circle(
    df_nearest[df_nearest["ELEM_POINT_ID"] == 16750][["lat","lon"]].values,
    df_nearest[df_nearest["ELEM_POINT_ID"] == 27804][["lat","lon"]].values)
     )

0.0035994089869593954 km


In [161]:
df_nearest['great_circle_dist'] = [Dist_df[i][j] \
                                   for i,j \
                                   in zip(df_nearest["ELEM_POINT_ID"],df_nearest["nearest"])]

In [162]:
df_nearest

Unnamed: 0,ELEM_POINT_ID,CODE,NOM,GENRE,GENRE_DESC,CATEGORIE,CATEGORIE_DESC,SOUS_CATEGORIE,SOUS_CATEGORIE_DESC,CODE_PARENT,...,ANNEEABATTAGE,ESSOUCHEMENT,DIAMETREARBRE,CAUSEABATTAGE,COLLECTIVITE,lon,lat,nearest,cKDtree_dist,great_circle_dist
0,16750,ESP19318,ESP19318,VEG,VEGETATION,ESP01,Arbre,ESP174,Arbre d'espaces ouverts,ESP995,...,,,,,Ville de Grenoble,5.739,45.191,27804,4.577e-05,3.599
1,20142,ESP18095,ESP18095,VEG,VEGETATION,ESP01,Arbre,ESP174,Arbre d'espaces ouverts,ESP995,...,,,,,Ville de Grenoble,5.740,45.191,5230,9.501e-05,10.516
2,23843,ESP17861,ESP17861,VEG,VEGETATION,ESP01,Arbre,ESP174,Arbre d'espaces ouverts,ESP995,...,,,,,Ville de Grenoble,5.739,45.191,27562,3.909e-05,3.079
3,23841,ESP17860,ESP17860,VEG,VEGETATION,ESP01,Arbre,ESP174,Arbre d'espaces ouverts,ESP995,...,,,,,Ville de Grenoble,5.741,45.189,24091,3.203e-05,3.256
4,1778,ESP16403,ESP16403,VEG,VEGETATION,ESP01,Arbre,ESP174,Arbre d'espaces ouverts,ESP995,...,,,,,Ville de Grenoble,5.741,45.190,27889,4.660e-05,3.823
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
31887,16922,ESP19587,ESP19587,VEG,VEGETATION,ESP01,Arbre,ESP174,Arbre d'espaces ouverts,ESP989,...,,,,,Ville de Grenoble,5.725,45.171,24289,6.913e-05,7.009
31888,12659,ESP19291,ESP19291,VEG,VEGETATION,ESP01,Arbre,ESP174,Arbre d'espaces ouverts,ESP989,...,,,,,Ville de Grenoble,5.726,45.170,27356,6.509e-05,6.482
31889,1869,ESP16427,ESP16427,VEG,VEGETATION,ESP01,Arbre,ESP174,Arbre d'espaces ouverts,ESP989,...,,,,,Ville de Grenoble,5.725,45.170,9488,4.973e-05,3.899
31890,24296,ESP14848,ESP14848,VEG,VEGETATION,ESP01,Arbre,ESP174,Arbre d'espaces ouverts,ESP989,...,,,,,Ville de Grenoble,5.726,45.170,40541,8.953e-05,7.496


In [181]:
# Replace zero distances by NaN, in place, so that min()/argmin() below skip them.
Dist_df.replace(0, np.nan, inplace=True)

# For the first three trees: (smallest distance to another tree, argmin of that column).
# NOTE(review): Series.argmin returns an integer position within the column, not
# the ELEM_POINT_ID label; idxmin is presumably what is needed to compare the
# result with df_nearest["nearest"] (confirm).
[(Dist_df[i].min(skipna=True), Dist_df[i].argmin(skipna=True))  \
 for i \
 in df_nearest[:3]["ELEM_POINT_ID"]]

[(3.599403902279076, 30640),
 (10.515690855528318, 69),
 (3.0786731203544737, 80)]

In [204]:
from sklearn.neighbors import KDTree

In [205]:
KDTree.valid_metrics

['euclidean',
 'l2',
 'minkowski',
 'p',
 'manhattan',
 'cityblock',
 'l1',
 'chebyshev',
 'infinity']