In [None]:
import os
import random
from PIL import Image
from matplotlib import pyplot as plt
import numpy as np
import geopandas as gpd
import rasterio
import treescope
import rasterio
import rasterio.features
import rasterio.mask
import rasterio.warp

# Turn on treescope's interactive rendering, with automatic array visualisation.
treescope.basic_interactive_setup(autovisualize_arrays=True)

# Directory that contains the NDVI GeoTIFF images.
image_dir = "../datasets/ndvi_geotiff"

# Load the shapefile, reproject it to EPSG:4326 and discard the "gml_id"
# column in a single chain (no in-place mutation of the loaded frame).
city_gdf = (
    gpd.read_file("../datasets/villes_villages_fleuris.shp")
    .to_crs(epsg=4326)
    .drop(columns=["gml_id"])
)

In [275]:

# Mean NDVI per city: average the raster pixels that fall inside each city's
# polygon and store the result in the 'mean_ndvi' column of `ndvi_gdf`.
ndvi_gdf = city_gdf.copy()
ndvi_gdf['mean_ndvi'] = np.nan  # stays NaN when a city has no usable pixel

for index, row in city_gdf.iterrows():
    code_insee = row["code_insee"]
    image_path = f"{image_dir}/{code_insee}.tiff"

    with rasterio.open(image_path) as dataset:
        # masked=True hides pixels flagged as nodata in the raster metadata
        # instead of letting them count as real NDVI values.
        band = dataset.read(1, masked=True)

        # invert=True -> True for pixels INSIDE the polygon.
        # NOTE(review): city_gdf is reprojected to EPSG:4326 when it is loaded;
        # this assumes the GeoTIFFs use the same CRS - TODO confirm.
        inside_city = rasterio.features.geometry_mask(
            [row["geometry"]],
            out_shape=band.shape,
            transform=dataset.transform,
            invert=True,
        )

    # Reduce to a plain 1-D float array of valid in-polygon pixels so the mean
    # does not depend on how NumPy's nan-functions treat MaskedArray inputs.
    city_pixels = band[inside_city].compressed().astype(float)

    # nanmean skips NaN pixels; an empty or all-NaN selection keeps the NaN
    # default rather than emitting a "Mean of empty slice" warning.
    if city_pixels.size and not np.isnan(city_pixels).all():
        ndvi_gdf.loc[index, 'mean_ndvi'] = np.nanmean(city_pixels)

In [None]:
# Preview the first rows, including the newly added 'mean_ndvi' column.
ndvi_gdf.head()

In [None]:
# Distribution of mean NDVI for each "fleurs" level (1 to 4).
# `plt` is already imported in the notebook's first cell.
flower_levels = range(1, 5)

# Drop NaN means first: Matplotlib's boxplot does not filter NaN values, and a
# single NaN in a group makes its statistics NaN, so the box is not drawn.
ndvi_by_level = [
    ndvi_gdf.loc[ndvi_gdf["fleurs"] == level, "mean_ndvi"].dropna().values
    for level in flower_levels
]

fig, ax = plt.subplots()
# Default box positions are 1..4, which match the "fleurs" levels on the x axis.
ax.boxplot(ndvi_by_level)
ax.set_xlabel("Number of Flowers")
ax.set_ylabel("Mean NDVI")
ax.set_title("Boxplot of Mean NDVI")
plt.show()

In [None]:
# Pearson correlation (pandas' default method) between the "fleurs" column
# and the per-city mean NDVI.
correlation = ndvi_gdf['fleurs'].corr(
    other=ndvi_gdf['mean_ndvi'],
    method='pearson',
)
correlation

The main issue currently is that each city's region covers not only the urban area but also the neighbouring fields that are part of the city. This introduces a strong bias into the modeling of the city's vegetation.

With the current data and the current modeling, we obtained a correlation of -0.17 between the mean NDVI and the city's "fleurs" value, which means there is essentially no correlation between the two.
