# LULC time series derived directly from the Google Dynamic World land use / land cover (LULC) dataset

Author: Morgane Magnier (morgane.magnier@vattenfall.com)

Copyright © 2024 Magnier Morgane 

This notebook is part of a thesis project. The copyright of the thesis itself belongs to the student Morgane Magnier.  

**Rights and Intellectual Property**:  
- Vattenfall has the right to use the findings, methods, and conclusions of this thesis in its operations.  
- Any material generated within the framework of this thesis that is subject to intellectual property protection (e.g., source code, computer program, design, or invention) belongs to Vattenfall, unless otherwise agreed in writing.  

Permission is granted to view, copy, and share this notebook for **educational or personal purposes only**, provided that this notice is included in all copies.  

---


In [None]:
import pandas as pd
import ee, geemap, eemont

# Try to initialize the Earth Engine client; if that fails for any reason,
# run the authentication flow and initialize again.
try:
    ee.Initialize()
except Exception:
    ee.Authenticate()
    ee.Initialize()
        
import os
import plotly.express as px
import sys
sys.path.append('../wetlands_detection')
import wetlands_unsupervised_clustering

In [2]:
# Region of interest: a closed rectangular ring (the first vertex is repeated at the end).
# NOTE(review): coordinates are presumably (longitude, latitude) pairs — confirm.
roi = ee.Geometry.Polygon([[[17.204933,60.402663],[17.204933,60.455525],[17.2645,60.455525],[17.2645,60.402663],[17.204933,60.402663]]])

# Reference dates handed to the wetlands detection helper.
# NOTE(review): the names suggest the dates of minimum / maximum water extent
# (the "min" date is later than the "max" date) — confirm against getWetlandsS2.
min_water_date = ee.Date('2022-09-25')
max_water_date = ee.Date('2018-05-09')

# Result of the project-local clustering module; used further down as a mask
# on the Dynamic World composites.
wetlands = wetlands_unsupervised_clustering.getWetlandsS2(roi, min_water_date, max_water_date)

In [26]:
def filter_col(col, roi, band, thresh, scale=10, max_pixels=1e9):
    """Keep only the images of ``col`` that have enough ``band`` pixels over ``roi``.

    Every image is clipped to ``roi`` and tagged with a ``pixel_count``
    property: the result of ``ee.Reducer.count()`` on ``band`` inside ``roi``
    (per the Earth Engine documentation this reducer counts non-null inputs).
    Images whose count is below ``thresh`` times the largest count found in
    the collection are dropped.

    Args:
        col: ``ee.ImageCollection`` to filter.
        roi: ``ee.Geometry`` used both for clipping and for counting.
        band: Name of the band whose pixels are counted.
        thresh: Fraction of the maximum pixel count an image must reach to
            be kept.
        scale: Scale passed to ``reduceRegion`` (default 10, the value that
            was previously hard-coded).
        max_pixels: ``maxPixels`` passed to ``reduceRegion`` (default 1e9,
            the value that was previously hard-coded).

    Returns:
        The clipped ``ee.ImageCollection``, each image carrying a
        ``pixel_count`` property, restricted to images that reach the
        threshold.

    Raises:
        ValueError: If no pixel count could be retrieved (for example
            because the collection is empty).
    """
    def count_pixels(image):
        pixel_count = image.select(band).reduceRegion(
            reducer=ee.Reducer.count(),
            geometry=roi,
            scale=scale,
            maxPixels=max_pixels,
        ).get(band)
        return image.set('pixel_count', pixel_count)

    # Clip and count in a single pass over the collection.
    nb_pixels_ts = col.map(lambda image: count_pixels(image.clip(roi)))

    # Largest pixel count of the collection; aggregate_max avoids sorting the
    # whole collection only to read the property of its first element.
    ref_img_pixel_count = nb_pixels_ts.aggregate_max('pixel_count').getInfo()
    if ref_img_pixel_count is None:
        raise ValueError(
            "filter_col: no 'pixel_count' value available; "
            "the collection is probably empty over the region of interest."
        )
    pixel_count_threshold = ref_img_pixel_count * thresh

    # Keep the images that reach the pixel count threshold.
    return nb_pixels_ts.filter(ee.Filter.gte('pixel_count', pixel_count_threshold))

def get_gd_cloud_free_col(roi, thresh):
    """Return the Dynamic World V1 images over ``roi`` that pass ``filter_col``.

    The collection is restricted to images intersecting ``roi`` and then
    filtered on the pixel count of the 'label' band, using ``thresh`` as the
    fraction of the maximum count that an image must reach.
    """
    dynamic_world = ee.ImageCollection("GOOGLE/DYNAMICWORLD/V1").filterBounds(roi)
    return filter_col(dynamic_world, roi, 'label', thresh)

def filter_non_empty_bands(collection):
    """Drop the images of ``collection`` that have no band at all.

    Each image is tagged with a 'band_count' property (the size of its band
    name list); only images with 'band_count' > 0 are returned.
    """
    def tag_band_count(image):
        # Number of bands, obtained from the list of band names.
        return image.set('band_count', image.bandNames().size())

    at_least_one_band = ee.Filter.gt('band_count', 0)
    return collection.map(tag_band_count).filter(at_least_one_band)

def create_monthly_composites(collection):
    """Build one median composite per (year, month) from ``collection``.

    Years are taken from the 'system:time_start' of the input images; for
    every such year the twelve calendar months are composited. A month with
    no input image yields the median of an empty collection, so callers may
    want to discard composites without bands afterwards.

    Each composite carries 'year', 'month' and 'system:time_start'
    properties, the latter set to the first day of the month expressed in
    epoch milliseconds (the numeric form Earth Engine uses for that
    property).

    NOTE(review): ``median()`` is applied to every band, including the
    categorical 'label' band of Dynamic World; a mode may be more
    appropriate for class labels — confirm.

    Args:
        collection: ``ee.ImageCollection`` whose images have a
            'system:time_start' property.

    Returns:
        ``ee.ImageCollection`` of monthly composites.
    """
    def add_year_month(image):
        date = ee.Date(image.get('system:time_start'))
        year = date.get('year')
        month = date.get('month')
        return image.set('year', year).set('month', month)

    # Tag every image with its 'year' and 'month'.
    collection = collection.map(add_year_month)

    # Distinct years present in the collection, and the twelve months.
    years = ee.List(collection.aggregate_array('year')).distinct().sort()
    months = ee.List.sequence(1, 12)

    def composite_year_month(year, month):
        year = ee.Number(year)
        month = ee.Number(month)
        filtered = collection.filter(ee.Filter.calendarRange(year, year, 'year'))\
                             .filter(ee.Filter.calendarRange(month, month, 'month'))
        # Store the timestamp as milliseconds, not as an ee.Date object.
        time_start = ee.Date.fromYMD(year, month, 1).millis()
        return filtered.median().set('year', year).set('month', month).set('system:time_start', time_start)

    # One composite per year and per month, flattened into a single list.
    composites = years.map(lambda year: months.map(lambda month: composite_year_month(year, month))).flatten()

    return ee.ImageCollection(composites)

def calculate_pixel_counts(image):
    """Return a geometry-less feature holding the 'label' histogram of ``image``.

    The unweighted frequency histogram of the 'label' band is computed over
    the module-level ``roi`` at scale 10; its entries become the feature's
    properties, together with the image's 'year' and 'month'.
    """
    label_histogram = image.select('label').reduceRegion(
        reducer=ee.Reducer.frequencyHistogram().unweighted(),
        geometry=roi,
        scale=10,
        maxPixels=1e10
    ).get('label')
    histogram_feature = ee.Feature(None, ee.Dictionary(label_histogram))
    return histogram_feature.set({
        'year': image.get('year'),
        'month': image.get('month'),
    })

def format_date(image):
    """Attach a 'date' property: the image date formatted with 'Y-MM-dd'."""
    formatted_date = image.date().format('Y-MM-dd')
    return image.set('date', formatted_date)

def remap_label_values(image):
    """Replace the 'label' band of ``image`` by its integer-cast version.

    Despite the name, no value is remapped: the band is only converted with
    ``toInt()`` and written back over the existing 'label' band.
    """
    integer_label = image.select('label').toInt()
    return image.addBands(integer_label, overwrite=True)

# Per year

In [27]:
# Dynamic World images over roi whose 'label' pixel count reaches 30 % of the
# maximum count found in the collection.
gd = get_gd_cloud_free_col(roi, 0.3)
# One median composite per (year, month).
gd_composites = create_monthly_composites(gd)
# Discard composites that ended up without any band.
gd_composites = filter_non_empty_bands(gd_composites)
display(gd_composites)
# Mask every composite with the wetlands result, then cast 'label' to integer.
gd_composites_masked = gd_composites.map(lambda image: image.updateMask(wetlands))
gd_composites_masked = gd_composites_masked.map(remap_label_values)
display(gd_composites_masked)

In [28]:
# One feature per composite, holding the 'label' histogram plus 'year' and 'month'.
gd_ts = gd_composites_masked.map(calculate_pixel_counts)
display(gd_ts)

In [29]:
# Evaluate the feature collection and request the result as a pandas DataFrame.
gd_ts_df_raw = ee.data.computeFeatures({
        'expression': gd_ts,
        'fileFormat': 'PANDAS_DATAFRAME'
    })

In [97]:
import plotly.express as px

# Dynamic World class codes (as they appear in the histogram column names)
# mapped to their class names.
legend_dw = {
    '0': 'Water',
    '1': 'Trees',
    '2': 'Grass',
    '3': 'Flooded Vegetation',
    '4': 'Crops',
    '5': 'Shrub and Scrub',
    '6': 'Built',
    '7': 'Bare',
    '8': 'Snow and Ice',
}

# Work on a copy so the raw download stays untouched.
gd_ts_df = gd_ts_df_raw.copy()

# Drop the columns that are not class counts (ignored when absent, like in the
# Landsat processing cell).
gd_ts_df = gd_ts_df.drop(columns=['geo', 'null'], errors='ignore')

# Build a date (first day of the month) from the 'year' and 'month' columns.
gd_ts_df['date'] = pd.to_datetime(gd_ts_df[['year', 'month']].assign(day=1))

# Select the class-count columns by name. Selecting by numeric dtype would
# also pick up 'year' and 'month', which would then be added to the row
# totals and distort every proportion.
numeric_columns = [col for col in gd_ts_df.columns if str(col) in legend_dw]

# Proportion of each class = class count / total count of the row.
gd_ts_df_proportions = gd_ts_df[numeric_columns].div(gd_ts_df[numeric_columns].sum(axis=1), axis=0)

# Add back the identifying columns.
gd_ts_df_proportions['date'] = gd_ts_df['date']
gd_ts_df_proportions['year'] = gd_ts_df['year']

# Long format: one row per (date, class).
gd_ts_df_melted = gd_ts_df_proportions.melt(id_vars=['date', 'year'], var_name='class', value_name='proportion')

# Replace the class codes by their names.
gd_ts_df_melted['class'] = gd_ts_df_melted['class'].astype(str).map(legend_dw)

In [33]:
# Create the line plot
import plotly.express as px

# Line plot of the Dynamic World class proportions, one line per class.
# The points are monthly composites, hence "Monthly" in the title.
fig = px.line(
    gd_ts_df_melted, 
    x='date', 
    y='proportion', 
    color='class',
    labels={'proportion': 'Proportion', 'date': 'Date', 'class': 'Land Cover Class'},
    title="Monthly proportion of the surface occupied by each land cover class. <br> Derived from the Dynamic World dataset",
    # Classes that are commented out fall back to plotly's default colors.
    color_discrete_map={
        #"Water": "#419bdf",
        #"Trees": "#397d49",
        "Grass": "#88b053",
        "Flooded Vegetation": "#7a87c6",
        "Crops": "#e49635",
        "Shrub and Scrub": "#dfc35a",
        #"Built": "#c4281b",
        "Bare": "#a59b8f",
        #"Snow and Ice": "#b39fe1",
    }
)

# Center the title and make its font smaller.
fig.update_layout(
    title={
        'x': 0.5,
        'y': 0.95,
        'xanchor': 'center',
        'yanchor': 'top',
        'font': {'size': 14}
    },
    xaxis=dict(
        tickformat='%b %Y',  # Format dates as Month Year
    )
)

# Show the graph
fig.show()


In [52]:
import os
import pandas as pd

# Folder that contains the per-sensor, per-period CSV exports.
folder_path = 'csv_model_1a_l8_l7_l9_monthly_composite/'

# Files to load (adjust this list if needed).
csv_files = [
    'landsat_L7_2015_2016_wetlands.csv',
    'landsat_L7_2017_2018_wetlands.csv',
    'landsat_L7_2019_2020_wetlands.csv',
    'landsat_L7_2021_2022_wetlands.csv',
    'landsat_L7_2023_2024_wetlands.csv',
    'landsat_L8_2015_2016_wetlands.csv',
    'landsat_L8_2017_2018_wetlands.csv',
    'landsat_L8_2019_2020_wetlands.csv',
    'landsat_L8_2021_2022_wetlands.csv',
    'landsat_L8_2023_2024_wetlands.csv',
    'landsat_L9_2021_2022_wetlands.csv',
    'landsat_L9_2023_2024_wetlands.csv',
]

# Read every file that exists; missing or unreadable files are reported and
# skipped so that the remaining ones are still loaded.
dataframes = []
for file in csv_files:
    file_path = os.path.join(folder_path, file)
    if not os.path.exists(file_path):
        print(f"Fichier non trouvé : {file_path}")
        continue
    try:
        df = pd.read_csv(file_path)
    except Exception as e:
        print(f"Erreur lors de la lecture du fichier {file_path}: {e}")
    else:
        dataframes.append(df)

# Stack everything into one frame (empty frame when nothing could be read).
combined_df_raw = pd.concat(dataframes, ignore_index=True) if dataframes else pd.DataFrame()



In [53]:
# Preview the first rows of the concatenated CSV data.
combined_df_raw.head()

Unnamed: 0,geo,0,1,2,5,8,month,null,year,4,6,3,7,sensor
0,,973.0,648,310.0,1125.0,785.0,3,39671,2015,,,,,L7
1,,1365.0,107,79.0,666.0,1603.0,4,39560,2015,115.0,17.0,,,L7
2,,2431.0,1293,15.0,185.0,14.0,6,39560,2015,,,14.0,,L7
3,,929.0,1766,32.0,167.0,1.0,7,40608,2015,,,9.0,,L7
4,,1470.0,1837,167.0,438.0,,8,39560,2015,,,40.0,,L7


In [96]:
import pandas as pd
import numpy as np

# Class codes (as they appear in the CSV column names) mapped to class names.
legend_dw = {
    '0': 'Water',
    '1': 'Trees',
    '2': 'Grass',
    '3': 'Flooded Vegetation',
    '4': 'Crops',
    '5': 'Shrub and Scrub',
    '6': 'Built',
    '7': 'Bare',
    '8': 'Snow and Ice',
}

# Work on a copy so the concatenated CSV data stays untouched.
combined_df = combined_df_raw.copy()

# Drop the columns that are not class counts.
combined_df = combined_df.drop(columns=['geo', 'null'], errors='ignore')

# Build a date (first day of the month) from the 'year' and 'month' columns.
combined_df['date'] = pd.to_datetime(combined_df[['year', 'month']].assign(day=1))

# 'year' and 'month' are no longer needed once the date exists.
combined_df = combined_df.drop(columns=['year', 'month'], errors='ignore')

# Select the class-count columns by name rather than by dtype, so that any
# other numeric column present in the CSV files cannot leak into the row
# totals used for the proportions.
numeric_columns = [col for col in combined_df.columns if str(col) in legend_dw]

# One row per date: numeric columns are averaged over the rows sharing that
# date, other columns keep their first value.
combined_df = combined_df.groupby('date').agg(lambda x: x.mean() if np.issubdtype(x.dtype, np.number) else x.iloc[0]).reset_index()

# Proportion of each class = class count / total count of the row.
combined_df_proportions = combined_df[numeric_columns].div(combined_df[numeric_columns].sum(axis=1), axis=0)

# Add back the date.
combined_df_proportions['date'] = combined_df['date']

# Long format: one row per (date, class).
combined_df_melted = combined_df_proportions.melt(id_vars=['date'], var_name='class', value_name='proportion')

# Replace the class codes by their names.
combined_df_melted['class'] = combined_df_melted['class'].astype(str).map(legend_dw)

# Display the resulting DataFrame
combined_df_melted.head()


Unnamed: 0,date,class,proportion
0,2015-02-01,Water,0.019857
1,2015-03-01,Water,0.155405
2,2015-04-01,Water,0.363222
3,2015-06-01,Water,0.615132
4,2015-07-01,Water,0.319904


In [74]:
# Create the line plot
import plotly.express as px

# Line plot of the class proportions obtained from the Landsat CSV data, one
# line per class. The title previously repeated the Dynamic World wording of
# the other figure although this one plots combined_df_melted.
fig = px.line(
    combined_df_melted, 
    x='date', 
    y='proportion', 
    color='class',
    labels={'proportion': 'Proportion', 'date': 'Date', 'class': 'Land Cover Class'},
    title="Monthly proportion of the surface occupied by each land cover class. <br> Derived from Model 1 classification of Landsat monthly composites",
    # Classes that are commented out fall back to plotly's default colors.
    color_discrete_map={
        #"Water": "#419bdf",
        #"Trees": "#397d49",
        "Grass": "#88b053",
        "Flooded Vegetation": "#7a87c6",
        "Crops": "#e49635",
        "Shrub and Scrub": "#dfc35a",
        #"Built": "#c4281b",
        "Bare": "#a59b8f",
        #"Snow and Ice": "#b39fe1",
    }
)

# Center the title and make its font smaller.
fig.update_layout(
    title={
        'x': 0.5,
        'y': 0.95,
        'xanchor': 'center',
        'yanchor': 'top',
        'font': {'size': 14}
    },
    xaxis=dict(
        tickformat='%b %Y',  # Format dates as Month Year
    )
)

# Show the graph
fig.show()

TypeError: incompatible index of inserted column with frame index

In [95]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Missing proportions are plotted as 0 in both long-format frames.
gd_ts_df_melted['proportion'] = gd_ts_df_melted['proportion'].fillna(0)
combined_df_melted['proportion'] = combined_df_melted['proportion'].fillna(0)

# Classes left out of the comparison.
classes_to_exclude = ['Bare', 'Built']

# Rows of each source whose class is not excluded.
keep_gd = ~gd_ts_df_melted['class'].isin(classes_to_exclude)
gd_ts_df_filtered = gd_ts_df_melted[keep_gd]

keep_combined = ~combined_df_melted['class'].isin(classes_to_exclude)
combined_df_filtered = combined_df_melted[keep_combined]

# Line color per class; classes without an entry are drawn in black.
color_discrete_map = {
    "Water": "#419bdf",
    "Trees": "#397d49",
    "Grass": "#88b053",
    "Flooded Vegetation": "#7a87c6",
    "Crops": "#e49635",
    "Shrub and Scrub": "#dfc35a",
    "Bare": "#a59b8f",
    "Snow and Ice": "#b39fe1",
}

# Two stacked panels sharing the x axis: Dynamic World on top, Landsat below.
fig = make_subplots(rows=2, cols=1, shared_xaxes=True, vertical_spacing=0.1,
                    subplot_titles=("Time Series of Land Cover Class Proportions from Monthly Composite of Pre-classified Dynamic World Maps", 
                                    "Time Series of Land Cover Class Proportions from Model 1 Classification of Landsat Monthly Composites"))

# Top panel: one trace per class of gd_ts_df_filtered.
for cls in gd_ts_df_filtered['class'].unique():
    filtered_df = gd_ts_df_filtered[gd_ts_df_filtered['class'] == cls]
    gd_trace = go.Scatter(
        x=filtered_df['date'],
        y=filtered_df['proportion'],
        mode='lines+markers',
        name=f"GD_TS - {cls}",
        line=dict(color=color_discrete_map.get(cls, '#000000')),
    )
    fig.add_trace(gd_trace, row=1, col=1)

# Bottom panel: one trace per class of combined_df_filtered.
for cls in combined_df_filtered['class'].unique():
    filtered_df = combined_df_filtered[combined_df_filtered['class'] == cls]
    combined_trace = go.Scatter(
        x=filtered_df['date'],
        y=filtered_df['proportion'],
        mode='lines+markers',
        name=f"Combined - {cls}",
        line=dict(color=color_discrete_map.get(cls, '#000000')),
    )
    fig.add_trace(combined_trace, row=2, col=1)

# Overall size, title and legend.
fig.update_layout(
    height=800,
    width=2000,
    title_text="Time Series Comparison of Land Cover Class Proportions in Wetlands Derived from Dynamic World and Landsat Monthly Composites",
    showlegend=True,
)

# Date ticks on the x axes.
fig.update_xaxes(
    tickformat="%b %Y",  # month and year
    dtick="M3",  # one tick every three months
    tickangle=45,  # tilted labels for readability
    tickmode="linear",  # evenly spaced ticks driven by dtick
)

fig.show()



In [132]:
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from scipy.stats import linregress

# Classes compared between the two sources.
classes_to_compare = ['Grass', 'Shrub and Scrub']  # other candidates: 'Water', 'Trees', 'Flooded Vegetation', 'Crops'

# Marker / line color per class.
color_discrete_map = {
    "Water": "#419bdf",
    "Trees": "#397d49",
    "Grass": "#88b053",
    "Flooded Vegetation": "#7a87c6",
    "Crops": "#e49635",
    "Shrub and Scrub": "#dfc35a"
}

# Single figure holding, for every class, a scatter and its trend line.
fig = go.Figure()

# One pass per compared class.
for cls in classes_to_compare:
    # Rows of each source for this class.
    class_gd_ts = gd_ts_df_filtered[gd_ts_df_filtered['class'] == cls]
    class_combined = combined_df_filtered[combined_df_filtered['class'] == cls]

    # Align the two time series on the dates they have in common
    # (default inner merge on 'date').
    merged_class = pd.merge(class_gd_ts[['date', 'proportion']], 
                            class_combined[['date', 'proportion']], 
                            on='date', suffixes=('_gd_ts', '_combined'))

    # Correlation between the two proportion series.
    correlation = merged_class['proportion_gd_ts'].corr(merged_class['proportion_combined'])

    # Scatter: Dynamic World proportion (x) against Landsat proportion (y).
    fig.add_trace(go.Scatter(
        x=merged_class['proportion_gd_ts'], y=merged_class['proportion_combined'],
        mode='markers', name=f'{cls} (r={correlation:.2f})',
        marker=dict(color=color_discrete_map.get(cls, '#000000'))
    ))

    # Linear regression of the Landsat proportion on the Dynamic World proportion.
    slope, intercept, r_value, p_value, std_err = linregress(merged_class['proportion_gd_ts'], merged_class['proportion_combined'])

    # Residuals of the fit and their standard deviation.
    predicted_values = slope * merged_class['proportion_gd_ts'] + intercept
    residuals = merged_class['proportion_combined'] - predicted_values
    residual_std = np.std(residuals)

    # Trend line, with the residual standard deviation shown in the legend.
    fig.add_trace(go.Scatter(
        x=merged_class['proportion_gd_ts'], y=predicted_values,
        mode='lines', name=f'{cls} Trend (Std Dev: {residual_std:.4f})',
        line=dict(color=color_discrete_map.get(cls, '#000000'), dash='dash')
    ))

# Titles, size and legend.
fig.update_layout(
    title="Correlation of Land Cover Class Proportions between Dynamic World and Landsat",
    xaxis_title="Proportion from Dynamic World",
    yaxis_title="Proportion from Landsat",
    height=800,
    width=1200,
    showlegend=True
)

# Show the figure.
fig.show()



In [125]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Ratio between the proportions of selected land cover classes.
def calculate_ratio(df, numerator_classes=('Grass', 'Crops'), denominator_class='Shrub and Scrub'):
    """Return (sum of ``numerator_classes``) / ``denominator_class`` per date.

    With the defaults this is (Grass + Crops) / Shrub and Scrub.

    The class series are aligned on their 'date' value rather than on their
    position, and a class with no row for a date contributes 0 for that
    date. A class that never occurs in ``df`` therefore no longer turns the
    whole result into NaN.

    Args:
        df: Long-format frame with 'date', 'class' and 'proportion' columns.
        numerator_classes: Names of the classes summed in the numerator.
        denominator_class: Name of the class used as denominator.

    Returns:
        A Series with a fresh 0..n-1 index holding one ratio per distinct
        date of ``df``, in the order of ``df['date'].unique()``. A zero
        denominator yields inf (or NaN for 0 / 0), as with plain pandas
        division.
    """
    dates = pd.Index(df['date'].unique())

    def _proportion_by_date(class_name):
        # Proportions of one class indexed by date, on the common date axis.
        rows = df.loc[df['class'] == class_name, ['date', 'proportion']]
        series = rows.set_index('date')['proportion']
        # Keep the first value if a date is repeated, so reindex stays valid.
        series = series[~series.index.duplicated(keep='first')]
        return series.reindex(dates, fill_value=0.0)

    numerator = sum(_proportion_by_date(name) for name in numerator_classes)
    ratio = numerator / _proportion_by_date(denominator_class)
    return ratio.reset_index(drop=True)

# Ratio time series for Landsat and for Sentinel (Dynamic World).
landsat_ratio = calculate_ratio(combined_df_filtered)
sentinel_ratio = calculate_ratio(gd_ts_df_filtered)

# Pair each ratio with its date. The pairing is positional: the i-th ratio
# goes with the i-th distinct date of the corresponding frame.
landsat_ratio_df = pd.DataFrame({'date': combined_df_filtered['date'].unique(), 'ratio': landsat_ratio})
sentinel_ratio_df = pd.DataFrame({'date': gd_ts_df_filtered['date'].unique(), 'ratio': sentinel_ratio})


# Two stacked panels sharing the x axis: Landsat on top, Dynamic World below.
# NOTE(review): the subplot titles mention "Flooded Vegetation", which
# calculate_ratio as called here does not include in the numerator — confirm
# whether the titles or the ratio should change.
fig = make_subplots(rows=2, cols=1, shared_xaxes=True, vertical_spacing=0.1,
                    subplot_titles=("Time Series of Ratio of the proportions of Grass + Flooded Vegetation + Crops to Shrub and Scrub <br> in wetlands from Landsat monthly composite classification maps",
                                    "Time Series of Ratio of the proportions of Grass + Flooded Vegetation + Crops to Shrub and Scrub <br> in wetlands from Dynamic World monthly composite classification maps"))

# Landsat ratio.
fig.add_trace(go.Scatter(
    x=landsat_ratio_df['date'], y=landsat_ratio_df['ratio'],
    mode='lines+markers', name='Landsat Ratio',
    line=dict(color='green')
), row=1, col=1)

# Sentinel (Dynamic World) ratio.
fig.add_trace(go.Scatter(
    x=sentinel_ratio_df['date'], y=sentinel_ratio_df['ratio'],
    mode='lines+markers', name='DW Ratio',
    line=dict(color='green')
), row=2, col=1)

fig.update_xaxes(
    tickformat="%b %Y",  # month and year
    dtick="M3",  # one tick every three months
    tickangle=45,  # tilted labels for readability
    tickmode="linear"  # evenly spaced ticks driven by dtick
)

# Size and legend of the time series figure.
fig.update_layout(
    title="",
    height=600,
    width=1000,
    showlegend=True
)
# Show the figure.
fig.show()


# Correlation between the two ratios, on the dates they have in common.
merged_ratio = pd.merge(landsat_ratio_df, sentinel_ratio_df, on='date', suffixes=('_landsat', '_sentinel'))
merged_ratio = merged_ratio.dropna()
# Discard infinite ratios (zero denominator). The check is limited to the two
# ratio columns so that the datetime 'date' column is never passed to
# np.isfinite.
ratio_columns = ['ratio_landsat', 'ratio_sentinel']
merged_ratio = merged_ratio[np.isfinite(merged_ratio[ratio_columns]).all(axis=1)]
correlation_ratio = merged_ratio['ratio_landsat'].corr(merged_ratio['ratio_sentinel'])

# Figure showing the correlation.
fig_corr = go.Figure()

# Scatter: Landsat ratio (x) against Dynamic World ratio (y).
fig_corr.add_trace(go.Scatter(
    x=merged_ratio['ratio_landsat'], y=merged_ratio['ratio_sentinel'],
    mode='markers', name=f'Correlation (r={correlation_ratio:.2f})',
    marker=dict(color='purple')
))


# Linear regression of the Dynamic World ratio on the Landsat ratio.
slope, intercept, r_value, p_value, std_err = linregress(merged_ratio['ratio_landsat'], merged_ratio['ratio_sentinel'])

# Values predicted by the trend line.
predicted_values = slope * merged_ratio['ratio_landsat'] + intercept

# Residuals (observed minus predicted) and their standard deviation.
residuals = merged_ratio['ratio_sentinel'] - predicted_values
residual_std = np.std(residuals)

# Band of +/- one residual standard deviation around the trend line
# (a dispersion band, not a statistical confidence interval).
upper_bound = predicted_values + residual_std
lower_bound = predicted_values - residual_std

# Trend line, with the residual standard deviation shown in the legend.
fig_corr.add_trace(go.Scatter(
    x=merged_ratio['ratio_landsat'], y=predicted_values,
    mode='lines', name=f'Trend Line (Std Dev: {residual_std:.4f})',
    line=dict(color='purple', dash='dash')
))

# Upper and lower edges of the band; a single legend entry covers both.
fig_corr.add_trace(go.Scatter(
    x=merged_ratio['ratio_landsat'], y=upper_bound,
    mode='lines', name='Upper Bound & Lower Bound',
    line=dict(color='rgba(255,0,0,0.2)'), showlegend=True
))

fig_corr.add_trace(go.Scatter(
    x=merged_ratio['ratio_landsat'], y=lower_bound,
    mode='lines', name='Lower Bound',
    line=dict(color='rgba(255,0,0,0.2)'), showlegend=False
))

# Titles, size and legend of the correlation figure.
fig_corr.update_layout(
    title="Correlation of  Ratio between Landsat and Dynamic World data",
    xaxis_title="Landsat Ratio",
    yaxis_title="DW Ratio",
    height=600,
    width=800,
    showlegend=True
)

# Show the correlation figure.
fig_corr.show()
