# Classification time series : Monthly composite from model 1a

## Entire Bredforsen

In [2]:
import os

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

In [36]:

# Monthly-composite CSV exports for the entire area; file names carry the sensor and year range
csv_files = [
    "landsat_L9_2023_2024.csv",
    "landsat_L8_2019_2020.csv",
    "landsat_L8_2021_2022.csv",
    "landsat_L8_2023_2024.csv",
    "landsat_L9_2021_2022.csv",
    "landsat_L7_2003_2004.csv",
    "landsat_L7_2005_2006.csv",
    "landsat_L7_2007_2008.csv",
    "landsat_L7_2009_2010.csv",
    "landsat_L7_2011_2012.csv",
    "landsat_L7_2013_2014.csv",
    "landsat_L7_2015_2016.csv",
    "landsat_L7_2017_2018.csv",
    "landsat_L7_2019_2020.csv",
    "landsat_L7_2021_2022.csv",
    "landsat_L8_2013_2014.csv",
    "landsat_L8_2015_2016.csv",
    "landsat_L8_2017_2018.csv",
    "landsat_L7_2001_2002.csv",
    "landsat_L7_1999_2000.csv",
]

# Read every export, normalise its columns, and keep the frames for concatenation
dataframes = []
for file in csv_files:
    # The 'geo' column is dropped when present; files without it are left untouched
    df = pd.read_csv(f"csv_model_1a_l8_l7_l9_monthly_composite/{file}").drop(
        columns=['geo'], errors='ignore'
    )

    # Collapse 'year' + 'month' into one 'date' column set to the first day of that month
    if {'year', 'month'}.issubset(df.columns):
        first_of_month = df['year'].astype(str) + '-' + df['month'].astype(str) + '-01'
        df = df.assign(date=pd.to_datetime(first_of_month)).drop(columns=['year', 'month'])

    dataframes.append(df)

# Stack all frames into a single table with a fresh 0..n-1 index
data = pd.concat(dataframes, ignore_index=True)

In [54]:

# Plot, for every date, the share of the total surface taken by each land-cover class.
# `px` and `go` come from the notebook's top import cell, so this cell also runs after
# "Restart Kernel -> Run All" (previously `px` was only defined much later in the notebook).

# Map the class codes used as CSV column names to readable class names
class_mapping = {
    '0': "water",
    '1': "trees",
    '2': "grass",
    '3': "flooded_vegetation",
    '4': "crops",
    '5': "shrub_and_scrub",
    '6': "built",
    '7': "bare",
    '8': "snow_and_ice",
    'null': "null"
}

# Rename the class columns and replace missing values with 0.
# Re-running the cell is safe: already-renamed columns are simply left as they are.
data = data.rename(columns=class_mapping).fillna(0)

# Only melt the class columns that actually exist, so a class missing from every CSV
# does not abort the cell with a KeyError
class_columns = [name for name in class_mapping.values() if name in data.columns]
melted_data = data.melt(id_vars=['date'], value_vars=class_columns, var_name='class', value_name='surface')

# Average the surface of each class per date (several input rows can share the same date)
mean_data = melted_data.groupby(['date', 'class'], as_index=False)['surface'].mean()

# Total surface over all classes at each date
mean_data['total_surface'] = mean_data.groupby('date')['surface'].transform('sum')

# Share of each class relative to that total
mean_data['proportion'] = mean_data['surface'] / mean_data['total_surface']

fig = px.line(mean_data, x='date', y='proportion', color='class',
              color_discrete_map={
                  "water": "#419bdf",
                  "trees": "#397d49",
                  "grass": "#88b053",
                  "flooded_vegetation": "#7a87c6",
                  "crops": "#e49635",
                  "shrub_and_scrub": "#dfc35a",
                  "built": "#c4281b",
                  "bare": "#a59b8f",
                  "snow_and_ice": "#b39fe1",
                  "null": "#ff97ff"
              })

# Centre the title and use a smaller font so the two-line text fits
fig.update_layout(
    title=go.layout.Title(
        text="Proportion of Surface Area for each Google Dynamic Land Cover Classes. <br> Derived from the classification of Landsat 7, 8, and 9 monthly composites using Model 1.a",
        xref="paper",
        x=0.5,
        font=dict(size=14)
    )
)

# Display the figure
fig.show()


In [56]:
# Same proportion plot as the previous cell, restricted to a subset of classes.
# Relies on `mean_data` (with its 'proportion' column) computed by the previous cell and on
# `px` / `go` from the notebook's top import cell (previously `px` was undefined on a fresh kernel).

# Classes to keep in the figure
classes_to_keep = ["shrub_and_scrub", "grass", "flooded_vegetation", "crops", "bare"]

# Keep only the rows of the selected classes; proportions stay relative to the total over ALL classes
filtered_data = mean_data[mean_data['class'].isin(classes_to_keep)]

fig = px.line(filtered_data, x='date', y='proportion', color='class',
              color_discrete_map={
                  "grass": "#88b053",
                  "flooded_vegetation": "#7a87c6",
                  "crops": "#e49635",
                  "shrub_and_scrub": "#dfc35a",
                  "bare": "#a59b8f",
              })

# Centre the title and use a smaller font so the two-line text fits
fig.update_layout(
    title=go.layout.Title(
        text="Proportion of Surface Area for each Google Dynamic Land Cover Classes. <br> Derived from the classification of Landsat 7, 8, and 9 monthly composites using Model 1.a",
        xref="paper",
        x=0.5,
        font=dict(size=14)
    )
)

# Display the figure
fig.show()


## Wetlands

In [64]:
# Monthly-composite CSV exports restricted to wetlands; file names carry the sensor and year range
csv_files = [
    "landsat_L9_2023_2024_wetlands.csv",
    "landsat_L8_2019_2020_wetlands.csv",
    "landsat_L8_2021_2022_wetlands.csv",
    "landsat_L8_2023_2024_wetlands.csv",
    "landsat_L9_2021_2022_wetlands.csv",
    "landsat_L7_2003_2004_wetlands.csv",
    "landsat_L7_2005_2006_wetlands.csv",
    "landsat_L7_2007_2008_wetlands.csv",
    "landsat_L7_2009_2010_wetlands.csv",
    "landsat_L7_2011_2012_wetlands.csv",
    "landsat_L7_2013_2014_wetlands.csv",
    "landsat_L7_2015_2016_wetlands.csv",
    "landsat_L7_2017_2018_wetlands.csv",
    "landsat_L7_2019_2020_wetlands.csv",
    "landsat_L7_2021_2022_wetlands.csv",
    "landsat_L8_2013_2014_wetlands.csv",
    "landsat_L8_2015_2016_wetlands.csv",
    "landsat_L8_2017_2018_wetlands.csv",
    "landsat_L7_2001_2002_wetlands.csv",
    "landsat_L7_1999_2000_wetlands.csv",
]

# Read every export, normalise its columns, and keep the frames for concatenation
dataframes = []
for file in csv_files:
    # The 'geo' column is dropped when present; files without it are left untouched
    df = pd.read_csv(f"csv_model_1a_l8_l7_l9_monthly_composite/{file}").drop(
        columns=['geo'], errors='ignore'
    )

    # Collapse 'year' + 'month' into one 'date' column set to the first day of that month
    if {'year', 'month'}.issubset(df.columns):
        first_of_month = df['year'].astype(str) + '-' + df['month'].astype(str) + '-01'
        df = df.assign(date=pd.to_datetime(first_of_month)).drop(columns=['year', 'month'])

    dataframes.append(df)

# Stack all frames into a single table with a fresh 0..n-1 index
data = pd.concat(dataframes, ignore_index=True)

In [89]:
# Plot the share of selected land-cover classes in wetlands, per date.
# `px` and `go` come from the notebook's top import cell, so this cell also runs after
# "Restart Kernel -> Run All" (previously `px` was only defined much later in the notebook).

# Map the class codes used as CSV column names to readable class names
class_mapping = {
    '0': "water",
    '1': "trees",
    '2': "grass",
    '3': "flooded_vegetation",
    '4': "crops",
    '5': "shrub_and_scrub",
    '6': "built",
    '7': "bare",
    '8': "snow_and_ice",
    'null': "null"
}

# Rename the class columns and replace missing values with 0
data = data.rename(columns=class_mapping).fillna(0)

# Only melt the class columns that actually exist, so a class missing from every CSV
# does not abort the cell with a KeyError
class_columns = [name for name in class_mapping.values() if name in data.columns]
melted_data = data.melt(id_vars=['date'], value_vars=class_columns, var_name='class', value_name='surface')

# Average the surface of each class per date (several input rows can share the same date)
mean_data = melted_data.groupby(['date', 'class'], as_index=False)['surface'].mean()

# Classes that count towards the total surface ("built" and "null" are left out)
classes_in_total = ["shrub_and_scrub", "grass", "flooded_vegetation", "crops", "bare", "water", "trees", "snow_and_ice"]

# .copy() makes the filtered frame independent, so the column assignments below do not
# trigger pandas' SettingWithCopyWarning
mean_data = mean_data[mean_data['class'].isin(classes_in_total)].copy()

# Total surface over the retained classes at each date, then the share of each class
mean_data['total_surface'] = mean_data.groupby('date')['surface'].transform('sum')
mean_data['proportion'] = mean_data['surface'] / mean_data['total_surface']

# Classes actually drawn; their proportions remain relative to the total computed above
classes_to_keep = ["shrub_and_scrub", "grass", "flooded_vegetation", "crops", 'water','snow_and_ice']
mean_data = mean_data[mean_data['class'].isin(classes_to_keep)]

fig = px.line(mean_data, x='date', y='proportion', color='class',
              color_discrete_map={
                  "water": "#419bdf",
                  "trees": "#397d49",
                  "grass": "#88b053",
                  "flooded_vegetation": "#7a87c6",
                  "crops": "#e49635",
                  "shrub_and_scrub": "#dfc35a",
                  "built": "#c4281b",
                  "bare": "#a59b8f",
                  "snow_and_ice": "#b39fe1",
                  "null": "#ff97ff"
              })

# Centre the title and use a smaller font so the two-line text fits
fig.update_layout(
    title=go.layout.Title(
        text="Proportion of Surface Area for each Google Dynamic Land Cover Classes. <br> Derived from the classification of Landsat 7, 8, and 9 monthly composites using Model 1.a",
        xref="paper",
        x=0.5,
        font=dict(size=14)
    )
)

# Display the figure
fig.show()

In [69]:

# Plot, per date, the ratio shrub_and_scrub / (grass + flooded_vegetation + crops) in wetlands.
# `px` and `go` come from the notebook's top import cell.

# Map the class codes used as CSV column names to readable class names
class_mapping = {
    '0': "water",
    '1': "trees",
    '2': "grass",
    '3': "flooded_vegetation",
    '4': "crops",
    '5': "shrub_and_scrub",
    '6': "built",
    '7': "bare",
    '8': "snow_and_ice",
    'null': "null"
}

# Rename the class columns and replace missing values with 0
data = data.rename(columns=class_mapping).fillna(0)

# Only melt the class columns that actually exist, so a class missing from every CSV
# does not abort the cell with a KeyError
class_columns = [name for name in class_mapping.values() if name in data.columns]
melted_data = data.melt(id_vars=['date'], value_vars=class_columns, var_name='class', value_name='surface')

# Keep the four classes that enter the ratio
classes_of_interest = ["grass", "flooded_vegetation", "crops", "shrub_and_scrub"]
filtered_data = melted_data[melted_data['class'].isin(classes_of_interest)]

# Aggregate to ONE value per (date, class) before building the ratio. Several input rows can
# share a date; previously the denominator was summed over all of them while the numerator
# stayed one row per input row, which mixed scales and produced duplicated points per date.
surface_per_date = filtered_data.groupby(['date', 'class'], as_index=False)['surface'].sum()

# Denominator: grass + flooded_vegetation + crops per date
sum_grass_flooded_crops = (
    surface_per_date[surface_per_date['class'].isin(["grass", "flooded_vegetation", "crops"])]
    .groupby('date')['surface'].sum()
    .reset_index()
    .rename(columns={'surface': 'grass_flooded_crops_sum'})
)

# Numerator: shrub_and_scrub per date
shrub_data = surface_per_date[surface_per_date['class'] == "shrub_and_scrub"]

# Align numerator and denominator on the date
merged_data = pd.merge(sum_grass_flooded_crops, shrub_data, on='date', how='inner')

# A zero denominator is turned into NaN so the ratio is left undefined (a gap in the line)
# instead of producing +/-inf values
denominator = merged_data['grass_flooded_crops_sum'].where(merged_data['grass_flooded_crops_sum'] > 0)
merged_data['proportion'] = merged_data['surface'] / denominator

# Line plot of the ratio over time
fig = px.line(merged_data, x='date', y='proportion',
              title="Proportion of Shrub and Scrub Relative to Grass, Flooded Vegetation, and Crops",
              labels={'proportion': 'Shrub and Scrub / (Grass + Flooded Vegetation + Crops)'})

# Replace the title with a centred two-line version in a smaller font
fig.update_layout(
    title=go.layout.Title(
        text="Proportion of Shrub and Scrub Relative to Grass, Flooded Vegetation, and Crops.<br>Derived from the classification of Landsat 7, 8, and 9 monthly composites using Model 1.a",
        xref="paper",
        x=0.5,
        font=dict(size=14)
    )
)

# Display the figure
fig.show()

# Classification time series : Each observation from model 1a

## In wetlands

In [None]:
# Per-observation CSV exports; file names carry the sensor and year range
csv_files = [
    "landsat_L9_2023_2024_each_observations.csv", "landsat_L8_2019_2020_each_observations.csv", "landsat_L8_2021_2022_each_observations.csv",
    "landsat_L8_2023_2024_each_observations.csv", "landsat_L7_2003_2004_each_observations.csv",
    "landsat_L7_2005_2006_each_observations.csv", "landsat_L7_2007_2008_each_observations.csv", "landsat_L7_2009_2010_each_observations.csv",
    "landsat_L7_2011_2012_each_observations.csv", "landsat_L7_2013_2014_each_observations.csv", "landsat_L7_2015_2016_each_observations.csv",
    "landsat_L7_2017_2018_each_observations.csv", "landsat_L7_2019_2020_each_observations.csv", "landsat_L7_2021_2022_each_observations.csv",
    "landsat_L8_2013_2014_each_observations.csv", "landsat_L8_2015_2016_each_observations.csv", "landsat_L8_2017_2018_each_observations.csv",
    "landsat_L7_2001_2002_each_observations.csv", "landsat_L7_1999_2000_each_observations.csv"
]

# Read and normalise every export (paths are relative to the working directory)
dataframes = []
for file in csv_files:
    df = pd.read_csv(file)

    # Drop the 'geo' column when present
    df = df.drop(columns=['geo'], errors='ignore')

    if 'year' in df.columns and 'month' in df.columns:
        # Build 'date' (first day of the month) from separate 'year' / 'month' columns
        df['date'] = pd.to_datetime(df['year'].astype(str) + '-' + df['month'].astype(str) + '-01')
        df = df.drop(columns=['year', 'month'])
    elif 'date' in df.columns:
        # These exports already carry a 'date' column, which read_csv leaves as plain strings;
        # parse it so 'date' is a real datetime, as in the monthly-composite loaders
        df['date'] = pd.to_datetime(df['date'])

    dataframes.append(df)

# Stack all frames into a single table with a fresh 0..n-1 index
data = pd.concat(dataframes, ignore_index=True)

# Preview the first rows
data.head()


Unnamed: 0,0,1,5,8,date,null,2,3,4,7,6,sensor
0,109.0,18,41.0,1565.0,2023-04-08,19726,,,,,,L9
1,1159.0,178,431.0,85.0,2023-05-10,19494,103.0,3.0,5.0,1.0,,L9
2,774.0,691,267.0,5.0,2023-06-11,19494,223.0,,5.0,,,L9
3,895.0,683,253.0,20.0,2024-05-28,19494,60.0,3.0,49.0,,2.0,L9
4,401.0,1370,29.0,,2024-06-29,19494,164.0,1.0,,,,L9


In [86]:
# Plot the share of water and snow/ice in wetlands for every individual observation date.
# `px` and `go` come from the notebook's top import cell.

# Map the class codes used as CSV column names to readable class names
class_mapping = {
    '0': "water",
    '1': "trees",
    '2': "grass",
    '3': "flooded_vegetation",
    '4': "crops",
    '5': "shrub_and_scrub",
    '6': "built",
    '7': "bare",
    '8': "snow_and_ice",
    'null': "null"
}

# Rename the class columns and replace missing values with 0, as the other sections do.
# Without the fill, a class absent from an observation stays NaN and shows up as a gap
# in its line instead of a proportion of 0.
data = data.rename(columns=class_mapping).fillna(0)

# Only melt the class columns that actually exist, so a class missing from every CSV
# does not abort the cell with a KeyError
class_columns = [name for name in class_mapping.values() if name in data.columns]
melted_data = data.melt(id_vars=['date'], value_vars=class_columns, var_name='class', value_name='surface')

# Average the surface of each class per date (several input rows can share the same date)
mean_data = melted_data.groupby(['date', 'class'], as_index=False)['surface'].mean()

# Classes that count towards the total surface ("built" and "null" are left out)
classes_in_total = ["shrub_and_scrub", "grass", "flooded_vegetation", "crops", "bare", "water", "trees", "snow_and_ice"]

# .copy() makes the filtered frame independent, so the column assignments below do not
# trigger pandas' SettingWithCopyWarning
mean_data = mean_data[mean_data['class'].isin(classes_in_total)].copy()

# Total surface over the retained classes at each date, then the share of each class
mean_data['total_surface'] = mean_data.groupby('date')['surface'].transform('sum')
mean_data['proportion'] = mean_data['surface'] / mean_data['total_surface']

# Classes actually drawn; their proportions remain relative to the total computed above
classes_to_keep = ["water", "snow_and_ice"]
mean_data = mean_data[mean_data['class'].isin(classes_to_keep)]

# Line plot of the proportions of the retained classes
fig = px.line(mean_data, x='date', y='proportion', color='class',
              title="Proportion of Surface Area for each Land Cover Class",
              labels={'proportion': 'Proportion of Surface Area'},
              color_discrete_map={
                  "water": "#419bdf",
                  "trees": "#397d49",
                  "grass": "#88b053",
                  "flooded_vegetation": "#7a87c6",
                  "crops": "#e49635",
                  "shrub_and_scrub": "#dfc35a",
                  "built": "#c4281b",
                  "bare": "#a59b8f",
                  "snow_and_ice": "#b39fe1",
                  "null": "#ff97ff"
              })

# Replace the title with a centred two-line version in a smaller font. The subtitle now names
# individual observations: this section does not use the monthly composites.
fig.update_layout(
    title=go.layout.Title(
        text="Proportion of Surface Area for each Google Dynamic Land Cover Class.<br>Derived from the classification of individual Landsat 7, 8, and 9 observations using Model 1.a",
        xref="paper",
        x=0.5,
        font=dict(size=14)
    ),
    xaxis=dict(
        tickformat='%d-%m-%Y'  # day-month-year tick labels
    )
)

# Display the figure
fig.show()

# Classification time series : Each observation from model 1a aggregated after classification with .mode()

## In wetlands

In [15]:
# Yearly-mode CSV exports, read from the csv_model_1a_l8_l7_l9_yearly_mode folder
csv_files = [
    'landsat_L7_each_observations_yearly_mode_1999_2005.csv',
    'landsat_L7_each_observations_yearly_mode_2006_2012.csv',
    'landsat_L7_each_observations_yearly_mode_2013_2018.csv',
    'landsat_L7_each_observations_yearly_mode_2018_2024.csv',
    'landsat_L8_each_observations_yearly_mode.csv',
    'landsat_L9_each_observations_yearly_mode.csv',
]

# Read each export and drop the 'geo' and 'null' columns when they are present
dataframes = [
    pd.read_csv(f"csv_model_1a_l8_l7_l9_yearly_mode/{file}").drop(
        columns=['geo', 'null'], errors='ignore'
    )
    for file in csv_files
]

# Stack all frames into a single table with a fresh 0..n-1 index
data = pd.concat(dataframes, ignore_index=True)

In [16]:
# Preview the first rows of the concatenated frame (shows which class columns are present)
data.head()

Unnamed: 0,0,1,2,3,5,8,year,4,sensor,6
0,1492,2271,144,18.0,25,2.0,1999,,L7,
1,3209,642,23,,75,3.0,2000,,L7,
2,2040,1412,342,,158,,2001,,L7,
3,1708,1834,200,,210,,2002,,L7,
4,1890,950,564,2.0,537,,2003,9.0,L7,


In [34]:

import plotly.express as px
import plotly.graph_objects as go

# Class codes (CSV column names) -> readable class names; code '8' is deliberately left out
class_mapping = {
    '0': "water",
    '1': "trees",
    '2': "grass",
    '3': "flooded_vegetation",
    '4': "crops",
    '5': "shrub_and_scrub",
    '6': "built",
    #'8': "snow_and_ice",
}

# Apply the readable names, then treat missing values as 0
data.rename(columns=class_mapping, inplace=True)
data.fillna(0, inplace=True)

# Long format: one row per (year, class) pair of every input row
class_columns = list(class_mapping.values())
melted_data = data.melt(
    id_vars=['year'],
    value_vars=class_columns,
    var_name='class',
    value_name='surface',
)

# Mean surface per (year, class), since several input rows can share a year
mean_data = melted_data.groupby(['year', 'class'], as_index=False)['surface'].mean()

# Yearly total over the mapped classes and the share of each class in it
yearly_total = mean_data.groupby('year')['surface'].transform('sum')
mean_data['total_surface'] = yearly_total
mean_data['proportion'] = mean_data['surface'] / mean_data['total_surface']

# One line (with markers) per class, using the fixed colour of each class
class_colors = {
    "water": "#419bdf",
    "trees": "#397d49",
    "grass": "#88b053",
    "flooded_vegetation": "#7a87c6",
    "crops": "#e49635",
    "shrub_and_scrub": "#dfc35a",
    "built": "#c4281b",
    "bare": "#a59b8f",
    #"snow_and_ice": "#b39fe1",
}
fig = px.line(
    mean_data,
    x='year',
    y='proportion',
    color='class',
    markers=True,
    color_discrete_map=class_colors,
)

# Centred three-line title in a smaller font; the x axis is given a '%Y' tick format and
# linear tick mode
figure_title = go.layout.Title(
    text="Annual proportion of the surface occupied by each land cover class in wetlands. <br> Derived from the classification of L7, L8, and L9 observations <br> and aggregated into a single yearly classification map using .mode()",
    xref="paper",
    x=0.5,
    y=0.95,
    font=dict(size=14),
)
fig.update_layout(
    title=figure_title,
    xaxis=dict(tickformat='%Y', tickmode='linear'),
)

# Display the figure
fig.show()

# Classification time series : Each observation from model 1d aggregated before classification using median()

In [35]:
# Yearly-composite CSV exports, one per sensor (paths are relative to the working directory)
csv_files = [
    'landsat_L7_yearly_composites.csv',
    'landsat_L8_yearly_composites.csv',
    'landsat_L9_yearly_composites.csv',
]

# Read each export and drop the 'geo' and 'null' columns when they are present
dataframes = [
    pd.read_csv(f"{file}").drop(columns=['geo', 'null'], errors='ignore')
    for file in csv_files
]

# Stack all frames into a single table with a fresh 0..n-1 index
data = pd.concat(dataframes, ignore_index=True)

In [36]:
# Preview the first rows of the concatenated frame (shows which class columns are present)
data.head()

Unnamed: 0,0,1,2,date,3,5,4,6,7,sensor
0,1011,2839,102,1999-01-01,,,,,,L7
1,2475,1392,76,2000-01-01,5.0,4.0,,,,L7
2,1584,1992,312,2001-01-01,18.0,46.0,,,,L7
3,1449,1835,585,2002-01-01,11.0,72.0,,,,L7
4,1643,1172,1035,2003-01-01,4.0,73.0,25.0,,,L7


In [40]:
# Plot the yearly share of each land-cover class obtained from the yearly composites.
# `px` and `go` come from the notebook's top import cell.

# Map the class codes used as CSV column names to readable class names.
# Code '7' (bare) is present in this dataset and has a colour below, so it is mapped here;
# leaving it out silently removed bare pixels from the totals and from the plot.
class_mapping = {
    '0': "water",
    '1': "trees",
    '2': "grass",
    '3': "flooded_vegetation",
    '4': "crops",
    '5': "shrub_and_scrub",
    '6': "built",
    '7': "bare",
    #'8': "snow_and_ice",
}

# Rename the class columns and replace missing values with 0
data = data.rename(columns=class_mapping).fillna(0)

# read_csv leaves 'date' as plain strings; parse it so the x axis is a real date axis
data['date'] = pd.to_datetime(data['date'])

# Only melt the class columns that actually exist, so a class missing from every CSV
# does not abort the cell with a KeyError
class_columns = [name for name in class_mapping.values() if name in data.columns]
melted_data = data.melt(id_vars=['date'], value_vars=class_columns, var_name='class', value_name='surface')

# Average the surface of each class per date (several input rows can share the same date)
mean_data = melted_data.groupby(['date', 'class'], as_index=False)['surface'].mean()

# Total surface over the mapped classes at each date, then the share of each class
mean_data['total_surface'] = mean_data.groupby('date')['surface'].transform('sum')
mean_data['proportion'] = mean_data['surface'] / mean_data['total_surface']

fig = px.line(mean_data, x='date', y='proportion', color='class',markers=True,
              color_discrete_map={
                  "water": "#419bdf",
                  "trees": "#397d49",
                  "grass": "#88b053",
                  "flooded_vegetation": "#7a87c6",
                  "crops": "#e49635",
                  "shrub_and_scrub": "#dfc35a",
                  "built": "#c4281b",
                  "bare": "#a59b8f",
                  #"snow_and_ice": "#b39fe1",
              })

# Centred title in a smaller font. The subtitle now describes this section's data
# (yearly median composites classified with model 1.d) instead of the .mode() aggregation
# text copied from the previous section.
fig.update_layout(
    title=go.layout.Title(
        text="Annual proportion of the surface occupied by each land cover class in wetlands. <br> Derived from the classification of L7, L8, and L9 yearly median composites <br> using Model 1.d",
        xref="paper",
        x=0.5,
        y=0.95,
        font=dict(size=14)
    ),
    xaxis=dict(
        tickformat='%Y',    # year-only tick labels
        tickmode='linear',
        dtick='M12'         # one tick every 12 months on the date axis
    )
)

# Display the figure
fig.show()