In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
!pip install rasterio rioxarray

Collecting rasterio
  Downloading rasterio-1.4.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (9.1 kB)
Collecting rioxarray
  Downloading rioxarray-0.19.0-py3-none-any.whl.metadata (5.5 kB)
Collecting affine (from rasterio)
  Downloading affine-2.4.0-py3-none-any.whl.metadata (4.0 kB)
Collecting cligj>=0.5 (from rasterio)
  Downloading cligj-0.7.2-py3-none-any.whl.metadata (5.0 kB)
Collecting click-plugins (from rasterio)
  Downloading click_plugins-1.1.1-py2.py3-none-any.whl.metadata (6.4 kB)
Downloading rasterio-1.4.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (22.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m22.2/22.2 MB[0m [31m104.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading rioxarray-0.19.0-py3-none-any.whl (62 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m62.2/62.2 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading cligj-0.7.2-py3-none-any.whl (7.1 kB)
Downloading affine-2.4.0-py3

In [4]:
import os
import xarray as xr
import numpy as np
import rioxarray as rxr
import pandas as pd
from collections import Counter
import plotly.graph_objects as go
# Define class dictionary with colors
clases = ["exploited_peat", "peat", "water", "arid", "forest", "urban", "grass_shrub"]
clas_num = [0, 1, 2, 3, 4, 5, 6]
colors = ["#8B4513", "#A0522D", "#1E90FF", "#FFD700", "#228B22", "#FF0000", "#32CD32"]  # Assign colors
clas_dict = {num: {"name": name, "color": color} for num, name, color in zip(clas_num, clases, colors)}

# 📂 Path to raster files
#raster_folder = "/content/drive/MyDrive/Work/Consulting/Paz_turberas/outputs_dl/multiClass/Landsat/output_landsat_aggregate"  # Update this
raster_folder = "/content/drive/MyDrive/Programming/R projects/Peat_LULC/output"

# 📆 Choose three years for comparison
selected_years = [2014, 2018, 2020]

# 🔍 Read rasters for selected years
raster_data = {}
for year in selected_years:
    raster_path = os.path.join(raster_folder, f"mosaic_class_year_{year}.tif")  # Modify filename if needed
    if os.path.exists(raster_path):
        raster_data[year] = rxr.open_rasterio(raster_path, masked=True).squeeze()
    else:
        print(f"⚠️ Warning: Raster for {year} not found.")

# Convert dictionary to xarray.Dataset
lulc_ds = xr.Dataset(raster_data)

# 🔄 Compute transition matrix
transitions = []

for i in range(len(selected_years) - 1):
    year1, year2 = selected_years[i], selected_years[i + 1]
    lulc_1, lulc_2 = lulc_ds[year1].values.flatten(), lulc_ds[year2].values.flatten()

    # Count transitions between classes
    transitions += [(f"{clas_dict[c1]['name']} ({year1})", f"{clas_dict[c2]['name']} ({year2})")
                    for c1, c2 in zip(lulc_1, lulc_2) if c1 in clas_dict and c2 in clas_dict]

# 🏗️ Create a structured transition DataFrame
transition_counts = Counter(transitions)
df_transitions = pd.DataFrame(transition_counts.items(), columns=["Transition", "Count"])
df_transitions[["From", "To"]] = df_transitions["Transition"].apply(pd.Series)
df_transitions.drop(columns=["Transition"], inplace=True)

# 🎨 Improved Sankey with Consistent Colors
def plot_sankey(df):
    labels = sorted(set(df["From"]).union(set(df["To"])))  # Ordered labels by year
    source = [labels.index(f) for f in df["From"]]
    target = [labels.index(t) for t in df["To"]]
    values = df["Count"].tolist()

    # Extract colors from clas_dict
    label_colors = {name: clas_dict[num]["color"] for num, info in clas_dict.items() for name in labels if info["name"] in name}

    fig = go.Figure(go.Sankey(
        node=dict(
            label=labels,
            color=[label_colors[label] for label in labels],  # Use the same color for the same class
            pad=20,
            thickness=20
        ),
        link=dict(
            source=source,
            target=target,
            value=values
        )
    ))

    fig.update_layout(
        title_text="Land Use/Land Cover Transition in Magallanes",
        font_size=12
    )
    fig.show()

# 📊 Plot the structured Sankey diagram
plot_sankey(df_transitions)

In [11]:
import os
import xarray as xr
import numpy as np
import rioxarray as rxr
import pandas as pd
import geopandas as gpd
from collections import Counter
import plotly.graph_objects as go

# Define class dictionary with colors
clases = ["exploited_peat", "peat", "water", "arid", "forest", "urban", "grass_shrub"]
clas_num = [0, 1, 2, 3, 4, 5, 6]
colors = ["#8B4513", "#A0522D", "#1E90FF", "#FFD700", "#228B22", "#FF0000", "#32CD32"]
clas_dict = {num: {"name": name, "color": color} for num, name, color in zip(clas_num, clases, colors)}

# 📂 Paths
raster_folder = "/content/drive/MyDrive/Programming/R projects/Peat_LULC/output"
shapefile_path = "/content/drive/MyDrive/Work/Consulting/Paz_turberas/limiteMag1.shp"  # Update this
#G:\Mi unidad\Work\Consulting\Paz_turberas
# Load mask shapefile
mask = gpd.read_file(shapefile_path)
mask = mask.geometry.values[0]  # Extract the first polygon if multiple exist

# 📆 Selected years
selected_years = [2014, 2018, 2020]

# 🔍 Read and mask rasters
raster_data = {}
for year in selected_years:
    raster_path = os.path.join(raster_folder, f"mosaic_class_year_{year}.tif")
    if os.path.exists(raster_path):
        # Open and clip raster to shapefile
        raster = rxr.open_rasterio(raster_path, masked=True).squeeze()
        raster_clipped = raster.rio.clip([mask], crs=raster.rio.crs)  # Clip to mask
        raster_data[year] = raster_clipped
    else:
        print(f"⚠️ Warning: Raster for {year} not found.")

# Convert to xarray.Dataset
lulc_ds = xr.Dataset(raster_data)

# 🔄 Compute transitions (only within mask)
transitions = []
for i in range(len(selected_years) - 1):
    year1, year2 = selected_years[i], selected_years[i + 1]
    lulc_1, lulc_2 = lulc_ds[year1].values.flatten(), lulc_ds[year2].values.flatten()

    # Filter out masked values (NaN or nodata)
    valid_pixels = ~np.isnan(lulc_1) & ~np.isnan(lulc_2)
    lulc_1_valid = lulc_1[valid_pixels]
    lulc_2_valid = lulc_2[valid_pixels]

    transitions += [
        (f"{clas_dict[c1]['name']} ({year1})", f"{clas_dict[c2]['name']} ({year2})")
        for c1, c2 in zip(lulc_1_valid, lulc_2_valid)
        if c1 in clas_dict and c2 in clas_dict
    ]

# 🏗️ Create transition DataFrame
transition_counts = Counter(transitions)
df_transitions = pd.DataFrame(transition_counts.items(), columns=["Transition", "Count"])
df_transitions[["From", "To"]] = df_transitions["Transition"].apply(pd.Series)
df_transitions.drop(columns=["Transition"], inplace=True)

# 🎨 Plot Sankey (unchanged)
def plot_sankey(df):
    labels = sorted(set(df["From"]).union(set(df["To"])))
    source = [labels.index(f) for f in df["From"]]
    target = [labels.index(t) for t in df["To"]]
    values = df["Count"].tolist()

    label_colors = {
        name: clas_dict[num]["color"]
        for num, info in clas_dict.items()
        for name in labels if info["name"] in name
    }

    fig = go.Figure(go.Sankey(
        node=dict(
            label=labels,
            color=[label_colors[label] for label in labels],
            pad=20,
            thickness=20
        ),
        link=dict(
            source=source,
            target=target,
            value=values
        )
    ))
    fig.update_layout(title_text="Land Use/Land Cover Transition (Masked)")
    fig.show()

plot_sankey(df_transitions)

In [28]:
import os
import xarray as xr
import numpy as np
import rioxarray as rxr
import pandas as pd
import geopandas as gpd
from collections import Counter
import plotly.graph_objects as go

# ======================
# USER CONFIGURATION
# ======================
# Define your two target years
YEAR1 = 2014
YEAR2 = 2024

# Select which classes to include (use names from 'clases' list below)
SELECTED_CLASSES = ["urban", "peat", "grass_shrub"]  # Modify as needed

# Path configuration
RASTER_FOLDER = "/content/drive/MyDrive/Programming/R projects/Peat_LULC/output"
#SHAPEFILE_PATH = "/path/to/your/mask_shapefile.shp"  # Update this
SHAPEFILE_PATH = "/content/drive/MyDrive/Work/Consulting/Paz_turberas/limiteMag.shp"  # Update this

# ======================
# DATA PROCESSING
# ======================

# Define all classes (don't modify)
clases = ["exploited_peat", "peat", "water", "arid", "forest", "urban", "grass_shrub"]
clas_num = [0, 1, 2, 3, 4, 5, 6]
colors = ["#8B4513", "#A0522D", "#1E90FF", "#FFD700", "#228B22", "#FF0000", "#32CD32"]
clas_dict = {num: {"name": name, "color": color} for num, name, color in zip(clas_num, clases, colors)}

# Filter class dictionary to only selected classes
selected_nums = [num for num, info in clas_dict.items() if info["name"] in SELECTED_CLASSES]
filtered_dict = {num: clas_dict[num] for num in selected_nums}

# Load and reproject shapefile
mask = gpd.read_file(SHAPEFILE_PATH).to_crs("EPSG:32719")
mask_geometry = mask.geometry.values[0]

# Load and process rasters
def load_and_mask_raster(year):
    raster_path = os.path.join(RASTER_FOLDER, f"mosaic_class_year_{year}.tif")
    if not os.path.exists(raster_path):
        raise FileNotFoundError(f"Raster for {year} not found at {raster_path}")

    raster = rxr.open_rasterio(raster_path, masked=True).squeeze()
    return raster.rio.reproject("EPSG:32719").rio.clip([mask_geometry], crs="EPSG:32719")

raster1 = load_and_mask_raster(YEAR1)
raster2 = load_and_mask_raster(YEAR2)

# Extract and filter values
def filter_classes(arr, class_dict):
    """Filter array to only include selected classes"""
    arr_filtered = arr.copy()
    valid_classes = list(class_dict.keys())
    arr_filtered[~np.isin(arr, valid_classes)] = np.nan
    return arr_filtered

lulc1 = filter_classes(raster1.values.flatten(), filtered_dict)
lulc2 = filter_classes(raster2.values.flatten(), filtered_dict)

# Compute transitions
valid_pixels = ~np.isnan(lulc1) & ~np.isnan(lulc2)
transitions = [
    (f"{filtered_dict[c1]['name']} ({YEAR1})", f"{filtered_dict[c2]['name']} ({YEAR2})")
    for c1, c2 in zip(lulc1[valid_pixels].astype(int), lulc2[valid_pixels].astype(int))
]

# ======================
# VISUALIZATION
# ======================

# Create transition DataFrame
transition_counts = Counter(transitions)
df = pd.DataFrame(transition_counts.items(), columns=["Transition", "Count"])
df[["From", "To"]] = df["Transition"].apply(pd.Series)
df.drop(columns=["Transition"], inplace=True)

# Generate Sankey diagram
def plot_filtered_sankey(df, class_dict):
    labels = sorted(set(df["From"]).union(set(df["To"])))

    # Get colors for selected classes
    label_colors = {}
    for num, info in class_dict.items():
        for label in labels:
            if info["name"] in label:
                label_colors[label] = info["color"]

    fig = go.Figure(go.Sankey(
        node=dict(
            label=labels,
            color=[label_colors[label] for label in labels],
            pad=20,
            thickness=20
        ),
        link=dict(
            source=[labels.index(f) for f in df["From"]],
            target=[labels.index(t) for t in df["To"]],
            value=df["Count"].tolist()
        )
    ))

    title = f"Land Use Transition {YEAR1}→{YEAR2}<br><sup>Showing: {', '.join(SELECTED_CLASSES)}</sup>"
    fig.update_layout(title_text=title, title_x=0.5)
    fig.show()

plot_filtered_sankey(df, filtered_dict)

In [23]:
"G:\Mi unidad\Work\Consulting\Paz_turberas\outputs_dl\multiClass\Landsat\COPERNICUS_TURBERAS\COPERNICUS_TURBERAS"

import os
import xarray as xr
import numpy as np
import rioxarray as rxr
import pandas as pd
import geopandas as gpd
from collections import Counter
import plotly.graph_objects as go

# ======================
# USER CONFIGURATION
# ======================
# Define your two target years
YEAR1 = 2015
YEAR2 = 2024

# Select which classes to include (use names from 'clases' list below)
SELECTED_CLASSES = ["urban", "peat", "water", "snow_ice"]  # Modify as needed

# Path configuration
RASTER_FOLDER = "/content/drive/MyDrive/Work/Consulting/Paz_turberas/outputs_dl/multiClass/Landsat/COPERNICUS_TURBERAS/COPERNICUS_TURBERAS"
#SHAPEFILE_PATH = "/path/to/your/mask_shapefile.shp"  # Update this
SHAPEFILE_PATH = "/content/drive/MyDrive/Work/Consulting/Paz_turberas/limiteMag2.shp"  # Update this

# ======================
# DATA PROCESSING
# ======================

# Define all classes (don't modify)
clases = ["bogs", "pulvinbogs", "wetlands", "water",
          "agriculture", "forest", "grassland", "shrubland", "urban", "sparse", "bare", "snow_ice"]
clas_num = [10, 11, 12, 1, 2, 3, 4, 5, 6, 7, 8]
colors = ["#8FE4CB", "#C7F1E5", "#3D9970", "#0077BE", "#FFDD55", "#4CAF50", "#A4D96C", "#C8A01D", "#9150CE", "#B07600", "#FF8A3D", "#00DFFF"]
clas_dict = {num: {"name": name, "color": color} for num, name, color in zip(clas_num, clases, colors)}

# Filter class dictionary to only selected classes
selected_nums = [num for num, info in clas_dict.items() if info["name"] in SELECTED_CLASSES]
filtered_dict = {num: clas_dict[num] for num in selected_nums}

# ======================
# SPATIAL ALIGNMENT
# ======================
def validate_spatial_alignment(raster, mask):
    """Ensure CRS and extents match between raster and mask"""
    # Check CRS match
    if raster.rio.crs != mask.crs:
        print(f"⚠️ CRS mismatch: Raster={raster.rio.crs}, Mask={mask.crs}")
        print(f"Reprojecting mask to raster CRS: {raster.rio.crs}")
        mask = mask.to_crs(raster.rio.crs)

    # Check extent overlap
    raster_bounds = raster.rio.bounds()
    mask_bounds = mask.total_bounds
    if not (raster_bounds[0] <= mask_bounds[2] and raster_bounds[2] >= mask_bounds[0] and
            raster_bounds[1] <= mask_bounds[3] and raster_bounds[3] >= mask_bounds[1]):
        raise ValueError("Raster and mask extents do not overlap!")

    return mask

# Load mask and first raster to determine alignment
mask = gpd.read_file(SHAPEFILE_PATH)
sample_raster = rxr.open_rasterio(os.path.join(RASTER_FOLDER, f"{YEAR1}.tif")).squeeze()
mask = validate_spatial_alignment(sample_raster, mask)

# ======================
# DATA PROCESSING (FIXED VERSION)
# ======================
def process_raster(year):
    raster_path = os.path.join(RASTER_FOLDER, f"{year}.tif")
    if not os.path.exists(raster_path):
        raise FileNotFoundError(f"Raster for {year} not found at {raster_path}")

    raster = rxr.open_rasterio(raster_path, masked=True).squeeze()
    # Clip using same bounds for both rasters
    clipped = raster.rio.clip(mask.geometry, crs=raster.rio.crs, all_touched=True)
    return clipped

# Process both rasters with identical clipping
raster1 = process_raster(YEAR1)
raster2 = process_raster(YEAR2)

# Ensure identical shapes after clipping
if raster1.shape != raster2.shape:
    print(f"⚠️ Shape mismatch: {YEAR1}={raster1.shape}, {YEAR2}={raster2.shape}")
    # Crop to common extent
    common_bounds = (
        max(raster1.rio.bounds()[0], raster2.rio.bounds()[0]),  # minx
        max(raster1.rio.bounds()[1], raster2.rio.bounds()[1]),  # miny
        min(raster1.rio.bounds()[2], raster2.rio.bounds()[2]),  # maxx
        min(raster1.rio.bounds()[3], raster2.rio.bounds()[3])   # maxy
    )
    raster1 = raster1.rio.clip_box(*common_bounds)
    raster2 = raster2.rio.clip_box(*common_bounds)
    print(f"Adjusted to common shape: {raster1.shape}")

# Filter to selected classes and flatten (identical shape now guaranteed)
def filter_and_flatten(raster_data, class_dict):
    arr = raster_data.values
    valid_classes = np.array(list(class_dict.keys()))
    mask = np.isin(arr, valid_classes)
    return np.where(mask, arr, np.nan).flatten()

lulc1 = filter_and_flatten(raster1, filtered_dict)
lulc2 = filter_and_flatten(raster2, filtered_dict)

# Calculate transitions (now safe with identical array sizes)
valid_pixels = ~np.isnan(lulc1) & ~np.isnan(lulc2)
transitions = [
    (f"{filtered_dict[int(c1)]['name']} ({YEAR1})", f"{filtered_dict[int(c2)]['name']} ({YEAR2})")
    for c1, c2 in zip(lulc1[valid_pixels], lulc2[valid_pixels])
]
# ======================
# VISUALIZATION
# ======================
transition_counts = Counter(transitions)
df = pd.DataFrame(transition_counts.items(), columns=["Transition", "Count"])
df[["From", "To"]] = df["Transition"].apply(pd.Series)
df.drop(columns=["Transition"], inplace=True)

def plot_sankey(df):
    labels = sorted(set(df["From"]).union(set(df["To"])))
    label_colors = {label: filtered_dict[int(label.split()[0][1:])]["color"]
                   for label in labels}  # Extract class number from label

    fig = go.Figure(go.Sankey(
        node=dict(
            label=labels,
            color=[label_colors[label] for label in labels],
            pad=15,
            thickness=20,
            line=dict(color="black", width=0.5)
        ),
        link=dict(
            source=[labels.index(f) for f in df["From"]],
            target=[labels.index(t) for t in df["To"]],
            value=df["Count"],
            color=[label_colors[f] for f in df["From"]],
            hoverinfo="all"
        )
    ))

    title = (f"Land Use Transition {YEAR1}→{YEAR2}<br>"
             f"<sup>Showing: {', '.join(SELECTED_CLASSES)} | "
             f"CRS: {raster1.rio.crs}</sup>")

    fig.update_layout(
        title_text=title,
        title_x=0.5,
        font_size=12,
        height=600
    )
    fig.show()

plot_sankey(df)

# Optional: Save transition matrix
#df.to_csv(f"transitions_{YEAR1}_{YEAR2}.csv", index=False)

⚠️ Shape mismatch: 2015=(9279, 3888), 2024=(8944, 4025)
Adjusted to common shape: (9279, 3888)


ValueError: operands could not be broadcast together with shapes (36076752,) (35999600,) 

In [None]:
import os
import xarray as xr
import numpy as np
import rioxarray as rxr
import pandas as pd
import geopandas as gpd
from collections import Counter
import plotly.graph_objects as go

# ======================
# USER CONFIGURATION
# ======================
YEAR1 = 2014
YEAR2 = 2018
SELECTED_CLASSES = ["exploited_peat", "peat", "water", "forest"]  # Modify as needed
RASTER_FOLDER = "/content/drive/MyDrive/Programming/R projects/Peat_LULC/output"
SHAPEFILE_PATH = "/path/to/your/mask_shapefile.shp"  # Update this

# ======================
# DATA PREPARATION
# ======================
# Class definitions (don't modify)
clases = ["exploited_peat", "peat", "water", "arid", "forest", "urban", "grass_shrub"]
clas_num = [0, 1, 2, 3, 4, 5, 6]
colors = ["#8B4513", "#A0522D", "#1E90FF", "#FFD700", "#228B22", "#FF0000", "#32CD32"]
clas_dict = {num: {"name": name, "color": color} for num, name, color in zip(clas_num, clases, colors)}
filtered_dict = {num: clas_dict[num] for num, info in clas_dict.items() if info["name"] in SELECTED_CLASSES}

# ======================
# SPATIAL ALIGNMENT
# ======================
def validate_spatial_alignment(raster, mask):
    """Ensure CRS and extents match between raster and mask"""
    # Check CRS match
    if raster.rio.crs != mask.crs:
        print(f"⚠️ CRS mismatch: Raster={raster.rio.crs}, Mask={mask.crs}")
        print(f"Reprojecting mask to raster CRS: {raster.rio.crs}")
        mask = mask.to_crs(raster.rio.crs)

    # Check extent overlap
    raster_bounds = raster.rio.bounds()
    mask_bounds = mask.total_bounds
    if not (raster_bounds[0] <= mask_bounds[2] and raster_bounds[2] >= mask_bounds[0] and
            raster_bounds[1] <= mask_bounds[3] and raster_bounds[3] >= mask_bounds[1]):
        raise ValueError("Raster and mask extents do not overlap!")

    return mask

# Load mask and first raster to determine alignment
mask = gpd.read_file(SHAPEFILE_PATH)
sample_raster = rxr.open_rasterio(os.path.join(RASTER_FOLDER, f"mosaic_class_year_{YEAR1}.tif")).squeeze()
mask = validate_spatial_alignment(sample_raster, mask)

# ======================
# DATA PROCESSING
# ======================
def process_raster(year):
    raster_path = os.path.join(RASTER_FOLDER, f"mosaic_class_year_{year}.tif")
    if not os.path.exists(raster_path):
        raise FileNotFoundError(f"Raster for {year} not found at {raster_path}")

    raster = rxr.open_rasterio(raster_path, masked=True).squeeze()
    raster = raster.rio.clip(mask.geometry, crs=raster.rio.crs, all_touched=True)
    return raster

raster1 = process_raster(YEAR1)
raster2 = process_raster(YEAR2)

# Filter to selected classes and flatten
def filter_and_flatten(raster_data, class_dict):
    arr = raster_data.values
    valid_pixels = np.isin(arr, list(class_dict.keys()))
    return np.where(valid_pixels, arr, np.nan).flatten()

lulc1 = filter_and_flatten(raster1, filtered_dict)
lulc2 = filter_and_flatten(raster2, filtered_dict)

# Calculate transitions
valid_pixels = ~np.isnan(lulc1) & ~np.isnan(lulc2)
transitions = [
    (f"{filtered_dict[int(c1)]['name']} ({YEAR1})", f"{filtered_dict[int(c2)]['name']} ({YEAR2})")
    for c1, c2 in zip(lulc1[valid_pixels], lulc2[valid_pixels])
]

# ======================
# VISUALIZATION
# ======================
transition_counts = Counter(transitions)
df = pd.DataFrame(transition_counts.items(), columns=["Transition", "Count"])
df[["From", "To"]] = df["Transition"].apply(pd.Series)
df.drop(columns=["Transition"], inplace=True)

def plot_sankey(df):
    labels = sorted(set(df["From"]).union(set(df["To"])))
    label_colors = {label: filtered_dict[int(label.split()[0][1:])]["color"]
                   for label in labels}  # Extract class number from label

    fig = go.Figure(go.Sankey(
        node=dict(
            label=labels,
            color=[label_colors[label] for label in labels],
            pad=15,
            thickness=20,
            line=dict(color="black", width=0.5)
        ),
        link=dict(
            source=[labels.index(f) for f in df["From"]],
            target=[labels.index(t) for t in df["To"]],
            value=df["Count"],
            color=[label_colors[f] for f in df["From"]],
            hoverinfo="all"
        )
    ))

    title = (f"Land Use Transition {YEAR1}→{YEAR2}<br>"
             f"<sup>Showing: {', '.join(SELECTED_CLASSES)} | "
             f"CRS: {raster1.rio.crs}</sup>")

    fig.update_layout(
        title_text=title,
        title_x=0.5,
        font_size=12,
        height=600
    )
    fig.show()

plot_sankey(df)

# Optional: Save transition matrix
df.to_csv(f"transitions_{YEAR1}_{YEAR2}.csv", index=False)