## The code computed geomorphology changes as erosion and accretion over years

In [1]:
import matplotlib.pyplot as plt
import numpy as np
from osgeo import gdal
import numpy.ma as ma
import os
import pandas as pd
from datetime import datetime
import rasterio
from rasterio.enums import Resampling
from rasterio.warp import calculate_default_transform, reproject

## Import Ancillary datasets
- We have two ancillary source of information, one is a water frequency raster (no. of times a pixels remained as water in 2003-2017)
- Self digitized center line of Xingu river and a buffer around it to extract pixels only on the river channel

In [3]:
waterFreq_raster_file = '/Users/bvarugu/Documents/Belomonte/GIS_layers/open_water_frequency_Xingu_clip.tif'
sample_rtc_file = '/Users/bvarugu/Documents/Belomonte/Xingu_hydroperiod/SepDec/Xingu_SAR_hydroperiod_2016_SepDec.tif'
with rasterio.open(sample_rtc_file) as src_rtc:
    rtc_data = src_rtc.read(1)  # Assuming single band data
    rtc_profile = src_rtc.profile
    rtc_crs = src_rtc.crs
    rtc_transform = src_rtc.transform
    rtc_width = src_rtc.width
    rtc_height = src_rtc.height
with rasterio.open(waterFreq_raster_file) as src_waterFreq:
    # Calculate the transform for the resampled data
    dst_transform, dst_width, dst_height = calculate_default_transform(
        src_waterFreq.crs, rtc_crs, src_waterFreq.width, src_waterFreq.height, *src_waterFreq.bounds, 
        dst_width=rtc_width, dst_height=rtc_height, resolution=None
    )

    # Create an empty array to store the resampled data
    waterFreq = np.empty((rtc_height, rtc_width), dtype=src_waterFreq.dtypes[0])

    # Perform the resampling
    rasterio.warp.reproject(
        source=rasterio.band(src_waterFreq, 1),
        destination=waterFreq,
        src_transform=src_waterFreq.transform,
        src_crs=src_waterFreq.crs,
        dst_transform=dst_transform,
        dst_crs=rtc_crs,
        resampling=Resampling.nearest  # Adjust as needed
    )
riverCenter_raster_file = '/Users/bvarugu/Documents/Belomonte/GIS_layers/Xingu_river_center_5km_buffer_UTM.tif'
with rasterio.open(riverCenter_raster_file) as src_riverCenter:
    # Calculate the transform for the resampled data
    dst_transform, dst_width, dst_height = calculate_default_transform(
        src_riverCenter.crs, rtc_crs, src_riverCenter.width, src_riverCenter.height, *src_riverCenter.bounds, 
        dst_width=rtc_width, dst_height=rtc_height, resolution=None
    )

    # Create an empty array to store the resampled data
    riverCenter = np.empty((rtc_height, rtc_width), dtype=src_riverCenter.dtypes[0])

    # Perform the resampling
    rasterio.warp.reproject(
        source=rasterio.band(src_riverCenter, 1),
        destination=riverCenter,
        src_transform=src_riverCenter.transform,
        src_crs=src_riverCenter.crs,
        dst_transform=dst_transform,
        dst_crs=rtc_crs,
        resampling=Resampling.nearest  # Adjust as needed
    )

## Create a dict with mean Land-water masks for each year

- Here we import the hydroperiod for each year and take pixels with hydroperiod > 0.75 resulting in a year-wise land-water mask
- This step suppresses the noise due to over-identification of water pixels in agricultural areas in individual SAR backscatter images


In [4]:
years = ['2016','2017','2018','2019', '2020','2021', '2022','2023']
num_years = len(years)
LW_mask_dict = {}

# Define a helper function to read and process the raster file
def process_raster_file(file_path,thres):
    dataset = gdal.Open(file_path)
    if dataset is None:
        print(f"Failed to open raster file {file_path}")
        return None, None
    band = dataset.GetRasterBand(1)
    hydroperiod = band.ReadAsArray().astype(float)
    nodata_value = band.GetNoDataValue();
    metadata = dataset.GetMetadata(); # if using rasterio, metadata = dataset.tags()
    
    # If 'dates' is part of the metadata, split it into a list
    if 'dates' in metadata:
        metadata['dates'] = metadata['dates'].split(',');
        SAR_date_list = metadata['dates'];

    dataset = None
    binary_mask = np.zeros(hydroperiod.shape, dtype=np.uint8)
    binary_mask[hydroperiod < thres] = 1
    if nodata_value is not None:
        binary_mask[hydroperiod == nodata_value] = 255
    return binary_mask, hydroperiod, SAR_date_list

for year in years:
    hydroperiod_threshold = 0.75;
    hydroperiod_raster_file = f'/Users/bvarugu/Documents/Belomonte/Xingu_hydroperiod/SepDec/Xingu_SAR_hydroperiod_{year}_SepDec.tif'
    binary_mask, hydroperiod, SAR_date_list = process_raster_file(hydroperiod_raster_file,hydroperiod_threshold)
    if binary_mask is not None:
        LW_mask_dict[year] = {
            'mask': binary_mask,
            'hydroperiod': hydroperiod,
            'dateList':SAR_date_list
        }


## Compute yearly changes and visualize
- The background is 2016 mask.
- To compute changes, we subtract one mask from the previous year mask.
- The changes are cummulatively added such that, if an area is consitently eroding, it's location grow in size

In [6]:
%matplotlib Qt
# #Full
# xmin,xmax= 0, np.shape(initial_mask)[1];
# ymin,ymax= 0, np.shape(initial_mask)[0];
# #Far Upstream
xmin,xmax= 2750,3650
ymin,ymax= 16000,17360
# # #Upstream Pimental
# xmin,xmax= 4262,6730
# ymin,ymax= 15400,18000
# # Downstream
# xmin,xmax= 4750,6200
# ymin,ymax= 9550,11700
fig, axes = plt.subplots(2, int(len(years) / 2), figsize=(15, 10), sharex=True, sharey=True)
initial_mask = LW_mask_dict[years[0]]['mask']
cumm_change_mask = np.zeros(np.shape(initial_mask))

for i, year in enumerate(years):
    mask = LW_mask_dict[year]['mask']
    j, k = divmod(i, int(len(years) / 2))
    
    if i == 0:
        axes[j, k].imshow(ma.masked_where(mask[ymin:ymax,xmin:xmax] == 255, mask[ymin:ymax,xmin:xmax]), cmap='Blues_r', vmin=-1, vmax=1, interpolation='nearest');
        #axes[j, k].imshow(ma.masked_where(riverCenter[ymin:ymax,xmin:xmax] == 0, riverCenter[ymin:ymax,xmin:xmax]), cmap='jet', vmin=0, vmax=1, interpolation='nearest')
        
        axes[j, k].set_title(f'Year {year}')
    else:
        prev_mask = LW_mask_dict[years[i-1]]['mask']
        change_mask = mask.astype(int) - prev_mask.astype(int);change_mask[riverCenter!=1] = 0;
        cumm_change_mask += change_mask

        axes[j, k].imshow(ma.masked_where(initial_mask[ymin:ymax,xmin:xmax] == 255, initial_mask[ymin:ymax,xmin:xmax]), cmap='Blues_r', vmin=-1, vmax=1, interpolation='nearest')
        axes[j, k].imshow(ma.masked_where(cumm_change_mask[ymin:ymax,xmin:xmax] == 0, cumm_change_mask[ymin:ymax,xmin:xmax]), cmap='PuOr', vmin=-3, vmax=3, interpolation='nearest')
        
        num_positive_changes = np.sum(change_mask[ymin:ymax,xmin:xmax] == 1)
        num_negative_changes = np.sum(change_mask[ymin:ymax,xmin:xmax] == -1)
        
        axes[j, k].set_title(f'Change {years[i-1]} to {year}\n(+): {num_positive_changes} (-): {num_negative_changes}')
    
    axes[j, k].axis('off')

plt.tight_layout()
plt.show()


## Barplot showing erosion 

- The top plot shows erosion/accretion w.r.t previous year
- The bottom plot shows cuumulative erosion/accretion
- Here we compute the changes to be only in the river channel and exclude the flood plain

In [7]:
# #Far Upstream
# xmin,xmax= 2750,3650
# ymin,ymax= 16000,17360
# # # #Upstream Pimental
# xmin,xmax= 4262,6730
# ymin,ymax= 15400,18000
#Downstream
xmin,xmax= 4750,6200
ymin,ymax= 9550,11700
initial_mask = LW_mask_dict[years[0]]['mask'][ymin:ymax,xmin:xmax]
cumm_change_mask = np.zeros_like(initial_mask, dtype=int)
riverCenter_clipped = riverCenter[ymin:ymax,xmin:xmax];
positive_changes = []
negative_changes = []
cumm_positive_changes = []
cumm_negative_changes = []

for i in range(1, len(years)):
    year = years[i]
    year2 = years[i];year1 = years[i-1];
    mask2 = LW_mask_dict[year2]['mask'][ymin:ymax,xmin:xmax];
    mask1 = LW_mask_dict[year1]['mask'][ymin:ymax,xmin:xmax];
    
    change_mask = mask2.astype(int) - mask1.astype(int)
    cumm_change_mask += change_mask;
    
    num_positive_changes = np.sum(change_mask[riverCenter_clipped==1] > 0)
    num_negative_changes = np.sum(change_mask[riverCenter_clipped==1] < 0)
    
    positive_changes.append(num_positive_changes*100)
    negative_changes.append(num_negative_changes*100)
    
    num_cumm_positive_changes = np.sum(cumm_change_mask[riverCenter_clipped==1] > 0)
    num_cumm_negative_changes = np.sum(cumm_change_mask[riverCenter_clipped==1] < 0)
    
    cumm_positive_changes.append(num_cumm_positive_changes*100)
    cumm_negative_changes.append(num_cumm_negative_changes*100)

# Create a bar plot
fig, axes = plt.subplots(2,1,figsize=(7, 5),sharex=True,sharey=True)
ax1=axes[0];ax2=axes[1];
bar_width = 0.35
index = np.arange(len(years) - 1)
ax1.tick_params(labelsize=10)
bar1 = ax1.bar(index, -1*np.array(positive_changes)*1e-6, bar_width, label='Accretion', color='#006400')
bar2 = ax1.bar(index, np.array(negative_changes)*1e-6, bar_width, label='Erosion', color= '#8C000F')


ax1.set_ylabel('Area in sq.km',fontsize=15)
ax1.set_title('Relative Erosion and Accetion Change w.r.t previous year')
ax1.set_xticks(index)
ax1.set_xticklabels(years[1:])
ax1.set_ylim([-np.max(negative_changes)*1.1*1e-6,np.max(positive_changes)*1.1*1e-6])
ax1.legend()

ax2.tick_params(labelsize=10)
bar1 = ax2.bar(index, -1*np.array(cumm_positive_changes)*1e-6, bar_width, label='Accretion', color='#006400')
bar2 = ax2.bar(index, np.array(cumm_negative_changes)*1e-6, bar_width, label='Erosion', color='#8C000F')

ax2.set_xlabel('Year',fontsize=15)
ax2.set_ylabel('Area in sq.km',fontsize=15)
ax2.set_title('Cumulative Erosion and Accetion Change from 2016')
ax2.set_xticks(index)
ax2.set_xticklabels(years[1:])
ax2.set_ylim([-np.max(cumm_positive_changes)*1.1*1e-6,np.max(cumm_negative_changes)*1.1*1e-6])
ax2.legend()

plt.tight_layout()
plt.show()

## Read reservoir data

- We have inflow, outflow, reservoir level height measurements from Belomonte and Pimental dams
- Currently, Belomonte inflow shows 0 or none values

In [8]:
def custom_date_parser(date):
    try:
        # Try parsing the date using one format
        return pd.to_datetime(date, format='%d/%m/%y')
    except ValueError:
        try:
            # Try parsing the date using another format
            return pd.to_datetime(date, format='%d/%m/%Y')
        except ValueError:
            # Return NaN for invalid dates
            return pd.NaT
def fix_decimal(value):
    try:
        return float(value.replace(',', '.'))  # Replace comma with period
    except ValueError:
        return value  # Return value as is if conversion fails
        
def get_date_reservoir_flow(df,slc1_date):
    
    
    filtered_df = df[df['Date'] == slc1_date];
    if not filtered_df.empty:
    # Select the desired column value
        level_ht = filtered_df['Level'].iloc[0];
        in_flow = filtered_df['inflow'].iloc[0];
        out_flow = filtered_df['outflow'].iloc[0];
    else:
        in_flow,out_flow,level_ht = np.nan,np.nan,np.nan
    return in_flow,out_flow,level_ht

def read_reservoir_data(file):
    col_names= ['Name','Level','inflow','outflow','Date']
    df = pd.read_csv(file,encoding='unicode_escape',header=0,usecols=[1,2,3,4,10],names=col_names,decimal=",",
                 parse_dates=['Date'],date_parser=custom_date_parser);
    df['outflow'] = df['outflow'].apply(fix_decimal)
    return df

dam_discharge_dir= '/Users/bvarugu/Documents/Belomonte/Reservoir_discharge/';
pimental_reservoir= os.path.join(dam_discharge_dir,'Histórico_PIMENTAL.csv');
belomonte_reservoir= os.path.join(dam_discharge_dir,'Histórico_BELOMONTE.csv');
pimental_df = read_reservoir_data(pimental_reservoir);
belomonte_df = read_reservoir_data(belomonte_reservoir);

## Compute yearly flow rates to coorelate with accretion and erosion

In [9]:
def get_yearly_flow(dateList):
    pimental_in_flows = []
    pimental_out_flows = []
    pimental_level_hts = []
    belomonte_in_flows = []
    belomonte_out_flows = []
    belomonte_level_hts = []
    date_vars= [] 
    for date in dateList:
        date = datetime.strptime(date, "%Y%m%d");date_vars.append(date)
        pimental_in_flow,pimental_out_flow,pimental_level_ht = get_date_reservoir_flow(pimental_df,date);
        belomonte_in_flow,belomonte_out_flow,belomonte_level_ht = get_date_reservoir_flow(belomonte_df,date);
        pimental_in_flows.append(pimental_in_flow)
        pimental_out_flows.append(pimental_out_flow)
        pimental_level_hts.append(pimental_level_ht)
        belomonte_in_flows.append(belomonte_in_flow)
        belomonte_out_flows.append(belomonte_out_flow)
        belomonte_level_hts.append(belomonte_level_ht)
    return (date_vars,pimental_in_flows,pimental_out_flows,pimental_level_hts,belomonte_in_flows,belomonte_out_flows,belomonte_level_hts)

results_dict = {
    'dates': [],
    'pimental_in_flow': [],
    'pimental_out_flow': [],
    'pimental_level_ht': [],
    'belomonte_in_flow': [],
    'belomonte_out_flow': [],
    'belomonte_level_ht': []
}

for year in years:
    dateList = LW_mask_dict[year]['dateList']
    results = get_yearly_flow(dateList)

    results_dict['dates'].append(results[0])
    results_dict['pimental_in_flow'].append(results[1])
    results_dict['pimental_out_flow'].append(results[2])
    results_dict['pimental_level_ht'].append(results[3])
    results_dict['belomonte_in_flow'].append(results[4])
    results_dict['belomonte_out_flow'].append(results[5])
    results_dict['belomonte_level_ht'].append(results[6])

# # Convert to DataFrame for easier plotting
# results_df = pd.DataFrame(results_dict)

## Plot flow rate during which masks are computed

In [10]:
pimental_sept_dec = pimental_df[pimental_df['Date'].dt.month.isin([9, 10, 11])]
belomonte_sept_dec = belomonte_df[belomonte_df['Date'].dt.month.isin([9, 10, 11])]

# Add 'Year' column to the filtered dataframes
pimental_sept_dec['Year'] = pimental_sept_dec['Date'].dt.year
belomonte_sept_dec['Year'] = belomonte_sept_dec['Date'].dt.year

# Combine the outflow data from both dataframes
pimental_outflow = pimental_sept_dec[['Date', 'Year', 'outflow']]
belomonte_outflow = belomonte_sept_dec[['Date', 'outflow']]
combined_outflow = pimental_outflow.copy()
combined_outflow['outflow'] += belomonte_outflow['outflow'].values

fig, axes = plt.subplots(2,1,figsize=(7, 5),sharex=True,sharey=True)
ax1=axes[1];ax2=axes[0];

# First subplot
combined_outflow.boxplot(column='outflow', by='Year',ax=ax1,fontsize=15);
ax1.set_title('Combined Outflow (Pimental + Belomonte) (Sep-Dec)')
ax1.set_xlabel('Year')
ax1.set_ylabel('Flow')
ax1.set_ylim([0,3000])
ax1.grid(False);

#Second subplot
pimental_sept_dec.boxplot(column='inflow', by='Year',ax=ax2,fontsize=15);
ax2.set_title('Inflow (Pimental) (Sep-Dec)')
ax2.set_xlabel('Year')
ax2.set_ylabel('Flow')
ax2.set_ylim([0,3000]);
ax2.grid(False);
plt.tight_layout()
plt.show()


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pimental_sept_dec['Year'] = pimental_sept_dec['Date'].dt.year
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  belomonte_sept_dec['Year'] = belomonte_sept_dec['Date'].dt.year


## Correlation between reservoir leve and (inflow-outflow)

- Inflow is taken as Pimental inflow in the upstream
- Outflow is Outflow from Pimental and Belomonte combined as they take different route to merge in the downstream

In [11]:
fig, axes = plt.subplots(2,1,figsize=(7, 5),sharex=True)
ax1=axes[1];ax2=axes[0];

# First subplot
ax1.scatter(pimental_df['Date'],pimental_df['inflow']-(pimental_df['outflow']+belomonte_df['outflow']))
ax1.set_title('Diffrerence inflow-outflow')
ax1.set_xlabel('Year')
ax1.set_ylabel('Flow')
#ax1.set_ylim([0,3000])
#ax1.grid(False);

#Second subplot
ax2.scatter(belomonte_df['Date'],belomonte_df['Level']);
ax2.set_title('BeloMonte Level')
ax2.set_xlabel('Year')
ax2.set_ylabel('Level')
ax2.set_ylim([90,100])
#ax2.grid(False);
plt.tight_layout()
plt.show()

### Scatterplot of correlation

In [12]:
# Ensure the data frames are aligned on dates
merged_df = pd.merge(pimental_df, belomonte_df, on='Date', suffixes=('_pimental', '_belomonte'))

# # Calculate the difference series
merged_df['difference'] = merged_df['inflow_pimental'] - (merged_df['outflow_pimental'] + merged_df['outflow_belomonte'])

# Calculate the correlation
correlation = merged_df['difference'].corr(merged_df['Level_belomonte'])
print("Correlation between difference and Belomonte level:", correlation)

plt.figure(figsize=(10, 6))
plt.scatter(merged_df['difference'], merged_df['Level_belomonte'], alpha=0.5, color='purple')
plt.title('Scatter Plot between Difference and Belomonte Level')
plt.xlabel('Difference (Inflow - (Pimental Outflow + Belomonte Outflow))')
plt.ylabel('Belomonte Level')
plt.ylim([90,100])
plt.grid(True)
plt.show()

Correlation between difference and Belomonte level: 0.016173423497956925


In [13]:
max_outflow = max(pimental_df['inflow'])

plt.figure(figsize=(15, 5))

# First subplot
plt.subplot(1, 2, 1)
plt.plot_date(pimental_df['Date'], pimental_df['outflow'] + belomonte_df['outflow'], color='red', markersize=5)
plt.plot_date(pimental_df['Date'], pimental_df['inflow'], color='blue', linestyle='--', markersize=0, alpha=0.5)
plt.title('Total Downstream Flow')
plt.xlabel('Year')
plt.ylabel('Flow')
for i in range(len(results_dict)):
    normalized_ymax = [outflow / max_outflow for outflow in results_dict['pimental_in_flow'][i]]
    for date, ymax in zip(results_dict['dates'][i], normalized_ymax):
        plt.axvline(x=date, ymin=0,ymax=ymax,color='black', linewidth=1, )
# Second subplot
plt.subplot(1, 2, 2)
plt.plot_date(belomonte_df['Date'], belomonte_df['inflow'], color='blue', linestyle='--', markersize=0, alpha=0.5)
plt.title('Belomonte Inflow')
plt.xlabel('Year')
plt.ylabel('Flow')

plt.tight_layout()
plt.show()
