In [39]:
import glob
import geopandas as gpd
import pandas as pd
from datetime import datetime

hr5_cells = gpd.read_file('global-inputs/HR5-cells-beach-slope.gpkg')

def apply_tidal_correction(cell_id):
    slope = hr5_cells.loc[hr5_cells['cell_id'] == cell_id, 'beach_slope'].values[0]
    ref_ele = 0

    folder = f"data/HR5/{cell_id}"
    raw_df = pd.read_csv(f"{folder}/cell_timeseries.csv")
    raw_df['date'] = pd.to_datetime(raw_df['date'])

    tide_df = pd.read_json(f"{folder}/image_metadata.json")
    tide_df['date'] = tide_df['image_date'].apply(lambda x:datetime.strptime(x,'%Y-%m-%d'))

    merged_df = pd.merge(raw_df, tide_df[['date', 'tide_level_msl']], on='date', how='left') # Merge raw_df and tide_df on the date column

    merged_df['tidal_correction'] = (merged_df.tide_level_msl-ref_ele)/slope
    merged_df['corrected_IW_shoreline_position'] = (merged_df.IW_shoreline_chg + merged_df.tidal_correction).round(2)
    merged_df['cumulative_IW_shoreline_position'] = merged_df['corrected_IW_shoreline_position'].cumsum().round(2)
    merged_df['cumulative_EOV_shoreline_position'] = merged_df['EOV_shoreline_chg'].cumsum().round(2)

    return merged_df

def return_landcover_percentage_change(df):
    landcover = df.drop(columns='date')
    df['total_area'] = landcover.sum(axis=1)
    df['sand_area_percentage'] = (df['sand_area (Ha)']/df['total_area'])*100
    df['water_area_percentage'] = (df['water_area (Ha)']/df['total_area'])*100
    df['vegetation_area_percentage'] = (df['vegetation_area (Ha)']/df['total_area'])*100

    df[['sand_area_percentage_change', 'water_area_percentage_change', 'vegetation_area_percentage_change']] = df[['sand_area_percentage', 'water_area_percentage', 'vegetation_area_percentage']].diff().round(2)

    return df[['date', 'sand_area_percentage_change', 'water_area_percentage_change', 'vegetation_area_percentage_change']].fillna(0)

In [40]:
import json

sites_df = pd.read_csv('global-inputs/sites.csv')

cell_ids = list(sites_df['cell_id'])

cells = hr5_cells[hr5_cells['cell_id'].isin(cell_ids)]

# def remove_duplicates_from_metadata(cell_dir_path):
#     # remove duplicates from _image_metadata files
#     fn_meta = f"{cell_dir_path}/image_metadata.json"
   
#     with open(fn_meta, 'r') as existing_file:
#         existing_data = json.load(existing_file)
    
#     meta_df = pd.DataFrame.from_dict(existing_data, orient='index').transpose() # read image_metadata as pandas df
#     # drop duplicates based on date
#     # dup_df = meta_df[meta_df.duplicated(subset='image_date', keep=False)] 
#     # dup_df = dup_df[dup_df.image_id.str.contains('S2')] # where there are duplicates kee
#     # meta_df.drop_duplicates(subset=['image_date'], inplace=True)
#     # meta_df = pd.concat([meta_df, dup_df]).sort_values(by='image_date').reset_index(drop=True) 
#     return meta_df

# remove_duplicates_from_metadata("data/HR5/85bb58c7fffffff")

In [41]:
cell_ids = cells.cell_id.tolist()
# print(cell_ids)
for cell in cell_ids:
    #print(cell)
    corrected_df = apply_tidal_correction(cell)
    shorelines_df = corrected_df[['date', 'cumulative_IW_shoreline_position', 'cumulative_EOV_shoreline_position', 'corrected_IW_shoreline_position', 'EOV_shoreline_chg']]
    shorelines_df.rename(columns={'EOV_shoreline_chg': 'EOV_shoreline_position'})
    shorelines_df.to_csv(f"data/HR5/{cell}/shoreline_timeseries_tidal_correction.csv")

    landcover_df = corrected_df[['date', 'sand_area (Ha)', 'water_area (Ha)', 'vegetation_area (Ha)']]
    landcover_df = return_landcover_percentage_change(landcover_df)
    landcover_df.to_csv(f"data/HR5/{cell}/landcover_timeseries_percentage_change.csv")

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['total_area'] = landcover.sum(axis=1)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['sand_area_percentage'] = (df['sand_area (Ha)']/df['total_area'])*100
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['water_area_percentage'] = (df['water_area (Ha)']/df['total_area'])*100
A value is tryin