In [26]:
# IMPORT REQUIRED MODULES 
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import scipy.stats 
from scipy.stats import skew, kurtosis
import seaborn as sns
import numpy as np

In [3]:
# Per-sector input tables (discharge, error, coverage) and the destination
# file for the Koge Bugt Central subset.
discharge_file = 'R:/KOGE_BUGT/github/data/mankoff_2020_ice_discharge/sector_D.csv'
error_file = 'R:/KOGE_BUGT/github/data/mankoff_2020_ice_discharge/sector_err.csv'
coverage_file = 'R:/KOGE_BUGT/github/data/mankoff_2020_ice_discharge/sector_coverage.csv'
output_file = 'R:/KOGE_BUGT/github/data/mankoff_2020_ice_discharge/koge_bugt_C_ice_discharge.csv'

# Load only the Date and KOGE_BUGT_C columns from each table, relabelling the
# sector column after the quantity that table holds.
discharge_df = (
    pd.read_csv(discharge_file, usecols=['Date', 'KOGE_BUGT_C'])
    .rename(columns={'KOGE_BUGT_C': 'Discharge'})
)
error_df = (
    pd.read_csv(error_file, usecols=['Date', 'KOGE_BUGT_C'])
    .rename(columns={'KOGE_BUGT_C': 'Error'})
)
coverage_df = (
    pd.read_csv(coverage_file, usecols=['Date', 'KOGE_BUGT_C'])
    .rename(columns={'KOGE_BUGT_C': 'Coverage'})
)

# Align the three quantities on Date. merge() defaults to an inner join, so a
# date is kept only when it is present in all three tables.
merged_df = pd.merge(
    pd.merge(discharge_df, error_df, on='Date'),
    coverage_df,
    on='Date',
)

# Drop observations whose coverage is below 0.5, then write the remaining rows
# to the output file without the DataFrame index.
filtered_df = merged_df.loc[merged_df['Coverage'].ge(0.5)]
filtered_df.to_csv(output_file, index=False)


In [5]:
# COMPLETE THE SAME PROCESS AS ABOVE FOR KOGE_BUGT_N AND KOGE_BUGT_S
#
# Both glaciers go through one shared loop instead of two copy-pasted
# pipelines, so the north and south extractions cannot drift apart.
# discharge_file, error_file and coverage_file come from the earlier cell.

## KOGE BUGT NORTH
output_file_north = 'R:/KOGE_BUGT/github/data/mankoff_2020_ice_discharge/koge_bugt_N_ice_discharge.csv'

## KOGE BUGT SOUTH
output_file_south = 'R:/KOGE_BUGT/github/data/mankoff_2020_ice_discharge/koge_bugt_S_ice_discharge.csv'

# Sector column in the input tables -> CSV file that receives its subset.
# Insertion order is preserved, so north is processed before south.
sector_outputs = {
    'KOGE_BUGT_N': output_file_north,
    'KOGE_BUGT_S': output_file_south,
}

for sector_column, sector_output in sector_outputs.items():
    # Read Date plus this sector's column from each table and relabel the
    # sector column after the quantity the table holds.
    discharge_df = (
        pd.read_csv(discharge_file, usecols=['Date', sector_column])
        .rename(columns={sector_column: 'Discharge'})
    )
    error_df = (
        pd.read_csv(error_file, usecols=['Date', sector_column])
        .rename(columns={sector_column: 'Error'})
    )
    coverage_df = (
        pd.read_csv(coverage_file, usecols=['Date', sector_column])
        .rename(columns={sector_column: 'Coverage'})
    )

    # Inner-join the three quantities on Date (merge's default), keep only the
    # observations with coverage of at least 0.5, and save them.
    merged_df = discharge_df.merge(error_df, on='Date').merge(coverage_df, on='Date')
    filtered_df = merged_df[merged_df['Coverage'] >= 0.5]
    filtered_df.to_csv(sector_output, index=False)

# After the loop the *_df variables hold the KOGE_BUGT_S frames.

In [32]:
# PRINT SOME BASIC STATISTICS FOR EACH GLACIER BETWEEN 2016 AND 2024
start_date = '2016-01-01'
end_date = '2024-01-01'

# Display name of each glacier -> CSV file holding its filtered discharge record.
files = {
    'Koge Bugt North': 'R:/KOGE_BUGT/github/data/mankoff_2020_ice_discharge/koge_bugt_N_ice_discharge.csv',
    'Koge Bugt Central': 'R:/KOGE_BUGT/github/data/mankoff_2020_ice_discharge/koge_bugt_C_ice_discharge.csv',
    'Koge Bugt South': 'R:/KOGE_BUGT/github/data/mankoff_2020_ice_discharge/koge_bugt_S_ice_discharge.csv',
}

summary_stats = []
for name, path in files.items():
    df = pd.read_csv(path)
    df['Date'] = pd.to_datetime(df['Date'])

    # Restrict to the analysis window; both bounds are inclusive.
    filtered_df = df[df['Date'].between(start_date, end_date)]

    # Row labels of the largest and smallest discharge inside the window.
    max_label = filtered_df['Discharge'].idxmax()
    min_label = filtered_df['Discharge'].idxmin()

    # Missing discharge values are removed once and the result reused for
    # every statistic below.
    discharge = filtered_df['Discharge'].dropna()

    # Every numeric statistic is stored as a string rounded to two decimals.
    summary_stats.append({
        'Glacier': name,
        'Mean Discharge': format(discharge.mean(), '.2f'),
        'Median Discharge': format(discharge.median(), '.2f'),
        'STD Discharge': format(discharge.std(), '.2f'),
        'Min Discharge': format(discharge.min(), '.2f'),
        'Date of Min Discharge': filtered_df.loc[min_label, 'Date'].strftime('%Y-%m-%d'),
        'Max Discharge': format(discharge.max(), '.2f'),
        'Date of Max Discharge': filtered_df.loc[max_label, 'Date'].strftime('%Y-%m-%d'),
        # Skewness measures the asymmetry of a distribution
        'Skewness': format(skew(discharge), '.2f'),
        # Kurtosis measures the tailedness of a distribution
        'Kurtosis': format(kurtosis(discharge), '.2f'),
    })

# One row per glacier; the bare expression on the last line renders the table.
summary_df = pd.DataFrame(summary_stats)
summary_df

Unnamed: 0,Glacier,Mean Discharge,Median Discharge,STD Discharge,Min Discharge,Date of Min Discharge,Max Discharge,Date of Max Discharge,Skewness,Kurtosis
0,Koge Bugt North,4.17,4.15,0.13,3.87,2016-04-15,4.57,2021-08-17,0.47,0.02
1,Koge Bugt Central,15.93,16.38,1.21,13.05,2023-05-18,17.59,2021-08-17,-0.87,-0.57
2,Koge Bugt South,8.17,8.15,0.15,7.65,2023-09-15,8.61,2016-07-15,0.14,0.35
