## Reference code illustrating the pandas syntax

```python
# Sample time series data (monthly temperature data over several years).
# The month-end frequency is given as an offset object rather than the 'M'
# string alias: that alias is deprecated since pandas 2.2 (renamed 'ME'),
# whereas the offset object is accepted by both older and newer releases.
date_rng = pd.date_range(start='2010-01-01', end='2020-12-31', freq=pd.offsets.MonthEnd())
np.random.seed(0)  # fixed seed so the sample values are reproducible
data = pd.DataFrame(date_rng, columns=['date'])
data['temperature'] = np.random.normal(loc=15, scale=10, size=(len(date_rng)))  # Random temperature data

# Set date as index (rebinding instead of inplace=True keeps the step chainable)
data = data.set_index('date')

# Extract month and year from the DatetimeIndex
data['month'] = data.index.month
data['year'] = data.index.year

# Calculate climatology (mean temperature for each calendar month over all years).
# The result is a Series indexed by month (1-12) and named 'climatology_mean'.
climatology = data.groupby('month')['temperature'].mean().rename('climatology_mean')

# Merge the climatology with the original data: each row picks up the mean of its own month.
# NOTE(review): the sample output further down shows the merged rows grouped by month;
# other pandas releases may keep the original chronological order instead - confirm
# before relying on row order.
data = data.merge(climatology, left_on='month', right_index=True)

# Calculate the anomaly (departure of each value from its month's climatological mean)
data['anomaly'] = data['temperature'] - data['climatology_mean']
print(data.head(12))  # Print first 12 rows to check the result
```

The terminal output:

```terminal
            temperature  month  year  climatology_mean    anomaly
date                                                             
2010-01-31    32.640523      1  2010         22.887543   9.752980
2011-01-31    22.610377      1  2011         22.887543  -0.277166
2012-01-31    37.697546      1  2012         22.887543  14.810003
2013-01-31    27.302907      1  2013         22.887543   4.415364
2014-01-31    -1.138978      1  2014         22.887543 -24.026522
2015-01-31     8.275396      1  2015         22.887543 -14.612148
2016-01-31    26.394007      1  2016         22.887543   3.506464
2017-01-31    29.882522      1  2017         22.887543   6.994979
2018-01-31    15.105000      1  2018         22.887543  -7.782543
2019-01-31    34.229420      1  2019         22.887543  11.341877
2020-01-31    18.764255      1  2020         22.887543  -4.123288
2010-02-28    19.001572      2  2010         17.296001   1.705571

```

## Code goes under this line

In [1]:
import pandas as pd
import numpy as np
import os

In [2]:
# Location of the input CSV. The variable keeps its original name (nc_path)
# even though the file it points to is a .csv.
nc_path = r"C:\Users\Kris\Documents\amazonforcast\data\malaria_amazon\amazon_monthly_ldas\routing\csv\ZonalSt_hybas_lev05_Streamflow_tmax.csv"

# Parse the file once; this frame is the one transformed by the following cells.
data = pd.read_csv(nc_path)

# Independent copy of the freshly loaded table under the second name.
# The file was previously parsed a second time to obtain the same content;
# copying avoids the redundant read while keeping the two frames separate.
data_2001_2020 = data.copy()


In [3]:
# Convert the StdTime column to pandas datetimes, then echo the column
# so the resulting dtype can be checked in the cell output.
data["StdTime"] = data["StdTime"].pipe(pd.to_datetime)
data["StdTime"]

0       2001-01-31
1       2001-01-31
2       2001-01-31
3       2001-01-31
4       2001-01-31
           ...    
41671   2023-12-31
41672   2023-12-31
41673   2023-12-31
41674   2023-12-31
41675   2023-12-31
Name: StdTime, Length: 41676, dtype: datetime64[ns]

In [4]:
# Use StdTime as the row index so later cells can read month/year from data.index.
data = data.set_index('StdTime')


In [5]:
# Display the frame (now indexed by StdTime) to inspect its columns and values.
data

Unnamed: 0_level_0,PFAF_ID,COUNT,AREA,Variable,Dimensions,MAX
StdTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2001-01-31,61564,166.0,0.4150,Streamflow_tavg,StdTime,53.298950
2001-01-31,61565,16.0,0.0400,Streamflow_tavg,StdTime,766.090942
2001-01-31,61566,321.0,0.8025,Streamflow_tavg,StdTime,200.424118
2001-01-31,61567,251.0,0.6275,Streamflow_tavg,StdTime,759.766968
2001-01-31,61568,289.0,0.7225,Streamflow_tavg,StdTime,162.838287
...,...,...,...,...,...,...
2023-12-31,67204,698.0,1.7450,Streamflow_tavg,StdTime,965.670715
2023-12-31,67205,734.0,1.8350,Streamflow_tavg,StdTime,1298.559204
2023-12-31,67206,722.0,1.8050,Streamflow_tavg,StdTime,1387.592651
2023-12-31,67207,720.0,1.8000,Streamflow_tavg,StdTime,1394.842651


In [6]:
# Derive calendar month and year columns from the datetime index;
# 'month' is one of the grouping keys used for the climatology below.
data = data.assign(month=data.index.month, year=data.index.year)

In [7]:
# Keep only rows dated 2024 or earlier. Only an upper bound is applied here;
# no lower bound on the year is enforced by this filter.
data = data.loc[data.index.year <= 2024]

In [8]:
# Climatology: mean of the MAX column for every (PFAF_ID, calendar month) pair,
# computed only from rows whose year falls in the 2001-2020 baseline (inclusive).
# The result is a Series named 'climatology_mean' indexed by (PFAF_ID, month).
baseline_mask = (data.index.year >= 2001) & (data.index.year <= 2020)
climatology_2001_to_2020 = (
    data.loc[baseline_mask]
    .groupby(['PFAF_ID', 'month'])['MAX']
    .mean()
    .rename('climatology_mean')
)

In [9]:
# Attach the baseline climatology to every row: the frame's PFAF_ID and month
# columns are matched against the (PFAF_ID, month) index of the climatology Series.
# merge defaults to an inner join, so rows without a baseline value are dropped.
# Re-running this cell on an already merged frame would merge a second time.
data = pd.merge(
    data,
    climatology_2001_to_2020,
    left_on=['PFAF_ID', 'month'],
    right_index=True,
    suffixes=('', '_clim'),
)

# Anomaly = observed MAX minus the baseline mean for the same PFAF_ID and month
data['anomaly'] = data['MAX'].sub(data['climatology_mean'])

In [10]:
# Display the merged frame to check the new climatology_mean and anomaly columns.
data

Unnamed: 0_level_0,PFAF_ID,COUNT,AREA,Variable,Dimensions,MAX,month,year,climatology_mean,anomaly
StdTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2001-01-31,61564,166.0,0.415,Streamflow_tavg,StdTime,53.298950,1,2001,66.873812,-13.574861
2002-01-31,61564,166.0,0.415,Streamflow_tavg,StdTime,69.457275,1,2002,66.873812,2.583464
2003-01-31,61564,166.0,0.415,Streamflow_tavg,StdTime,61.097664,1,2003,66.873812,-5.776148
2004-01-31,61564,166.0,0.415,Streamflow_tavg,StdTime,57.528519,1,2004,66.873812,-9.345293
2005-01-31,61564,166.0,0.415,Streamflow_tavg,StdTime,64.865562,1,2005,66.873812,-2.008249
...,...,...,...,...,...,...,...,...,...,...
2019-12-31,67208,496.0,1.240,Streamflow_tavg,StdTime,1349.599243,12,2019,1814.002460,-464.403217
2020-12-31,67208,496.0,1.240,Streamflow_tavg,StdTime,1546.011719,12,2020,1814.002460,-267.990741
2021-12-31,67208,496.0,1.240,Streamflow_tavg,StdTime,2432.778564,12,2021,1814.002460,618.776105
2022-12-31,67208,496.0,1.240,Streamflow_tavg,StdTime,1480.862183,12,2022,1814.002460,-333.140277


In [11]:
# Write the frame, including the StdTime index and the anomaly columns, to a new CSV.
anomaly_csv_path = r"C:\Users\Kris\Documents\amazonforcast\data\malaria_amazon\amazon_monthly_ldas\routing\csv\ZonalSt_hybas_lev05_Streamflow_tmax_anomaly.csv"
data.to_csv(anomaly_csv_path)

Checking the result for a single basin (PFAF_ID 61564)

In [45]:
import plotly

In [12]:
# Subset of rows for a single basin, PFAF_ID 61564, echoed for inspection.
# NOTE(review): the variable name reads 61654 (digits transposed) while the
# filter value is 61564; the name is kept because the plotting cell uses it.
data_61654 = data.loc[data['PFAF_ID'].eq(61564)]
data_61654

Unnamed: 0_level_0,PFAF_ID,COUNT,AREA,Variable,Dimensions,MAX,month,year,climatology_mean,anomaly
StdTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2001-01-31,61564,166.0,0.415,Streamflow_tavg,StdTime,53.298950,1,2001,66.873812,-13.574861
2002-01-31,61564,166.0,0.415,Streamflow_tavg,StdTime,69.457275,1,2002,66.873812,2.583464
2003-01-31,61564,166.0,0.415,Streamflow_tavg,StdTime,61.097664,1,2003,66.873812,-5.776148
2004-01-31,61564,166.0,0.415,Streamflow_tavg,StdTime,57.528519,1,2004,66.873812,-9.345293
2005-01-31,61564,166.0,0.415,Streamflow_tavg,StdTime,64.865562,1,2005,66.873812,-2.008249
...,...,...,...,...,...,...,...,...,...,...
2019-12-31,61564,166.0,0.415,Streamflow_tavg,StdTime,62.324371,12,2019,88.229130,-25.904758
2020-12-31,61564,166.0,0.415,Streamflow_tavg,StdTime,69.944717,12,2020,88.229130,-18.284412
2021-12-31,61564,166.0,0.415,Streamflow_tavg,StdTime,64.641136,12,2021,88.229130,-23.587994
2022-12-31,61564,166.0,0.415,Streamflow_tavg,StdTime,71.331337,12,2022,88.229130,-16.897793


In [57]:
# Box plot of the anomaly column for the single-basin subset.
# The title names the quantity and basin actually plotted: the subset is
# filtered to PFAF_ID 61564 and its Variable column reads Streamflow_tavg
# (the title previously said "Temperature" and "61563").
# NOTE(review): with x set to the date index each box holds a single
# observation; x='month' would show the per-month distribution - confirm intent.
fig = plotly.boxplot_frame(
    data_61654,
    x=data_61654.index,
    y='anomaly',
    title='Streamflow Anomaly for PFAF_ID 61564',
)
fig.update_layout(xaxis_title='Date', yaxis_title='Anomaly', template='plotly_dark')
fig.show()