# UPDATE: Vanessa atalanta outbreaks

In [1]:
# libraries
import pickle as pkl
import re
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from IPython.display import display
from matplotlib import cm, colors
from scipy.stats import median_abs_deviation

In [2]:
# seaborn style for plots
sns.set_theme(
    style="darkgrid",
    color_codes=True,
    palette='Dark2',
)

# Hex colour codes of the 8-entry Dark2 matplotlib colormap.
# `plt.get_cmap` is used instead of `matplotlib.cm.get_cmap`, which was
# deprecated in matplotlib 3.7 and removed in 3.9.
# NOTE: the name `hex` shadows the builtin `hex()`; it is kept unchanged
# because other cells may refer to it.
cmap = plt.get_cmap('Dark2', 8)
# each cmap(i) is an RGBA tuple; rgb2hex turns it into a '#rrggbb' string
hex = [colors.rgb2hex(cmap(i)) for i in range(cmap.N)]

In [6]:
# load the (updated) daily and per-trap count tables; `date` is parsed as datetime
count_daily = pd.read_csv(
    '../../data/all_species_count_daily_upd.csv',
    parse_dates=['date'],
)

count_by_trap = pd.read_csv(
    '../../data/all_species_count_by_trap_upd.csv',
    parse_dates=['date'],
)

# keep only the daily records from 1998 to 2020 (both years included)
count_daily = (
    count_daily
    .loc[count_daily['year'].between(1998, 2020)]
    .reset_index(drop=True)
)

Unnamed: 0,species,date,day,month,year,count,tmp,cloud,wind_dir
0,Vanessa atalanta,1998-04-01,1,4,1998,,,,
1,Vanessa atalanta,1998-04-02,2,4,1998,,,,
2,Vanessa atalanta,1998-04-03,3,4,1998,,,,
3,Vanessa atalanta,1998-04-04,4,4,1998,,,,
4,Vanessa atalanta,1998-04-05,5,4,1998,,,,
...,...,...,...,...,...,...,...,...,...
74170,Pararge aegeria,2020-10-28,28,10,2020,,,,
74171,Pararge aegeria,2020-10-29,29,10,2020,,,,
74172,Pararge aegeria,2020-10-30,30,10,2020,,,,
74173,Pararge aegeria,2020-10-31,31,10,2020,,,,


### Classify outbreaks

To classify outbreaks, we use a threshold based on the median absolute deviation (MAD) of the seasonal counts and visualise the result in a stacked bar plot. Which season dominates in outbreak years?

In [13]:
def which_season(row):
    """Return the season label for a record based on its ``month`` value.

    Months below 7 are labelled 'spring' (April to June in this data set),
    months above 7 are labelled 'autumn' (August to October), and anything
    else (i.e. July) is labelled 'july' so it can be treated separately.

    Parameters
    ----------
    row : mapping or pandas.Series
        Must support ``row['month']``.

    Returns
    -------
    str
        One of 'spring', 'july' or 'autumn'.
    """
    month = row['month']
    if month > 7:
        return 'autumn'
    if month < 7:
        return 'spring'
    return 'july'

# label every daily record with its season by applying `which_season` row by row
count_daily['season'] = count_daily.apply(which_season, axis='columns')

count_daily


Unnamed: 0,species,date,day,month,year,count,tmp,cloud,wind_dir,season
0,Vanessa atalanta,1998-04-01,1,4,1998,,,,,spring
1,Vanessa atalanta,1998-04-02,2,4,1998,,,,,spring
2,Vanessa atalanta,1998-04-03,3,4,1998,,,,,spring
3,Vanessa atalanta,1998-04-04,4,4,1998,,,,,spring
4,Vanessa atalanta,1998-04-05,5,4,1998,,,,,spring
...,...,...,...,...,...,...,...,...,...,...
74170,Pararge aegeria,2020-10-28,28,10,2020,,,,,autumn
74171,Pararge aegeria,2020-10-29,29,10,2020,,,,,autumn
74172,Pararge aegeria,2020-10-30,30,10,2020,,,,,autumn
74173,Pararge aegeria,2020-10-31,31,10,2020,,,,,autumn


In [None]:

def outbreaks_per_season(df):
    """Sum counts per season and compare them with the long-term seasonal median.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``species``, ``year``, ``season``,
        ``trap_dir`` and ``count``.

    Returns
    -------
    pandas.DataFrame
        One row per (species, year, season, trap_dir) with the columns
        ``count`` (seasonal sum), ``count_avg`` (median of the seasonal sums
        across years), ``count_mad`` (normal-scaled median absolute deviation
        of the seasonal sums across years) and ``index`` (``count`` minus
        ``count_avg``, so zero corresponds to the median season).
    """
    group_keys = ['species', 'season', 'trap_dir']

    # sum up counts per season; min_count=1 keeps a season with no
    # observations at all as NaN instead of turning it into 0
    df = (
        df
        .groupby(['species', 'year', 'season', 'trap_dir'], as_index=False)
        ['count'].sum(min_count=1)
    )
    # compute average (median!) count across all years;
    # the joined Series is also called 'count', so rsuffix renames it to 'count_avg'
    df = df.join(
        other=df.groupby(group_keys)['count'].median(),
        on=group_keys,
        rsuffix='_avg'
    )
    # median absolute deviation across all years ('count_mad');
    # nan_policy='omit' skips missing seasonal sums, matching the NaN handling
    # of the pandas median above (the scipy default would propagate NaN)
    df = df.join(
        other=df.groupby(group_keys)['count'].apply(
            median_abs_deviation, scale='normal', nan_policy='omit'
        ),
        on=group_keys,
        rsuffix='_mad'
    )
    # computing outbreak index with zero reflecting the total average count
    df['index'] = df['count'] - df['count_avg']
    return df

# Apply the seasonal outbreak computation and display the resulting table.
# NOTE(review): `count_by_dir_short` is not defined in the visible cells;
# presumably it is created in another cell. Confirm before running.
count_outbreak_index_season = outbreaks_per_season(count_by_dir_short)
count_outbreak_index_season

In [11]:
# display the per-trap count table read from all_species_count_by_trap_upd.csv
count_by_trap

Unnamed: 0,species,date,day,month,year,trap,trap_dir,count,tmp,cloud,wind_dir
0,Vanessa atalanta,1982-04-01,1,4,1982,L-1,south,,,0,W
1,Vanessa atalanta,1982-04-01,1,4,1982,L-2,north,,,0,W
2,Vanessa atalanta,1982-04-01,1,4,1982,L-3,north,,,0,W
3,Vanessa atalanta,1982-04-01,1,4,1982,L-4,south,,,0,W
4,Vanessa atalanta,1982-04-01,1,4,1982,L-5,north,,,0,W
...,...,...,...,...,...,...,...,...,...,...,...
902995,Pararge aegeria,2021-11-01,1,11,2021,L-3,north,,,,
902996,Pararge aegeria,2021-11-01,1,11,2021,L-4,south,,,,
902997,Pararge aegeria,2021-11-01,1,11,2021,L-5,north,,,,
902998,Pararge aegeria,2021-11-01,1,11,2021,L-7,south,,,,
