In [1]:
import pandas as pd
import numpy as np
data = pd.read_csv("output/cleaned_data_GSAF5.csv")
data = data.drop(['Unnamed: 0'], axis=1)

In [2]:
data.head()

Unnamed: 0,Date,Year,Month,Hour,Place,Area,Activity,Sex,Fatal
0,2016.09.18,2016,9,13:00,USA,Florida,Surfing,M,0.0
1,2016.09.18,2016,9,11:00,USA,Florida,Surfing,M,0.0
2,2016.09.18,2016,9,10:43,USA,Florida,Surfing,M,0.0
3,2016.09.17,2016,9,Unknown,AUSTRALIA,Victoria,Surfing,M,0.0
4,2016.09.15,2016,9,Unknown,AUSTRALIA,Victoria,Surfing,M,0.0


### 1st view: Seasonality of the attacks

In [3]:
#Binning the data by season on a new column.

season_labels = ['Winter', 'Spring', 'Summer', 'Autumn']
cutoffs = [1, 4, 7, 10, 12]
bins = pd.cut(data['Month'], cutoffs, labels=season_labels)
data['Season'] = bins

In [4]:
#Ratio of attacks per season.

seasonality = data.pivot_table(index=['Season'], values=['Date'], aggfunc= len, fill_value=0) 
seasonality = seasonality.rename(columns= {'Date' : 'Count'})
seasonality['Ratio'] = seasonality['Count'] * 100 / seasonality['Count'].sum() 
seasonality = seasonality.round({'Ratio' : 2})
display(seasonality)

Unnamed: 0_level_0,Count,Ratio
Season,Unnamed: 1_level_1,Unnamed: 2_level_1
Winter,1167,23.48
Spring,1476,29.7
Summer,1511,30.4
Autumn,816,16.42


### 2nd view: indexing the attacks by Activity and Season

In [5]:
activity_season = data.pivot_table(index=['Activity', 'Season'], values=['Date'], aggfunc= len, fill_value=0) 
activity_season = activity_season.rename(columns= {'Date' : 'Count'})
activity_season['Ratio'] = activity_season['Count'] * 100 / activity_season['Count'].sum() 
activity_season = activity_season.round({'Ratio' : 2})
activity_season.sort_values(by=['Activity','Ratio'], ascending=False, inplace=True)
display(activity_season)

Unnamed: 0_level_0,Unnamed: 1_level_0,Count,Ratio
Activity,Season,Unnamed: 2_level_1,Unnamed: 3_level_1
Swimming,Spring,278,5.59
Swimming,Summer,260,5.23
Swimming,Winter,190,3.82
Swimming,Autumn,141,2.84
Surfing,Summer,378,7.61
Surfing,Spring,269,5.41
Surfing,Winter,254,5.11
Surfing,Autumn,153,3.08
Others,Spring,540,10.87
Others,Summer,481,9.68


In [6]:
fatal_activity = data.pivot_table(index=['Activity', 'Fatal'], values=['Date'], aggfunc= len, fill_value=0) 
fatal_activity = fatal_activity.rename(columns= {'Date' : 'Count'})
fatal_activity['Ratio'] = fatal_activity['Count'] * 100 / fatal_activity['Count'].sum() 
fatal_activity = fatal_activity.round({'Ratio' : 2})
display(fatal_activity)

Unnamed: 0_level_0,Unnamed: 1_level_0,Count,Ratio
Activity,Fatal,Unnamed: 2_level_1,Unnamed: 3_level_1
Diving,0.0,339,5.81
Diving,1.0,134,2.3
Fishing,0.0,917,15.72
Fishing,1.0,185,3.17
Others,0.0,1423,24.4
Others,1.0,665,11.4
Surfing,0.0,1082,18.55
Surfing,1.0,75,1.29
Swimming,0.0,584,10.01
Swimming,1.0,429,7.35


In [11]:
data.groupby('Activity').Season.value_counts(normalize=True)


Activity  Season
Diving    Summer    0.325062
          Spring    0.287841
          Winter    0.243176
          Autumn    0.143921
Fishing   Spring    0.295775
          Summer    0.282774
          Winter    0.248104
          Autumn    0.173348
Others    Spring    0.313771
          Summer    0.279489
          Winter    0.230099
          Autumn    0.176641
Surfing   Summer    0.358634
          Spring    0.255218
          Winter    0.240987
          Autumn    0.145161
Swimming  Spring    0.319908
          Summer    0.299194
          Winter    0.218642
          Autumn    0.162255
Name: Season, dtype: float64