# Shark attacks and climate change

## Import libraries and data

In [2]:
# import libraries
import pandas as pd
import numpy as np
import re
import warnings

# libraries for map plotting 
import geopandas
import matplotlib.pyplot as plt
from geodatasets import get_path


# import functions 
from functions import *

In [3]:
# Load the GSAF5 shark-attack spreadsheet straight from its public URL.
# `url` stays a separate name so the data source is easy to spot.
url = "https://www.sharkattackfile.net/spreadsheets/GSAF5.xls"
shark_attack_raw = pd.read_excel(url)

# Last expression of the cell: render the raw frame for a first look.
shark_attack_raw

Unnamed: 0,Date,Year,Type,Country,State,Location,Activity,Name,Sex,Age,...,Species,Source,pdf,href formula,href,Case Number,Case Number.1,original order,Unnamed: 21,Unnamed: 22
0,15 Mar 2024,2024.0,Unprovoked,AUSTRALIA,Queensland,Bargara Beach,Swimming,Brooklyn Sauer,F,13,...,Tiger shark,"Yahoo News, 3/15/2024",,,,,,,,
1,04 Mar 2024,2024.0,Unprovoked,USA,Hawaii,"Old Man's, Waikiki",Surfing,Matthew White,M,,...,Tiger shark 8',"Surfer, 3/6/2024F",,,,,,,,
2,02 Mar-2024,2024.0,Unprovoked,USA,Hawaii,"Rainbows, Oahu",Swimming,,F,11,...,3' to 4' shark,"Hawaii News Now, 3/4/2024",,,,,,,,
3,25 Feb-2024,2024.0,Unprovoked,AUSTRALIA,Western Australia,"Sandlnd Island, Jurian Bay",,female,F,46,...,Tiger shark,"WA Today, 2/26/2024",,,,,,,,
4,14 Feb-2024,2024.0,Unprovoked,INDIA,Maharashtra,"Vaitarna River, Palghar District",Fishing,Vicky Suresh Govari,M,32,...,"Bull shark, 7'","Times of India, 2/14/2024",,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6964,,,,,,,,,,,...,,,,http://sharkattackfile.net/spreadsheets/pdf_di...,,,,,,
6965,,,,,,,,,,,...,,,,http://sharkattackfile.net/spreadsheets/pdf_di...,,,,,,
6966,,,,,,,,,,,...,,,,http://sharkattackfile.net/spreadsheets/pdf_di...,,,,,,
6967,,,,,,,,,,,...,,,,http://sharkattackfile.net/spreadsheets/pdf_di...,,,,,,


## Data processing

In [6]:
# Normalise the column labels first so that the names listed below match
# (presumably lower-cased with underscores, judging by 'href_formula' — TODO confirm).
shark_attack_raw = clean_columns_names(shark_attack_raw)

# Discard the columns this analysis does not use.
shark_attack_clean = columns_drops(
    shark_attack_raw,
    [
        'year', 'type', 'name', 'unnamed:_11', 'time',
        'source', 'pdf', 'href_formula', 'href',
        'case_number', 'case_number.1', 'original_order',
        'unnamed:_21', 'unnamed:_22',
    ],
)

In [7]:
# Delete rows with null values
# NOTE(review): `thresh=2` presumably mirrors pandas' `dropna(thresh=...)`
# (keep rows with at least 2 non-null values) — confirm in functions.drop_rows_nulls.
shark_attack_clean = drop_rows_nulls(shark_attack_clean, thresh=2)

In [11]:
# Delete duplicates.
# Reassign instead of using `inplace=True`: this frame was returned by helper
# functions and may share data with an earlier object, so mutating it in place
# could alter that object or trigger SettingWithCopyWarning. Reassignment also
# keeps the cell safe to re-run.
shark_attack_clean = shark_attack_clean.drop_duplicates()
shark_attack_clean.shape

(6934, 9)

In [12]:
# Remove rows in which every remaining column is empty, then tally the
# missing values left in each column.
shark_attack_clean = shark_attack_clean.dropna(how='all')
recuento_nulls = shark_attack_clean.isna().sum()
recuento_nulls

date           0
country       50
state        481
location     563
activity     583
sex          579
age         2984
injury        35
species_    3123
dtype: int64

In [13]:
# Preview the first rows after the column and row clean-up.
shark_attack_clean.head()

Unnamed: 0,date,country,state,location,activity,sex,age,injury,species_
0,15 Mar 2024,AUSTRALIA,Queensland,Bargara Beach,Swimming,F,13.0,"Minor injuries to back, abdomen and legs",Tiger shark
1,04 Mar 2024,USA,Hawaii,"Old Man's, Waikiki",Surfing,M,,"No injury, shark bit surfboard",Tiger shark 8'
2,02 Mar-2024,USA,Hawaii,"Rainbows, Oahu",Swimming,F,11.0,Lacerations to left foot,3' to 4' shark
3,25 Feb-2024,AUSTRALIA,Western Australia,"Sandlnd Island, Jurian Bay",,F,46.0,Leg bitten,Tiger shark
4,14 Feb-2024,INDIA,Maharashtra,"Vaitarna River, Palghar District",Fishing,M,32.0,Calf of lower left leg injured,"Bull shark, 7'"


In [14]:
# Date handling
# NOTE(review): presumably parses 'date' to datetime and keeps only rows after 1989
# (the dtype listing below shows datetime64[ns]; the earliest row displayed later
# is from 1990) — confirm in functions.clean_and_filter_dates.
shark_attack_values = clean_and_filter_dates(shark_attack_clean, 'date', '1989')

In [15]:
# Check the column dtypes after the date step.
shark_attack_values.dtypes

date        datetime64[ns]
country             object
state               object
location            object
activity            object
sex                 object
age                 object
injury              object
species_            object
dtype: object

In [16]:
# Derive a categorical 'injuries' label from the free-text injury description,
# then list the columns to confirm the new one was added.
shark_attack_values['injuries'] = (
    shark_attack_values['injury']
    .apply(categorize_injury)
)
shark_attack_values.columns

Index(['date', 'country', 'state', 'location', 'activity', 'sex', 'age',
       'injury', 'species_', 'injuries'],
      dtype='object')

In [17]:
# Distribution of the derived 'injuries' label.
shark_attack_values['injuries'].value_counts()

injuries
No Fatal    2639
Fatal        324
Name: count, dtype: int64

In [20]:
shark_attack_values['activity'] = shark_attack_values['activity'].apply(filter_activity)

In [21]:
shark_attack_values['activity'].value_counts()

activitys
Surfing          911
Swimming         473
Fishing          196
Spearfishing     172
Unknown          136
                ... 
Bodysurfing        1
Transatlantic      1
Hand               1
Lobster            1
Board              1
Name: count, Length: 171, dtype: int64

In [23]:
# Drop the old columns
shark_attack_values = columns_drops(shark_attack_values, ['injury', 'activity'])

In [24]:
# Confirm which columns remain after the drop.
shark_attack_values.columns

Index(['date', 'country', 'state', 'location', 'sex', 'age', 'species_',
       'injuries', 'activitys'],
      dtype='object')

In [25]:
# Run the country names through the string cleaner from the functions module.
shark_attack_values['country'] = shark_attack_values['country'].apply(clean_strings)

In [26]:
# Inspect the cleaned country column.
shark_attack_values['country'] 

0       AUSTRALIA
1             USA
2             USA
3       AUSTRALIA
4           INDIA
          ...    
3260          USA
3262      REUNION
3263          USA
3264          USA
3265          USA
Name: country, Length: 2963, dtype: object

In [27]:
# Apply the same string clean-up to the state names and show the result.
shark_attack_values['state'] = (
    shark_attack_values['state']
    .apply(clean_strings)
)
shark_attack_values['state']

0              Queensland
1                  Hawaii
2                  Hawaii
3       Western Australia
4             Maharashtra
              ...        
3260               Hawaii
3262         Sainte Marie
3263               Hawaii
3264              Florida
3265           California
Name: state, Length: 2963, dtype: object

In [28]:
# Row/column count of the working frame at this stage.
shark_attack_values.shape

(2963, 9)

In [29]:
# Column listing before the case identifiers are added.
shark_attack_values.columns

Index(['date', 'country', 'state', 'location', 'sex', 'age', 'species_',
       'injuries', 'activitys'],
      dtype='object')

In [31]:
# Attach a case identifier to every row and display the result.
# NOTE(review): the ids appear as 'ND.NNNN', counting down from the top row
# (see the output below) — confirm the scheme in functions.generate_case_numbers.
shark_attack_values = generate_case_numbers(shark_attack_values)
shark_attack_values

Unnamed: 0,date,country,state,location,sex,age,species_,injuries,activitys,case_number
0,2024-03-15,AUSTRALIA,Queensland,Bargara Beach,F,13,Tiger shark,No Fatal,Swimming,ND.2963
1,2024-03-04,USA,Hawaii,"Old Man's, Waikiki",M,,Tiger shark 8',No Fatal,Surfing,ND.2962
2,2024-03-02,USA,Hawaii,"Rainbows, Oahu",F,11,3' to 4' shark,No Fatal,Swimming,ND.2961
3,2024-02-25,AUSTRALIA,Western Australia,"Sandlnd Island, Jurian Bay",F,46,Tiger shark,No Fatal,Unknown,ND.2960
4,2024-02-14,INDIA,Maharashtra,"Vaitarna River, Palghar District",M,32,"Bull shark, 7'",No Fatal,Fishing,ND.2959
...,...,...,...,...,...,...,...,...,...,...
2958,1990-04-01,USA,Hawaii,"Silver (Silva) Channels, Waialua, O'ahu",M,,,No Fatal,Sitting,ND.0005
2959,1990-03-05,REUNION,Sainte Marie,Baie de la Mare,M,,3 m [10'] bull shark,No Fatal,Surfing,ND.0004
2960,1990-02-17,USA,Hawaii,"Mokapu, Kane'ohe Marine Air Corps Station, O'ahu",M,,Two sharks seen in vicinity: 2.4 m & 4.25 m [...,Fatal,Scuba,ND.0003
2961,1990-02-05,USA,Florida,"Monster Hole, Sebastian Inlet, Indian River Co...",M,33,,No Fatal,Board,ND.0002


In [32]:
# Clean the location text as well; in the output below the parentheses are gone
# (e.g. "Silver (Silva) Channels" becomes "Silver Silva Channels").
shark_attack_values['location'] = (
    shark_attack_values['location']
    .apply(clean_strings)
)
shark_attack_values

Unnamed: 0,date,country,state,location,sex,age,species_,injuries,activitys,case_number
0,2024-03-15,AUSTRALIA,Queensland,Bargara Beach,F,13,Tiger shark,No Fatal,Swimming,ND.2963
1,2024-03-04,USA,Hawaii,"Old Man's, Waikiki",M,,Tiger shark 8',No Fatal,Surfing,ND.2962
2,2024-03-02,USA,Hawaii,"Rainbows, Oahu",F,11,3' to 4' shark,No Fatal,Swimming,ND.2961
3,2024-02-25,AUSTRALIA,Western Australia,"Sandlnd Island, Jurian Bay",F,46,Tiger shark,No Fatal,Unknown,ND.2960
4,2024-02-14,INDIA,Maharashtra,"Vaitarna River, Palghar District",M,32,"Bull shark, 7'",No Fatal,Fishing,ND.2959
...,...,...,...,...,...,...,...,...,...,...
2958,1990-04-01,USA,Hawaii,"Silver Silva Channels, Waialua, O'ahu",M,,,No Fatal,Sitting,ND.0005
2959,1990-03-05,REUNION,Sainte Marie,Baie de la Mare,M,,3 m [10'] bull shark,No Fatal,Surfing,ND.0004
2960,1990-02-17,USA,Hawaii,"Mokapu, Kane'ohe Marine Air Corps Station, O'ahu",M,,Two sharks seen in vicinity: 2.4 m & 4.25 m [...,Fatal,Scuba,ND.0003
2961,1990-02-05,USA,Florida,"Monster Hole, Sebastian Inlet, Indian River Co...",M,33,,No Fatal,Board,ND.0002


In [33]:
# Persist the cleaned frame to disk.
# NOTE(review): the index is written too (pandas default), so it comes back as an
# unnamed first column when the CSV is re-read — confirm that is intended.
shark_attack_values.to_csv('shark_attack_clean.csv')

## Gráficos