### Introduction

[Ocearch.org](https://www.ocearch.org/about/) is an organization that collects data on marine wildlife, including sharks. This notebook pulls data from the ocearch.org tracker and formats it for follow-on analysis.

### Code

#### Pull data

In [1]:
import requests
# Tracker endpoint; the response body is parsed as JSON in the cells below.
url = "http://www.ocearch.org/tracker/ajax/filter-sharks"
headers = {'Accept' : 'application/json'}
# Without a timeout, requests waits indefinitely if the server stalls,
# which would hang the kernel.
resp = requests.get(url, headers=headers, timeout=60)
# Surface 4xx/5xx responses here rather than as a confusing JSON error later.
resp.raise_for_status()
resp

<Response [200]>

#### Explore data

In [2]:
# Show every field of the first record, alphabetically, with long values
# truncated to 200 characters so the output stays readable.
first_record = resp.json()[0]
for key in sorted(first_record):
    print(key, str(first_record[key])[:200])

active 1
cnt_inactive_pings 0
description <p>Oprah was named by expedition leader Chris Fischer. Fischer named her after Oprah Winfrey, one of America&#39;s great philanthropists who has done much with education in Africa.</p>

dist_24_hours 0.000
dist_72_hours 0.000
dist_total 2816.662
gender Female
id 3
images [{'id': '218', 'filename': 'Screen Shot 2013-06-20 at 11.15.57 AM.png', 'encrypted_name': '4ffefafc2f699e5837c56cb2043b9798', 'description': None, 'is_primary': True}, {'id': '188', 'filename': 'Scree
isMobile False
is_alive 1
last_updated 1531850549
latestPing 1404703826
length 9 ft 10 in.
name Oprah
pingAge sharkmore30
pingCriteria {'interval': '30 year'}
pings [{'active': '1', 'id': '36902', 'datetime': '6 July 2014 1:57:28 PM', 'tz_datetime': '6 July 2014 1:57:28 PM +0900', 'latitude': '-34.60661', 'longitude': '21.15244'}, {'active': '1', 'id': '36666', '
platform None
profile_url http://dev.ocearch.org/profile/oprah/
species White Shark (Carcharodon carcharias)
species_i

#### Parse main json response

In [3]:
import pandas as pd
# Load the parsed JSON response into a DataFrame.
df = pd.DataFrame(resp.json())

# Per-animal attributes carried forward into the final dataset.
columns = [
    'id',
    'name',
    'gender',
    'species',
    'weight',
    'length',
    'tagDate',
    'dist_total',
]
df.loc[:, columns].head()

Unnamed: 0,id,name,gender,species,weight,length,tagDate,dist_total
0,3,Oprah,Female,White Shark (Carcharodon carcharias),686 lb,9 ft 10 in.,7 March 2012,2816.662
1,4,Albertina,Female,White Shark (Carcharodon carcharias),1110 lb,11 ft 6 in.,8 March 2012,1830.593
2,5,Helen,Female,White Shark (Carcharodon carcharias),765 lb,10 ft 2 in.,8 March 2012,4436.661
3,6,Brenda,Female,White Shark (Carcharodon carcharias),1310 lb,12 ft 2 in.,8 March 2012,2966.902
4,7,Madiba,Male,White Shark (Carcharodon carcharias),659 lb,9 ft 8 in.,8 March 2012,3537.423


#### Filter out non-shark data

In [4]:
# (rows, columns) of the full response, before the species filter below
df.shape

(275, 30)

In [5]:
# Keep only rows whose species text mentions "shark" (case-insensitive).
# Missing species values are treated as empty strings, so they never match.
species_text = df['species'].fillna('')
is_shark = species_text.str.contains('shark', case=False)
df = df[is_shark]
df.shape

(239, 30)

#### Extract ping data
Each row in the dataframe represents an individual shark, but each shark may have hundreds or thousands of 'pings' associated with it.

In [6]:
# First three pings of the first remaining shark. Positional access (.iloc) is
# used because the species filter keeps the original index labels, so label 0
# is not guaranteed to survive it.
df.pings.iloc[0][:3]

[{'active': '1',
  'datetime': '6 July 2014 1:57:28 PM',
  'id': '36902',
  'latitude': '-34.60661',
  'longitude': '21.15244',
  'tz_datetime': '6 July 2014 1:57:28 PM +0900'},
 {'active': '1',
  'datetime': '23 June 2014 11:40:09 AM',
  'id': '36666',
  'latitude': '-34.78752',
  'longitude': '19.42479',
  'tz_datetime': '23 June 2014 11:40:09 AM +0900'},
 {'active': '1',
  'datetime': '15 June 2014 10:15:44 PM',
  'id': '36500',
  'latitude': '-34.42487',
  'longitude': '21.09754',
  'tz_datetime': '15 June 2014 10:15:44 PM +0900'}]

In [7]:
# Build one DataFrame of pings per shark. The ping's own 'id' column is
# overwritten with the shark's id so it can serve as the join key later on.
ping_frames = [
    pd.DataFrame(shark.pings).assign(id=shark.id)
    for shark in df.itertuples()
]

len(ping_frames)

239

In [8]:
# Preview the pings frame built from the first shark row
ping_frames[0].head()

Unnamed: 0,active,datetime,id,latitude,longitude,tz_datetime
0,1,6 July 2014 1:57:28 PM,3,-34.60661,21.15244,6 July 2014 1:57:28 PM +0900
1,1,23 June 2014 11:40:09 AM,3,-34.78752,19.42479,23 June 2014 11:40:09 AM +0900
2,1,15 June 2014 10:15:44 PM,3,-34.42487,21.09754,15 June 2014 10:15:44 PM +0900
3,1,3 June 2014 11:23:57 AM,3,-34.70432271674724,20.21013441406251,3 June 2014 11:23:57 AM +0900
4,1,29 May 2014 4:53:57 AM,3,-34.65556,19.37459,29 May 2014 4:53:57 AM +0900


#### Merge ping data

In [9]:
# Stack the per-shark frames into one table. Each input frame is indexed
# 0..n-1, so ignore_index=True is needed to give the result unique row labels.
pings = pd.concat(ping_frames, ignore_index=True)
pings.shape

(65793, 6)

In [10]:
# Preview the combined pings table
pings.head()

Unnamed: 0,active,datetime,id,latitude,longitude,tz_datetime
0,1,6 July 2014 1:57:28 PM,3,-34.60661,21.15244,6 July 2014 1:57:28 PM +0900
1,1,23 June 2014 11:40:09 AM,3,-34.78752,19.42479,23 June 2014 11:40:09 AM +0900
2,1,15 June 2014 10:15:44 PM,3,-34.42487,21.09754,15 June 2014 10:15:44 PM +0900
3,1,3 June 2014 11:23:57 AM,3,-34.70432271674724,20.21013441406251,3 June 2014 11:23:57 AM +0900
4,1,29 May 2014 4:53:57 AM,3,-34.65556,19.37459,29 May 2014 4:53:57 AM +0900


In [11]:
# clean up datetime columns
# The tz_datetime strings carry a UTC offset (e.g. "+0900"). Convert to UTC
# explicitly, then drop the timezone so the column holds naive UTC timestamps,
# rather than relying on pandas' default handling of offset-bearing strings.
pings['datetime'] = pd.to_datetime(pings.tz_datetime, utc=True).dt.tz_localize(None)
# Rebind instead of mutating in place; the raw string column is no longer needed.
pings = pings.drop(columns=['tz_datetime'])
pings.head()

Unnamed: 0,active,datetime,id,latitude,longitude
0,1,2014-07-06 04:57:28,3,-34.60661,21.15244
1,1,2014-06-23 02:40:09,3,-34.78752,19.42479
2,1,2014-06-15 13:15:44,3,-34.42487,21.09754
3,1,2014-06-03 02:23:57,3,-34.70432271674724,20.21013441406251
4,1,2014-05-28 19:53:57,3,-34.65556,19.37459


In [12]:
# Per-shark attribute columns that get merged onto the pings in the next cell
df[columns].head()

Unnamed: 0,id,name,gender,species,weight,length,tagDate,dist_total
0,3,Oprah,Female,White Shark (Carcharodon carcharias),686 lb,9 ft 10 in.,7 March 2012,2816.662
1,4,Albertina,Female,White Shark (Carcharodon carcharias),1110 lb,11 ft 6 in.,8 March 2012,1830.593
2,5,Helen,Female,White Shark (Carcharodon carcharias),765 lb,10 ft 2 in.,8 March 2012,4436.661
3,6,Brenda,Female,White Shark (Carcharodon carcharias),1310 lb,12 ft 2 in.,8 March 2012,2966.902
4,7,Madiba,Male,White Shark (Carcharodon carcharias),659 lb,9 ft 8 in.,8 March 2012,3537.423


In [13]:
# Attach each shark's attributes to its pings; 'id' holds the shark id on both sides.
shark_attributes = df[columns]
joined = pd.merge(pings, shark_attributes, how='inner', on='id')
joined.shape

(65793, 12)

In [14]:
# Preview the merged pings + shark attributes table
joined.head()

Unnamed: 0,active,datetime,id,latitude,longitude,name,gender,species,weight,length,tagDate,dist_total
0,1,2014-07-06 04:57:28,3,-34.60661,21.15244,Oprah,Female,White Shark (Carcharodon carcharias),686 lb,9 ft 10 in.,7 March 2012,2816.662
1,1,2014-06-23 02:40:09,3,-34.78752,19.42479,Oprah,Female,White Shark (Carcharodon carcharias),686 lb,9 ft 10 in.,7 March 2012,2816.662
2,1,2014-06-15 13:15:44,3,-34.42487,21.09754,Oprah,Female,White Shark (Carcharodon carcharias),686 lb,9 ft 10 in.,7 March 2012,2816.662
3,1,2014-06-03 02:23:57,3,-34.70432271674724,20.21013441406251,Oprah,Female,White Shark (Carcharodon carcharias),686 lb,9 ft 10 in.,7 March 2012,2816.662
4,1,2014-05-28 19:53:57,3,-34.65556,19.37459,Oprah,Female,White Shark (Carcharodon carcharias),686 lb,9 ft 10 in.,7 March 2012,2816.662


### Write to disk

#### Write semi-cleaned results to disk

In [15]:
from IPython.display import FileLink

import os

fname = '../data/sharks.csv'
# to_csv does not create missing parent directories, so make sure the
# target directory exists before writing.
os.makedirs(os.path.dirname(fname), exist_ok=True)
joined.to_csv(fname, index=False)
FileLink(fname)

#### Clean length/weight columns

In [16]:
def clean_weight(value):
    """Convert a weight string such as ``"1,110 lbs"`` to a float.

    Parameters
    ----------
    value : str, number or None
        Raw weight. Strings have the characters ``l``, ``b``, ``s``, ``,``
        and ``+`` removed before conversion.

    Returns
    -------
    float, or ``value`` unchanged
        Numeric inputs (including NaN) and falsy inputs (``None``, ``""``)
        are returned as-is.

    Raises
    ------
    ValueError
        If the stripped string is not a valid float literal.
    """
    # Already numeric (or NaN): nothing to strip. This also keeps the cleaning
    # cell safe to re-run after the column has been converted to floats.
    if isinstance(value, (int, float)):
        return value
    if not value:
        return value
    # most values are like "123 lb"
    for character in 'lbs,+':
        value = value.replace(character, '')
    return float(value)

# Replace the raw weight strings with their cleaned values, then preview.
joined['weight'] = joined['weight'].map(clean_weight)
joined.head()

Unnamed: 0,active,datetime,id,latitude,longitude,name,gender,species,weight,length,tagDate,dist_total
0,1,2014-07-06 04:57:28,3,-34.60661,21.15244,Oprah,Female,White Shark (Carcharodon carcharias),686.0,9 ft 10 in.,7 March 2012,2816.662
1,1,2014-06-23 02:40:09,3,-34.78752,19.42479,Oprah,Female,White Shark (Carcharodon carcharias),686.0,9 ft 10 in.,7 March 2012,2816.662
2,1,2014-06-15 13:15:44,3,-34.42487,21.09754,Oprah,Female,White Shark (Carcharodon carcharias),686.0,9 ft 10 in.,7 March 2012,2816.662
3,1,2014-06-03 02:23:57,3,-34.70432271674724,20.21013441406251,Oprah,Female,White Shark (Carcharodon carcharias),686.0,9 ft 10 in.,7 March 2012,2816.662
4,1,2014-05-28 19:53:57,3,-34.65556,19.37459,Oprah,Female,White Shark (Carcharodon carcharias),686.0,9 ft 10 in.,7 March 2012,2816.662


In [17]:
def clean_length(value):
    """Convert a length string such as ``"9 ft 10 in."`` to total inches.

    Parameters
    ----------
    value : str, number or None
        Raw length. The text before ``ft`` is read as feet; the first
        whitespace-separated token after it (or of the whole string when
        there is no ``ft``) is read as inches.

    Returns
    -------
    float, or ``value`` unchanged
        Total length in inches. Numeric inputs (including NaN) and falsy
        inputs (``None``, ``""``) are returned as-is.

    Raises
    ------
    ValueError
        If the feet or inches token is not a valid number.
    """
    # Already numeric (or NaN): nothing to parse. This also keeps the cleaning
    # cell safe to re-run after the column has been converted.
    if isinstance(value, (int, float)):
        return value
    if not value:
        return value
    # most length values are like '3 ft 4 in.'
    feet_text, separator, inches_text = value.partition('ft')
    if not separator:
        # No feet component: the whole string is the inches part.
        inches_text = value
    # Start from a float so the return type does not depend on which parts exist.
    total = 0.0
    if separator:
        # float() rather than int() so fractional feet ("9.5 ft") are accepted.
        total += float(feet_text.strip()) * 12
    if inches_text.strip():
        total += float(inches_text.strip().split()[0])
    return total

# Replace the raw length strings with their cleaned values, then preview.
joined['length'] = joined['length'].map(clean_length)
joined.head()

Unnamed: 0,active,datetime,id,latitude,longitude,name,gender,species,weight,length,tagDate,dist_total
0,1,2014-07-06 04:57:28,3,-34.60661,21.15244,Oprah,Female,White Shark (Carcharodon carcharias),686.0,118.0,7 March 2012,2816.662
1,1,2014-06-23 02:40:09,3,-34.78752,19.42479,Oprah,Female,White Shark (Carcharodon carcharias),686.0,118.0,7 March 2012,2816.662
2,1,2014-06-15 13:15:44,3,-34.42487,21.09754,Oprah,Female,White Shark (Carcharodon carcharias),686.0,118.0,7 March 2012,2816.662
3,1,2014-06-03 02:23:57,3,-34.70432271674724,20.21013441406251,Oprah,Female,White Shark (Carcharodon carcharias),686.0,118.0,7 March 2012,2816.662
4,1,2014-05-28 19:53:57,3,-34.65556,19.37459,Oprah,Female,White Shark (Carcharodon carcharias),686.0,118.0,7 March 2012,2816.662


#### Write cleaned results to disk

In [18]:
# Write the frame with cleaned weight/length columns to a second CSV,
# separate from the semi-cleaned file written earlier.
fname = '../data/sharks_cleaned.csv'
# index=False: the row index is not written to the file.
joined.to_csv(fname, index=False)
# Render a link to the written file in the notebook output.
FileLink(fname)