In [1]:
# Import dependencies
import pandas as pd
from pathlib import Path

In [3]:
# Locations of the raw census CSVs, relative to the notebook's working directory
DATA_DIR = Path("Resources")
squirrel_2020 = DATA_DIR / "squirrel_data_2020.csv"
squirrel_2018 = DATA_DIR / "squirrel_data_2018.csv"

## 2020 Dataset

In [5]:
# Load the 2020 census CSV into a dataframe.
# NOTE(review): "unicode_escape" also interprets backslash sequences inside the
# file's text; presumably chosen to work around a decode error -- confirm the
# file's real encoding (e.g. "latin-1" / "cp1252") before relying on text columns.
s2020_df = pd.read_csv(filepath_or_buffer=squirrel_2020, encoding="unicode_escape")

# Report the (rows, columns) shape, then preview the first five rows
print(f"2020 Dataset: {s2020_df.shape}")
s2020_df.head(n=5)

2020 Dataset: (433, 16)


Unnamed: 0,Area Name,Area ID,Park Name,Park ID,Squirrel ID,Primary Fur Color,Highlights in Fur Color,Color Notes,Location,Above Ground (Height in Feet),Specific Location,Activities,Interactions with Humans,Other Notes or Observations,Squirrel Latitude (DD.DDDDDD),Squirrel Longitude (-DD.DDDDDD)
0,UPPER MANHATTAN,A,Fort Tryon Park,1,A-01-01,Gray,White,,Ground Plane,,,Foraging,Indifferent,,40.85941,-73.933936
1,UPPER MANHATTAN,A,Fort Tryon Park,1,A-01-02,Gray,White,,Ground Plane,,,Foraging,Indifferent,Looks skinny,40.859436,-73.933937
2,UPPER MANHATTAN,A,Fort Tryon Park,1,A-01-03,Gray,White,,Ground Plane,,,"Eating, Digging something",Indifferent,,40.859416,-73.933894
3,UPPER MANHATTAN,A,Fort Tryon Park,1,A-01-04,Gray,White,,Ground Plane,,,Running,Indifferent,,40.859418,-73.933895
4,UPPER MANHATTAN,A,Fort Tryon Park,1,A-01-05,Gray,Cinnamon,,Ground Plane,,,"Running, Eating",Indifferent,She left food,40.859493,-73.93359


In [15]:
# Show the column labels of the 2020 dataframe (rendered as the cell's output)
s2020_df.columns

Index(['Area Name', 'Area ID', 'Park Name', 'Park ID', 'Squirrel ID',
       'Primary Fur Color', 'Highlights in Fur Color', 'Color Notes',
       'Location', 'Above Ground (Height in Feet)', 'Specific Location',
       'Activities', 'Interactions with Humans', 'Other Notes or Observations',
       'Squirrel Latitude (DD.DDDDDD)', 'Squirrel Longitude (-DD.DDDDDD)'],
      dtype='object')

### Minimum requirements:
- Latitude and longitude
- Park ID (links to the park CSV)
- Squirrel ID (unique?)
- Primary fur colour
- Drop rows with null values in: Highlights in Fur Color

NOTE: Data collected 01 March 2020 (3:00PM to 4:38PM)

In [7]:
# Confirm "Squirrel ID" is unique, for use as a primary key later.
# nunique() on its own ignores missing values and has to be compared with the
# row count by eye, so assert the primary-key properties explicitly first and
# then show the distinct count as the cell's output.
assert s2020_df["Squirrel ID"].notna().all(), 'Null "Squirrel ID" values found'
assert s2020_df["Squirrel ID"].is_unique, 'Duplicate "Squirrel ID" values found'
s2020_df["Squirrel ID"].nunique()

433

In [10]:
# Keep only the 2020 sightings that have every column below populated.
# NOTE(review): "Activities" and "Other Notes or Observations" are not in the
# notebook's "Minimum requirements" list, while Park ID, Squirrel ID and
# Primary Fur Color are listed there but not filtered here -- confirm which
# set of columns is intended.
required_2020 = [
    "Highlights in Fur Color",
    "Activities",
    "Other Notes or Observations",
    "Squirrel Latitude (DD.DDDDDD)",
    "Squirrel Longitude (-DD.DDDDDD)",
]
s2020_nonull = s2020_df.dropna(subset=required_2020, how="any")

s2020_nonull

Unnamed: 0,Area Name,Area ID,Park Name,Park ID,Squirrel ID,Primary Fur Color,Highlights in Fur Color,Color Notes,Location,Above Ground (Height in Feet),Specific Location,Activities,Interactions with Humans,Other Notes or Observations,Squirrel Latitude (DD.DDDDDD),Squirrel Longitude (-DD.DDDDDD)
1,UPPER MANHATTAN,A,Fort Tryon Park,1,A-01-02,Gray,White,,Ground Plane,,,Foraging,Indifferent,Looks skinny,40.859436,-73.933937
4,UPPER MANHATTAN,A,Fort Tryon Park,1,A-01-05,Gray,Cinnamon,,Ground Plane,,,"Running, Eating",Indifferent,She left food,40.859493,-73.933590
10,UPPER MANHATTAN,A,Fort Tryon Park,1,A-01-11,Gray,Black,,Ground Plane,,,"Eating, Digging",Indifferent,was intimidated by a dog,40.859576,-73.933738
36,UPPER MANHATTAN,A,Highbridge Park,3,A-03-01,Gray,Cinnamon,,"Above Ground, Specific Location",2,in tree,"Chasing, Climbing",Watches us from tree,#1 and #2 chasing each other,40.841178,-73.935482
37,UPPER MANHATTAN,A,Highbridge Park,3,A-03-02,Gray,White,,"Above Ground, Specific Location",2,in tree,"Chasing, Climbing, Eating",Runs From,#1 and #2 chasing each other,40.841204,-73.935434
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
428,BROOKLYN,D,McCarren Park,22,D-22-40,Cinnamon,White,,Ground Plane,,,Foraging,Indifferent,Fluffy,40.721173,-73.950781
429,BROOKLYN,D,McCarren Park,22,D-22-41,Cinnamon,Cinnamon,,Above Ground,,,"Running, Foraging",Indifferent,Skinny,40.721161,-73.950732
430,BROOKLYN,D,McCarren Park,22,D-22-42,Gray,Gray,,Above Ground,,,Climbing,Indifferent,Clinging to tree,40.721124,-73.950797
431,BROOKLYN,D,McCarren Park,22,D-22-43,Gray,White,,Above Ground,10,,"Running, Chasing, Climbing",Indifferent,Playing with another squirrel in a tree,40.721026,-73.950765


## 2018 Dataset

In [14]:
# Load the 2018 census CSV into a dataframe (pandas' default encoding)
s2018_df = pd.read_csv(filepath_or_buffer=squirrel_2018)

# Report the (rows, columns) shape, then preview the first five rows
print(f"2018 Dataset: {s2018_df.shape}")
s2018_df.head(n=5)

2018 Dataset: (3023, 31)


Unnamed: 0,X,Y,Unique Squirrel ID,Hectare,Shift,Date,Hectare Squirrel Number,Age,Primary Fur Color,Highlight Fur Color,...,Kuks,Quaas,Moans,Tail flags,Tail twitches,Approaches,Indifferent,Runs from,Other Interactions,Lat/Long
0,-73.956134,40.794082,37F-PM-1014-03,37F,PM,10142018,3,,,,...,False,False,False,False,False,False,False,False,,POINT (-73.9561344937861 40.7940823884086)
1,-73.968857,40.783783,21B-AM-1019-04,21B,AM,10192018,4,,,,...,False,False,False,False,False,False,False,False,,POINT (-73.9688574691102 40.7837825208444)
2,-73.974281,40.775534,11B-PM-1014-08,11B,PM,10142018,8,,Gray,,...,False,False,False,False,False,False,False,False,,POINT (-73.97428114848522 40.775533619083)
3,-73.959641,40.790313,32E-PM-1017-14,32E,PM,10172018,14,Adult,Gray,,...,False,False,False,False,False,False,False,True,,POINT (-73.9596413903948 40.7903128889029)
4,-73.970268,40.776213,13E-AM-1017-05,13E,AM,10172018,5,Adult,Gray,Cinnamon,...,False,False,False,False,False,False,False,False,,POINT (-73.9702676472613 40.7762126854894)


In [17]:
# Show the column labels of the 2018 dataframe (rendered as the cell's output)
s2018_df.columns

Index(['X', 'Y', 'Unique Squirrel ID', 'Hectare', 'Shift', 'Date',
       'Hectare Squirrel Number', 'Age', 'Primary Fur Color',
       'Highlight Fur Color', 'Combination of Primary and Highlight Color',
       'Color notes', 'Location', 'Above Ground Sighter Measurement',
       'Specific Location', 'Running', 'Chasing', 'Climbing', 'Eating',
       'Foraging', 'Other Activities', 'Kuks', 'Quaas', 'Moans', 'Tail flags',
       'Tail twitches', 'Approaches', 'Indifferent', 'Runs from',
       'Other Interactions', 'Lat/Long'],
      dtype='object')

In [19]:
# Keep only the 2018 sightings that have a primary fur colour and both
# coordinate columns (X, Y) populated.
# NOTE(review): uniqueness of "Unique Squirrel ID" is not checked in the cells
# visible here -- verify before using it as a key.
required_2018 = ["Primary Fur Color", "X", "Y"]
s2018_nonull = s2018_df.dropna(subset=required_2018, how="any")

s2018_nonull

Unnamed: 0,X,Y,Unique Squirrel ID,Hectare,Shift,Date,Hectare Squirrel Number,Age,Primary Fur Color,Highlight Fur Color,...,Kuks,Quaas,Moans,Tail flags,Tail twitches,Approaches,Indifferent,Runs from,Other Interactions,Lat/Long
2,-73.974281,40.775534,11B-PM-1014-08,11B,PM,10142018,8,,Gray,,...,False,False,False,False,False,False,False,False,,POINT (-73.97428114848522 40.775533619083)
3,-73.959641,40.790313,32E-PM-1017-14,32E,PM,10172018,14,Adult,Gray,,...,False,False,False,False,False,False,False,True,,POINT (-73.9596413903948 40.7903128889029)
4,-73.970268,40.776213,13E-AM-1017-05,13E,AM,10172018,5,Adult,Gray,Cinnamon,...,False,False,False,False,False,False,False,False,,POINT (-73.9702676472613 40.7762126854894)
5,-73.968361,40.772591,11H-AM-1010-03,11H,AM,10102018,3,Adult,Cinnamon,White,...,False,False,False,False,True,False,True,False,,POINT (-73.9683613516225 40.7725908847499)
6,-73.954120,40.793181,36H-AM-1010-02,36H,AM,10102018,2,Adult,Gray,,...,False,False,False,False,False,False,False,False,,POINT (-73.9541201789795 40.7931811701082)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3018,-73.963943,40.790868,30B-AM-1007-04,30B,AM,10072018,4,Adult,Gray,,...,False,False,False,False,False,False,False,True,,POINT (-73.9639431360458 40.7908677445466)
3019,-73.970402,40.782560,19A-PM-1013-05,19A,PM,10132018,5,Adult,Gray,White,...,False,False,False,False,False,False,True,False,,POINT (-73.9704015859639 40.7825600069973)
3020,-73.966587,40.783678,22D-PM-1012-07,22D,PM,10122018,7,Adult,Gray,"Black, Cinnamon, White",...,False,False,False,False,False,False,True,False,,POINT (-73.9665871993517 40.7836775064883)
3021,-73.963994,40.789915,29B-PM-1010-02,29B,PM,10102018,2,,Gray,"Cinnamon, White",...,False,False,False,False,False,False,True,False,,POINT (-73.9639941227864 40.7899152327912)
