In [1]:
# Import dependencies
import pandas as pd
from pathlib import Path
import re

# Import Data

In [2]:
# Base datasets
squirrel_2020 = Path("Resources/squirrel_data_2020.csv")
squirrel_2018 = Path("Resources/squirrel_data_2018.csv")

# Additional dataset from 2020
parks_2020 = Path("Resources/park_data_2020.csv")

# 2020 Dataset - Squirrels

In [3]:
# Create dataframe from CSV
s2020_df = pd.read_csv(squirrel_2020, encoding="unicode_escape")

# Display DataFrame and its shape
print(f"2020 Squirrel Dataset: {s2020_df.shape}")
s2020_df.head()

2020 Squirrel Dataset: (433, 16)


Unnamed: 0,Area Name,Area ID,Park Name,Park ID,Squirrel ID,Primary Fur Color,Highlights in Fur Color,Color Notes,Location,Above Ground (Height in Feet),Specific Location,Activities,Interactions with Humans,Other Notes or Observations,Squirrel Latitude (DD.DDDDDD),Squirrel Longitude (-DD.DDDDDD)
0,UPPER MANHATTAN,A,Fort Tryon Park,1,A-01-01,Gray,White,,Ground Plane,,,Foraging,Indifferent,,40.85941,-73.933936
1,UPPER MANHATTAN,A,Fort Tryon Park,1,A-01-02,Gray,White,,Ground Plane,,,Foraging,Indifferent,Looks skinny,40.859436,-73.933937
2,UPPER MANHATTAN,A,Fort Tryon Park,1,A-01-03,Gray,White,,Ground Plane,,,"Eating, Digging something",Indifferent,,40.859416,-73.933894
3,UPPER MANHATTAN,A,Fort Tryon Park,1,A-01-04,Gray,White,,Ground Plane,,,Running,Indifferent,,40.859418,-73.933895
4,UPPER MANHATTAN,A,Fort Tryon Park,1,A-01-05,Gray,Cinnamon,,Ground Plane,,,"Running, Eating",Indifferent,She left food,40.859493,-73.93359


In [4]:
# Drop null values for minimum requirement columns
dropna_columns = [
    "Primary Fur Color",
    "Highlights in Fur Color",
    "Activities",
    "Interactions with Humans",
    "Squirrel Latitude (DD.DDDDDD)",
    "Squirrel Longitude (-DD.DDDDDD)"
]

s2020_nonull = s2020_df.dropna(subset=dropna_columns, how="any")

# Display DataFrame and its shape
print(f"{s2020_nonull.shape}")
s2020_nonull.head()

(192, 16)


Unnamed: 0,Area Name,Area ID,Park Name,Park ID,Squirrel ID,Primary Fur Color,Highlights in Fur Color,Color Notes,Location,Above Ground (Height in Feet),Specific Location,Activities,Interactions with Humans,Other Notes or Observations,Squirrel Latitude (DD.DDDDDD),Squirrel Longitude (-DD.DDDDDD)
0,UPPER MANHATTAN,A,Fort Tryon Park,1,A-01-01,Gray,White,,Ground Plane,,,Foraging,Indifferent,,40.85941,-73.933936
1,UPPER MANHATTAN,A,Fort Tryon Park,1,A-01-02,Gray,White,,Ground Plane,,,Foraging,Indifferent,Looks skinny,40.859436,-73.933937
2,UPPER MANHATTAN,A,Fort Tryon Park,1,A-01-03,Gray,White,,Ground Plane,,,"Eating, Digging something",Indifferent,,40.859416,-73.933894
3,UPPER MANHATTAN,A,Fort Tryon Park,1,A-01-04,Gray,White,,Ground Plane,,,Running,Indifferent,,40.859418,-73.933895
4,UPPER MANHATTAN,A,Fort Tryon Park,1,A-01-05,Gray,Cinnamon,,Ground Plane,,,"Running, Eating",Indifferent,She left food,40.859493,-73.93359


## Columns Cleaning
- Remove columns (2020):
    - 'Area Name', irrelevant.
    - 'Area ID, irrelevant.
    - 'Color Notes', irrelevant and too few data points.
    - 'Location', irrelevant and would take too much resources to clean the data. ***
    - 'Above Ground (...)', irrelevant and would take too much resources to clean the data. ***
    - 'Specific Location', irrelevant and would take too much resources to clean the data.
- Remove columns (2018):
    - 'Hectare', inconsistent with 2020 dataset.
    - 'Shift', inconsistent with 2020 dataset.
    - 'Hectare Squirrel Number', inconsistent with 2020 dataset.
    - 'Age', inconsistent with 2020 dataset.
    - 'Date', inconsistent with 2020 dataset, right??? We know 2018 data was collected in October [Autumn] and 2020 data was collected in March [Spring]
    - 'Combination of Primary and Highlight Color', inconsistent with 2020 dataset.
    - 'Color notes', inconsistent with 2020 dataset.
    - 'Location', irrelevant and would take too much resources to clean the data. ***
    - 'Above Ground Sighter Measurement', irrelevant and would take too much resources to clean the data. ***
    - 'Specific Location', inconsistent with 2020 dataset.
    - 'Lat/Long', delete due to double up.
##### 2020  /  2018 Column Names (in order... can be changed):
    - park_name                 /  - park_name [Central Park only - can create new column to match 2020 dataset column]
    - park_ID                   /  - park_ID [DOESN'T EXIST IN 2018 DATASET - or maybe we keep this as an ID might be easier to map?]
    - squirrel_ID               /  - 'Unique Squirrel ID' (change to squirrel_ID)
    - primary_fur_color         /  - primary_fur_color
    - highlights_in_fur_color   /  - 'Highlight Fur Color' (change to highlights_in_fur_color)
    - foraging
    - climbing
    - eating
    - running
    - chasing
    - shouting                  /  - 'Kuks' + 'Quaas' + 'Moans' (change to shouting)
    - sitting
    - digging
    - other_activities          /  - 'Other Activities' + 'Tail flags' + 'Tail twitches' (changed to other_activities)
    - interactions_with_humans  /  - 'Approaches' + 'Indifferent' + 'Runs from' + 'Other Interactions' (changed to interactions_with_humans) [MAYBE 'OTHER SPECIES'?]
    - other_observations        /  - other_observations [DOESN'T EXIST IN 2018 DATASET]
    - squirrel_latitude         /  - 'Y' (change to squirrel_latitude)
    - squirrel_longitude        /  - 'X' (change to squirrel_longitude)

- Rename columns: use underscores, all lower case.

In [5]:
# Drop columns
drop_columns = ['Area Name', 'Area ID', "Color Notes", "Location", "Above Ground (Height in Feet)", "Specific Location"]

reduced_2020 = s2020_nonull.drop(columns=drop_columns)
reduced_2020.head()

Unnamed: 0,Park Name,Park ID,Squirrel ID,Primary Fur Color,Highlights in Fur Color,Activities,Interactions with Humans,Other Notes or Observations,Squirrel Latitude (DD.DDDDDD),Squirrel Longitude (-DD.DDDDDD)
0,Fort Tryon Park,1,A-01-01,Gray,White,Foraging,Indifferent,,40.85941,-73.933936
1,Fort Tryon Park,1,A-01-02,Gray,White,Foraging,Indifferent,Looks skinny,40.859436,-73.933937
2,Fort Tryon Park,1,A-01-03,Gray,White,"Eating, Digging something",Indifferent,,40.859416,-73.933894
3,Fort Tryon Park,1,A-01-04,Gray,White,Running,Indifferent,,40.859418,-73.933895
4,Fort Tryon Park,1,A-01-05,Gray,Cinnamon,"Running, Eating",Indifferent,She left food,40.859493,-73.93359


In [6]:
# Rename the columns
cols_df = pd.DataFrame(reduced_2020.columns, columns=["name"])

# Define the regex pattern
pattern = '\((.*?)\)' # \( and \) to escape brackets, *.? matches any character (non-newline) zero or more times

new_columns = []
for row in cols_df["name"]:
    stripped_row = re.findall(pattern, row)

    # Remove the brackets and content
    if len(stripped_row) > 0:
        remove_substring = f' ({stripped_row[0].strip("[]")})'
        row = row.strip(remove_substring)

    # Use underscore and cast to lowercase
    new_columns.append(row.replace(" ", "_").lower())

# Update the columns
reduced_2020.columns = new_columns

reduced_2020.head()

Unnamed: 0,park_name,park_id,squirrel_id,primary_fur_color,highlights_in_fur_color,activities,interactions_with_humans,other_notes_or_observations,squirrel_latitude,squirrel_longitude
0,Fort Tryon Park,1,A-01-01,Gray,White,Foraging,Indifferent,,40.85941,-73.933936
1,Fort Tryon Park,1,A-01-02,Gray,White,Foraging,Indifferent,Looks skinny,40.859436,-73.933937
2,Fort Tryon Park,1,A-01-03,Gray,White,"Eating, Digging something",Indifferent,,40.859416,-73.933894
3,Fort Tryon Park,1,A-01-04,Gray,White,Running,Indifferent,,40.859418,-73.933895
4,Fort Tryon Park,1,A-01-05,Gray,Cinnamon,"Running, Eating",Indifferent,She left food,40.859493,-73.93359


In [7]:
reduced_2020.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 192 entries, 0 to 432
Data columns (total 10 columns):
 #   Column                       Non-Null Count  Dtype  
---  ------                       --------------  -----  
 0   park_name                    192 non-null    object 
 1   park_id                      192 non-null    int64  
 2   squirrel_id                  192 non-null    object 
 3   primary_fur_color            192 non-null    object 
 4   highlights_in_fur_color      192 non-null    object 
 5   activities                   192 non-null    object 
 6   interactions_with_humans     192 non-null    object 
 7   other_notes_or_observations  96 non-null     object 
 8   squirrel_latitude            192 non-null    float64
 9   squirrel_longitude           192 non-null    float64
dtypes: float64(2), int64(1), object(7)
memory usage: 16.5+ KB


## Clean 'activities' column

In [8]:
# Create a DataFrame of the 'activities' column for cleaning
activities_df = reduced_2020[['squirrel_id', 'activities']].copy()
activities_df

Unnamed: 0,squirrel_id,activities
0,A-01-01,Foraging
1,A-01-02,Foraging
2,A-01-03,"Eating, Digging something"
3,A-01-04,Running
4,A-01-05,"Running, Eating"
...,...,...
428,D-22-40,Foraging
429,D-22-41,"Running, Foraging"
430,D-22-42,Climbing
431,D-22-43,"Running, Chasing, Climbing"


In [9]:
row_list = []
for row in activities_df['activities']:
    row_list.append(row.split(", "))

activities_df['activity_list'] = row_list
activities_df

Unnamed: 0,squirrel_id,activities,activity_list
0,A-01-01,Foraging,[Foraging]
1,A-01-02,Foraging,[Foraging]
2,A-01-03,"Eating, Digging something","[Eating, Digging something]"
3,A-01-04,Running,[Running]
4,A-01-05,"Running, Eating","[Running, Eating]"
...,...,...,...
428,D-22-40,Foraging,[Foraging]
429,D-22-41,"Running, Foraging","[Running, Foraging]"
430,D-22-42,Climbing,[Climbing]
431,D-22-43,"Running, Chasing, Climbing","[Running, Chasing, Climbing]"


In [10]:
idx_list = []
for idx, row in enumerate(activities_df['activity_list']):
    for word in row:
        if (word.startswith("Eating ")):
            print(idx, word)
            idx_list.append(idx)

129 Eating (or pretending to eat)
139 Eating (nuts)
178 Eating (bread crumbs)


In [11]:
activities_df.iloc[idx_list]

Unnamed: 0,squirrel_id,activities,activity_list
330,C-18-01,"Running, Eating (or pretending to eat)","[Running, Eating (or pretending to eat)]"
344,C-19-14,"Eating (nuts), Foraging","[Eating (nuts), Foraging]"
415,D-22-27,"Eating (bread crumbs), Foraging","[Eating (bread crumbs), Foraging]"


In [13]:
### CAN WE JUST DO THIS WITH THE reduced_2020 DATAFRAME TO AVOID HAVING TO MERGE? OR IS IT SAFER TO MAKE A COPY?
### I think it's easier to just make all the changes, because we'll create the boolean columns after this and merge it back?

# Combine 'child' activities that match 'parent' activities
pattern = 'Eating \((.*?)\)'
sitting = ["Hanging out", "Hanging", "Chilling", "Sitting in short tree", "Sticking out of a tree", "Very carefully watching a cat", "Watching #2", "Posing"]
shouting = ["Vocalization at us", "Defending tree"]
other_activities = ["Self-cleaning", "Sleeping", "Sleeping (Dead?)", "battery"] 

for row in activities_df['activity_list']:
    for idx, word in enumerate(row):

        # Check for "Eating (...)"
        bracket = re.findall(pattern, word)
        if (len(bracket) > 0):
            row[idx] = "Eating"

        # Check for "Digging "
        if (word.startswith("Digging ") or word == "Burying"):
            row[idx] = "Digging"

        if (word in shouting):
            row[idx] = "shouting"

        if (word == "Nesting/gathering leaves"):
            row[idx] = "Foraging"

        if (word in sitting):
            row[idx] = "Sitting"

        if (word == "Jumping"):
            row[idx] = "Climbing"
        
        if (word in other_activities):
            row[idx] = "other_activities"

        # # Convert each word to lowercase
        # row[idx] = word.lower()

In [14]:
# Check the 'activities' distribution
flat_list = [value for row in activities_df['activity_list'] for value in row]
flat_df = pd.DataFrame(flat_list)
flat_df.value_counts()

Foraging            72
Climbing            52
Eating              48
Running             37
Chasing             20
shouting            17
Sitting             14
Digging              5
other_activities     4
dtype: int64

In [15]:
# Create the boolean activity columns
activity_equivalent = ["running", "chasing", "climbing", "eating", "foraging", "shouting"]
set_2020 = set(flat_list)
set_2018 = set(activity_equivalent)

# The activities in this list will need to be accounted for from 2018's 'other activities'
account_for = list(set_2020.difference(set_2018))
print(f"Account for from 2018 dataset: {account_for}")

# Activities common to both list (note: account for "shouting" with [kuuks, quaas, moans])
common_activities = list(set_2020.intersection(set_2018))
print(f"Common between 2018 and 2020: {common_activities}")

# Create a list of the reduced activity names
column_names = list(set(flat_list))

# Create new columns for each activity
for col_name in column_names:
    activities_df[col_name] = pd.Series(dtype=bool)

# Loop through the 'activity_list' and populate the boolean columns
for idx, row in activities_df.iterrows():
    for word in row['activity_list']:
        for activity in column_names:
            if (word == activity):
                row[activity] = True
            else:
                row[activity] = False

# Display the DataFrame
activities_df

Account for from 2018 dataset: ['Foraging', 'other_activities', 'Climbing', 'Eating', 'Chasing', 'Running', 'Sitting', 'Digging']
Common between 2018 and 2020: ['shouting']


Unnamed: 0,squirrel_id,activities,activity_list,Foraging,other_activities,Climbing,shouting,Eating,Chasing,Running,Sitting,Digging
0,A-01-01,Foraging,[Foraging],True,False,False,False,False,False,False,False,False
1,A-01-02,Foraging,[Foraging],True,False,False,False,False,False,False,False,False
2,A-01-03,"Eating, Digging something","[Eating, Digging]",False,False,False,False,False,False,False,False,True
3,A-01-04,Running,[Running],False,False,False,False,False,False,True,False,False
4,A-01-05,"Running, Eating","[Running, Eating]",False,False,False,False,True,False,False,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...
428,D-22-40,Foraging,[Foraging],True,False,False,False,False,False,False,False,False
429,D-22-41,"Running, Foraging","[Running, Foraging]",True,False,False,False,False,False,False,False,False
430,D-22-42,Climbing,[Climbing],False,False,True,False,False,False,False,False,False
431,D-22-43,"Running, Chasing, Climbing","[Running, Chasing, Climbing]",False,False,True,False,False,False,False,False,False


In [16]:
# Drop the 'activities' and 'activity_list' columns
activities_df = activities_df.drop(columns=['activities', 'activity_list'])

In [17]:
# Merge with reduced_2020
s2020_updated = pd.merge(reduced_2020, activities_df, on="squirrel_id", how="inner")
s2020_updated.head()

Unnamed: 0,park_name,park_id,squirrel_id,primary_fur_color,highlights_in_fur_color,activities,interactions_with_humans,other_notes_or_observations,squirrel_latitude,squirrel_longitude,Foraging,other_activities,Climbing,shouting,Eating,Chasing,Running,Sitting,Digging
0,Fort Tryon Park,1,A-01-01,Gray,White,Foraging,Indifferent,,40.85941,-73.933936,True,False,False,False,False,False,False,False,False
1,Fort Tryon Park,1,A-01-02,Gray,White,Foraging,Indifferent,Looks skinny,40.859436,-73.933937,True,False,False,False,False,False,False,False,False
2,Fort Tryon Park,1,A-01-03,Gray,White,"Eating, Digging something",Indifferent,,40.859416,-73.933894,False,False,False,False,False,False,False,False,True
3,Fort Tryon Park,1,A-01-04,Gray,White,Running,Indifferent,,40.859418,-73.933895,False,False,False,False,False,False,True,False,False
4,Fort Tryon Park,1,A-01-05,Gray,Cinnamon,"Running, Eating",Indifferent,She left food,40.859493,-73.93359,False,False,False,False,True,False,False,False,False


# 2018 Dataset - Squirrels

In [164]:
# Create dataframe from CSV
s2018_df = pd.read_csv(squirrel_2018)

# Display DataFrame and its shape
print(f"2018 Dataset: {s2018_df.shape}")
s2018_df.head()

2018 Dataset: (3023, 31)


Unnamed: 0,X,Y,Unique Squirrel ID,Hectare,Shift,Date,Hectare Squirrel Number,Age,Primary Fur Color,Highlight Fur Color,...,Kuks,Quaas,Moans,Tail flags,Tail twitches,Approaches,Indifferent,Runs from,Other Interactions,Lat/Long
0,-73.956134,40.794082,37F-PM-1014-03,37F,PM,10142018,3,,,,...,False,False,False,False,False,False,False,False,,POINT (-73.9561344937861 40.7940823884086)
1,-73.968857,40.783783,21B-AM-1019-04,21B,AM,10192018,4,,,,...,False,False,False,False,False,False,False,False,,POINT (-73.9688574691102 40.7837825208444)
2,-73.974281,40.775534,11B-PM-1014-08,11B,PM,10142018,8,,Gray,,...,False,False,False,False,False,False,False,False,,POINT (-73.97428114848522 40.775533619083)
3,-73.959641,40.790313,32E-PM-1017-14,32E,PM,10172018,14,Adult,Gray,,...,False,False,False,False,False,False,False,True,,POINT (-73.9596413903948 40.7903128889029)
4,-73.970268,40.776213,13E-AM-1017-05,13E,AM,10172018,5,Adult,Gray,Cinnamon,...,False,False,False,False,False,False,False,False,,POINT (-73.9702676472613 40.7762126854894)


In [165]:
# Identify the non-null count and data types
s2018_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3023 entries, 0 to 3022
Data columns (total 31 columns):
 #   Column                                      Non-Null Count  Dtype  
---  ------                                      --------------  -----  
 0   X                                           3023 non-null   float64
 1   Y                                           3023 non-null   float64
 2   Unique Squirrel ID                          3023 non-null   object 
 3   Hectare                                     3023 non-null   object 
 4   Shift                                       3023 non-null   object 
 5   Date                                        3023 non-null   int64  
 6   Hectare Squirrel Number                     3023 non-null   int64  
 7   Age                                         2902 non-null   object 
 8   Primary Fur Color                           2968 non-null   object 
 9   Highlight Fur Color                         1937 non-null   object 
 10  Combination 

In [166]:
# 2018 Dataset

# Drop null values for minimum requirement columns
s2018_nonull = s2018_df.dropna(
    subset=["Primary Fur Color", "Highlight Fur Color", "Running", "Chasing", "Climbing", "Eating", "Foraging", "Approaches", "Indifferent", "Runs from", "X", "Y"],
    how="any")

# NOTE: If you add "Other Interactions", dataset is reduced to 170 rows
# Number of unique values for "Other Notes or Observations"
print(f'Unique "other interactions": {s2018_nonull["Other Interactions"].nunique()}') # 152 unique interactions

# Display DataFrame and its shape
print(f"{s2018_nonull.shape}")
s2018_nonull.head()

Unique "other interactions": 152
(1937, 31)


Unnamed: 0,X,Y,Unique Squirrel ID,Hectare,Shift,Date,Hectare Squirrel Number,Age,Primary Fur Color,Highlight Fur Color,...,Kuks,Quaas,Moans,Tail flags,Tail twitches,Approaches,Indifferent,Runs from,Other Interactions,Lat/Long
4,-73.970268,40.776213,13E-AM-1017-05,13E,AM,10172018,5,Adult,Gray,Cinnamon,...,False,False,False,False,False,False,False,False,,POINT (-73.9702676472613 40.7762126854894)
5,-73.968361,40.772591,11H-AM-1010-03,11H,AM,10102018,3,Adult,Cinnamon,White,...,False,False,False,False,True,False,True,False,,POINT (-73.9683613516225 40.7725908847499)
9,-73.97225,40.774288,11D-AM-1010-03,11D,AM,10102018,3,Adult,Gray,Cinnamon,...,False,False,False,False,False,False,True,False,,POINT (-73.9722500196844 40.7742879599026)
10,-73.969506,40.782351,20B-PM-1013-05,20B,PM,10132018,5,Adult,Gray,White,...,False,False,False,False,False,False,True,False,,POINT (-73.9695063535333 40.7823507678183)
12,-73.953217,40.791967,36I-PM-1007-01,36I,PM,10072018,1,Adult,Gray,Cinnamon,...,False,False,False,False,False,False,True,False,,POINT (-73.9532170504865 40.7919669739962)


In [167]:
# Rename the columns
cols_df = pd.DataFrame(s2018_nonull.columns, columns=["name"])

# Define the regex pattern
pattern = '\((.*?)\)' # \( and \) to escape brackets, *.? matches any character (non-newline) zero or more times

new_columns = []
for row in cols_df["name"]:
    # Use underscore and cast to lowercase
    new_columns.append(row.replace(" ", "_").lower())

# Update the columns
s2018_nonull.columns = new_columns

s2018_nonull.head()

Unnamed: 0,x,y,unique_squirrel_id,hectare,shift,date,hectare_squirrel_number,age,primary_fur_color,highlight_fur_color,...,kuks,quaas,moans,tail_flags,tail_twitches,approaches,indifferent,runs_from,other_interactions,lat/long
4,-73.970268,40.776213,13E-AM-1017-05,13E,AM,10172018,5,Adult,Gray,Cinnamon,...,False,False,False,False,False,False,False,False,,POINT (-73.9702676472613 40.7762126854894)
5,-73.968361,40.772591,11H-AM-1010-03,11H,AM,10102018,3,Adult,Cinnamon,White,...,False,False,False,False,True,False,True,False,,POINT (-73.9683613516225 40.7725908847499)
9,-73.97225,40.774288,11D-AM-1010-03,11D,AM,10102018,3,Adult,Gray,Cinnamon,...,False,False,False,False,False,False,True,False,,POINT (-73.9722500196844 40.7742879599026)
10,-73.969506,40.782351,20B-PM-1013-05,20B,PM,10132018,5,Adult,Gray,White,...,False,False,False,False,False,False,True,False,,POINT (-73.9695063535333 40.7823507678183)
12,-73.953217,40.791967,36I-PM-1007-01,36I,PM,10072018,1,Adult,Gray,Cinnamon,...,False,False,False,False,False,False,True,False,,POINT (-73.9532170504865 40.7919669739962)


In [168]:
## Delete unwanted columns but maintain original copy
s2018_drop_columns = s2018_nonull.drop(columns=['hectare', 'shift', 'hectare_squirrel_number', 'age', 'other_interactions'], inplace=False)

# Rename columns to match 2020 dataset
s2018_rename_columns = s2018_drop_columns.rename(columns={"x": "squirrel_longitude", "y": "squirrel_latitude", "highlight_fur_colour": "highlights_in_fur_color", "unique_squirrel_id": "squirrel_id"})

s2018_rename_columns.head()

Unnamed: 0,squirrel_longitude,squirrel_latitude,squirrel_id,date,primary_fur_color,highlight_fur_color,combination_of_primary_and_highlight_color,color_notes,location,above_ground_sighter_measurement,...,other_activities,kuks,quaas,moans,tail_flags,tail_twitches,approaches,indifferent,runs_from,lat/long
4,-73.970268,40.776213,13E-AM-1017-05,10172018,Gray,Cinnamon,Gray+Cinnamon,,Above Ground,,...,,False,False,False,False,False,False,False,False,POINT (-73.9702676472613 40.7762126854894)
5,-73.968361,40.772591,11H-AM-1010-03,10102018,Cinnamon,White,Cinnamon+White,,,,...,,False,False,False,False,True,False,True,False,POINT (-73.9683613516225 40.7725908847499)
9,-73.97225,40.774288,11D-AM-1010-03,10102018,Gray,Cinnamon,Gray+Cinnamon,,Above Ground,30,...,grooming,False,False,False,False,False,False,True,False,POINT (-73.9722500196844 40.7742879599026)
10,-73.969506,40.782351,20B-PM-1013-05,10132018,Gray,White,Gray+White,,Ground Plane,FALSE,...,,False,False,False,False,False,False,True,False,POINT (-73.9695063535333 40.7823507678183)
12,-73.953217,40.791967,36I-PM-1007-01,10072018,Gray,Cinnamon,Gray+Cinnamon,,Ground Plane,FALSE,...,,False,False,False,False,False,False,True,False,POINT (-73.9532170504865 40.7919669739962)


In [169]:
# Check the "other activities" column
other_activities = pd.DataFrame(s2018_nonull["other_activities"].value_counts())
other_activities.head(50)

Unnamed: 0,other_activities
digging,14
sitting,9
playing,8
burying,6
nut in mouth,4
grooming,3
cleaning,3
walking,3
hopping,3
watching,3
