Analyzing squirrel census data in New York, 2018 

In [138]:
import pandas as pd
import sqlalchemy
import numpy as np

from dotenv import load_dotenv
import os

load_dotenv()
               
# preparations
squirrel_df = pd.read_csv(os.getenv('DATASET_FILE'), 
                            delimiter=';', quotechar='"', escapechar='\\')

engine = sqlalchemy.create_engine(url=os.getenv('DATABASE_URL'),  isolation_level = "REPEATABLE READ")

squirrel_df.head()


Unnamed: 0,X,Y,Unique Squirrel ID,Hectare,Shift,Date,Hectare Squirrel Number,Age,Primary Fur Color,Highlight Fur Color,...,Kuks,Quaas,Moans,Tail flags,Tail twitches,Approaches,Indifferent,Runs from,Other Interactions,Lat/Long
0,-73.956134,40.794082,37F-PM-1014-03,37F,PM,10142018,3,,,,...,False,False,False,False,False,False,False,False,,POINT (-73.9561344937861 40.7940823884086)
1,-73.968857,40.783783,21B-AM-1019-04,21B,AM,10192018,4,,,,...,False,False,False,False,False,False,False,False,,POINT (-73.9688574691102 40.7837825208444)
2,-73.974281,40.775534,11B-PM-1014-08,11B,PM,10142018,8,,Gray,,...,False,False,False,False,False,False,False,False,,POINT (-73.97428114848522 40.775533619083)
3,-73.959641,40.790313,32E-PM-1017-14,32E,PM,10172018,14,Adult,Gray,,...,False,False,False,False,False,False,False,True,,POINT (-73.9596413903948 40.7903128889029)
4,-73.970268,40.776213,13E-AM-1017-05,13E,AM,10172018,5,Adult,Gray,Cinnamon,...,False,False,False,False,False,False,False,False,,POINT (-73.9702676472613 40.7762126854894)


In [139]:
squirrel_df.rename(columns=lambda x: x.replace(' ', '_').replace('/', '_').lower(), inplace=True)

Normalize the data into three tables: squirrels, behaviors, locations

In [140]:
#squirrels data
squirrels = squirrel_df[['unique_squirrel_id', 'shift', 'date', 'hectare_squirrel_number', 'age', 
                        'primary_fur_color', 'highlight_fur_color', 
                        'combination_of_primary_and_highlight_color', 'color_notes']]


behaviors = squirrel_df[['running', 'chasing', 'climbing', 'eating', 'foraging', 'unique_squirrel_id', 'other_activities',
                         'kuks', 'quaas', 'moans', 'tail_flags', 'tail_twitches', 'approaches', 'indifferent', 'runs_from',
                         'other_interactions']]
behaviors.insert(0, 'behavior_id', behaviors.index)

behaviors.set_index('behavior_id')

locations = squirrel_df[['unique_squirrel_id', 'x', 'y', 'hectare', 'above_ground_sighter_measurement', 'specific_location', 'lat_long']]
locations.insert(0, 'location_id', locations.index)

In [146]:
locations.set_index('location_id')

#save all dataframes as db tables
squirrels.to_sql('squirrels', engine, index=True, if_exists='replace')
behaviors.to_sql('behaviors', engine, index=True, if_exists='replace')
locations.to_sql('locations', engine, index=True, if_exists='replace')

23

In [65]:
# connecting to postgres db
%load_ext sql

The sql extension is already loaded. To reload it, use:
  %reload_ext sql


In [66]:
%sql postgresql://postgres:KevSu15E@localhost/squirrel_data

Tables columns and its datatypes

In [148]:
%%sql
SELECT 
    column_name,
    data_type
FROM INFORMATION_SCHEMA.columns
WHERE (table_schema = 'public')
    AND (table_name = 'squirrels');

 * postgresql://postgres:***@localhost/squirrel_data
10 rows affected.


column_name,data_type
index,bigint
unique_squirrel_id,text
shift,text
date,bigint
hectare_squirrel_number,bigint
age,text
primary_fur_color,text
highlight_fur_color,text
combination_of_primary_and_highlight_color,text
color_notes,text


In [150]:
%%sql
SELECT 
    column_name,
    data_type
FROM INFORMATION_SCHEMA.columns
WHERE (table_schema = 'public')
    AND (table_name = 'behaviors');

 * postgresql://postgres:***@localhost/squirrel_data
18 rows affected.


column_name,data_type
index,bigint
behavior_id,bigint
running,boolean
chasing,boolean
climbing,boolean
eating,boolean
foraging,boolean
unique_squirrel_id,text
other_activities,text
kuks,boolean


In [None]:
%%sql
SELECT 
    column_name,
    data_type
FROM INFORMATION_SCHEMA.columns
WHERE (table_schema = 'public')
    AND (table_name = 'locations');