In [99]:
# Import dependencies

import os

import numpy as np
import pandas as pd
import psycopg2
from sqlalchemy import create_engine

In [111]:
# Read the cleaned NYC dogs CSV and expose its contents as a DataFrame
dog_data_file_path = "Resources/NYC_dogs_clean_forclass.csv"
NYC_dog_data = pd.read_csv(filepath_or_buffer=dog_data_file_path)
NYC_dog_data_df = pd.DataFrame(data=NYC_dog_data)
# Preview the first five rows
NYC_dog_data_df.head(n=5)

Unnamed: 0,UID,AnimalName,AnimalGender,AnimalBirthMonth,BreedName,Borough,ZipCode,CommunityDistrict,CensusTract,NTA,CityCouncilDistrict,CongressionalDistrict,StateSenatorialDistrict,LicenseIssuedDate,LicenseExpiredDate
0,0,SHADOW,M,1,Beagle,Brooklyn,11236,318.0,1014.0,BK50,46.0,8.0,19.0,12/29/14,1/30/16
1,1,ROCCO,M,10,Boxer,Brooklyn,11210,314.0,756.0,BK43,45.0,9.0,17.0,1/7/15,1/30/16
2,2,LUIGI,M,9,Maltese,Bronx,10464,210.0,516.0,BX10,13.0,14.0,34.0,1/17/15,2/2/16
3,3,PETUNIA,F,8,Pug,Brooklyn,11221,304.0,419.0,BK78,34.0,7.0,18.0,3/1/15,3/28/16
4,4,ROMEO,M,10,Maltese,Bronx,10451,201.0,65.0,BX34,17.0,15.0,32.0,3/9/15,3/9/16


In [112]:
# Keep a subset of the dog data columns and shorten two of the column names
columns_to_keep = ['UID', 'Borough', 'AnimalGender', 'AnimalBirthMonth', 'BreedName', 'CensusTract']
shorter_column_names = {'AnimalGender': 'Gender', 'AnimalBirthMonth': 'BirthMonth'}

NYC_dog_data_cleaned_df = (
    NYC_dog_data_df
    .filter(items=columns_to_keep, axis='columns')
    .rename(columns=shorter_column_names)
)
NYC_dog_data_cleaned_df

Unnamed: 0,UID,Borough,Gender,BirthMonth,BreedName,CensusTract
0,0,Brooklyn,M,1,Beagle,1014.0
1,1,Brooklyn,M,10,Boxer,756.0
2,2,Bronx,M,9,Maltese,516.0
3,3,Brooklyn,F,8,Pug,419.0
4,4,Bronx,M,10,Maltese,65.0
...,...,...,...,...,...,...
117619,121857,Manhattan,M,6,Boston Terrier,98.0
117620,121858,Brooklyn,F,10,"Collie, Border",250.0
117621,121859,Staten Island,F,5,German Shepherd Crossbreed,17008.0
117622,121860,Bronx,F,12,Yorkshire Terrier,79.0


In [85]:
# Show the dtype of every column in the cleaned dog DataFrame
NYC_dog_data_cleaned_df.dtypes

Borough         object
Gender          object
BirthMonth       int64
BreedName       object
CensusTract    float64
dtype: object

In [3]:
# Read the income-by-year CSV and expose its contents as a DataFrame
income_data_file_path = "Resources/income_by_year.csv"
NYC_income_data_cleaned = pd.read_csv(filepath_or_buffer=income_data_file_path)
NYC_income_data_df = pd.DataFrame(data=NYC_income_data_cleaned)
# Preview the last five rows
NYC_income_data_df.tail(n=5)

Unnamed: 0,CensusTract,2014,2015,2016,Grand Total
1308,1579.01,"$78,641.00","$82,981.00","$93,466.00","$85,029.33"
1309,1579.02,"$89,395.00","$95,592.00","$103,125.00","$96,037.33"
1310,1579.03,"$81,173.00","$76,330.00","$82,148.00","$79,883.67"
1311,1617.0,"$88,125.00","$73,929.00","$76,045.00","$79,366.33"
1312,1621.0,"$74,861.00","$67,425.00","$65,114.00","$69,133.33"


In [110]:
# Round the CensusTract column of the income DataFrame to 2 decimal places.
# NOTE(review): the intent appears to be lining these values up with CensusTract in the
# NYC dog data DataFrame — confirm that 2 decimals is the precision used there.
# The column is overwritten in place on NYC_income_data_df.
NYC_income_data_df['CensusTract'] = NYC_income_data_df['CensusTract'].round(decimals=2)

NYC_income_data_df.head()

Unnamed: 0,CensusTract,2014,2015,2016,Grand Total
0,1.0,"$102,825.00","$99,372.50","$108,892.00","$103,696.50"
1,2.0,"$57,469.00","$57,005.33","$62,204.00","$58,892.78"
2,2.0,"$23,036.00","$20,521.00","$21,102.00","$21,553.00"
3,2.0,"$29,418.00","$29,684.00","$32,411.00","$30,504.33"
4,3.0,"$57,500.00","$59,688.00","$70,078.00","$62,422.00"


In [5]:
# Number of rows in NYC_dog_data_df (the frame loaded from the cleaned dogs CSV)
len(NYC_dog_data_df)

117624

In [6]:
# Inspect the dtype of each column in NYC_dog_data_cleaned_df
NYC_dog_data_cleaned_df.dtypes

Unnamed: 0                   int64
AnimalName                  object
AnimalGender                object
AnimalBirthMonth             int64
BreedName                   object
Borough                     object
ZipCode                      int64
CommunityDistrict          float64
CensusTract                float64
NTA                         object
CityCouncilDistrict        float64
CongressionalDistrict      float64
StateSenatorialDistrict    float64
LicenseIssuedDate           object
LicenseExpiredDate          object
dtype: object

In [7]:
# Count the dogs with a recorded (non-null) AnimalName in each NYC borough.
# AnimalName is not carried into NYC_dog_data_cleaned_df (that frame keeps only UID,
# Borough, Gender, BirthMonth, BreedName and CensusTract), so group the full
# NYC_dog_data_df instead; grouping the cleaned frame raises a KeyError.
NYC_dog_data_df.groupby('Borough')['AnimalName'].count()

Borough
Bronx            12309
Brooklyn         28893
Manhattan        41288
Queens           24047
Staten Island    10852
Name: AnimalName, dtype: int64

In [116]:
# Per-borough dog totals, keyed by borough name.
# These figures match the per-borough AnimalName counts shown earlier in the notebook.
borough_totals = {
    'Bronx': 12309,
    'Brooklyn': 28893,
    'Manhattan': 41288,
    'Queens': 24047,
    'Staten Island': 10852,
}

# Flatten into [borough, total] rows, preserving the order above
data = [[borough_name, total] for borough_name, total in borough_totals.items()]

# Build the DataFrame of total dog population in each NYC borough
NYC_dog_pop_df = pd.DataFrame(data=data, columns=['borough', 'dog_population'])

NYC_dog_pop_df

Unnamed: 0,borough,dog_population
0,Bronx,12309
1,Brooklyn,28893
2,Manhattan,41288
3,Queens,24047
4,Staten Island,10852


In [108]:
# Subset of the cleaned data holding each dog's UID, borough and gender
gender_columns = ['UID', 'Borough', 'Gender']
gender_boroughs_df = NYC_dog_data_cleaned_df.filter(items=gender_columns, axis='columns')
gender_boroughs_df

Unnamed: 0,UID,Borough,Gender
0,0,Brooklyn,M
1,1,Brooklyn,M
2,2,Bronx,M
3,3,Brooklyn,F
4,4,Bronx,M
...,...,...,...
117619,121857,Manhattan,M
117620,121858,Brooklyn,F
117621,121859,Staten Island,F
117622,121860,Bronx,F


In [50]:
# Count male and female dogs within each NYC borough.
# The gender column was renamed from 'AnimalGender' to 'Gender' when
# NYC_dog_data_cleaned_df was built, so gender_boroughs_df only has 'Gender';
# grouping on the old name raises a KeyError.
gender_boroughs_df.groupby(['Borough', 'Gender'])['Gender'].count()

Borough        AnimalGender
Bronx          F                5391
               M                6937
Brooklyn       F               13166
               M               15784
Manhattan      F               19247
               M               22080
Queens         F               10675
               M               13479
Staten Island  F                4968
               M                5897
Name: AnimalGender, dtype: int64

In [107]:
# Subset of the cleaned data holding each dog's UID, borough and birth month
birth_month_columns = ['UID', 'Borough', 'BirthMonth']
birth_month_boroughs_df = NYC_dog_data_cleaned_df.filter(items=birth_month_columns, axis='columns')

birth_month_boroughs_df

Unnamed: 0,UID,Borough,BirthMonth
0,0,Brooklyn,1
1,1,Brooklyn,10
2,2,Bronx,9
3,3,Brooklyn,8
4,4,Bronx,10
...,...,...,...
117619,121857,Manhattan,6
117620,121858,Brooklyn,10
117621,121859,Staten Island,5
117622,121860,Bronx,12


In [15]:
# Rank dog breeds by how often they appear across all NYC boroughs (most common first).
# The previously referenced NYC_dog_breed_df is never defined in this notebook, so the
# cell failed on a fresh kernel; read BreedName from the cleaned dog DataFrame instead.
NYC_dog_data_cleaned_df['BreedName'].value_counts()

Unknown                    16419
Yorkshire Terrier           7358
Shih Tzu                    6848
Chihuahua                   5554
Labrador Retriever          4135
                           ...  
Pyrenean Shepherd              1
Afghan Hound Crossbreed        1
Finnish Lapphund               1
Swedish Vallhund               1
Neapolitan Mastiff             1
Name: BreedName, Length: 299, dtype: int64

In [106]:
# Subset of the cleaned data holding each dog's UID, borough and breed name
breed_columns = ['UID', 'Borough', 'BreedName']
breeds_boroughs_df = NYC_dog_data_cleaned_df.filter(items=breed_columns, axis='columns')

breeds_boroughs_df

Unnamed: 0,UID,Borough,BreedName
0,0,Brooklyn,Beagle
1,1,Brooklyn,Boxer
2,2,Bronx,Maltese
3,3,Brooklyn,Pug
4,4,Bronx,Maltese
...,...,...,...
117619,121857,Manhattan,Boston Terrier
117620,121858,Brooklyn,"Collie, Border"
117621,121859,Staten Island,German Shepherd Crossbreed
117622,121860,Bronx,Yorkshire Terrier


In [114]:
# Connect to postgres/pgAdmin.
# Connection settings are read from the standard PG* environment variables so that no
# credentials are stored in the notebook. The earlier user/host/port/database values
# remain as defaults; the password has no default and must be supplied externally
# (e.g. by exporting PGPASSWORD before starting Jupyter).
sql_pgadmin = psycopg2.connect(
    user=os.environ.get("PGUSER", "postgres"),
    # NOTE(review): when PGPASSWORD is unset this passes None; psycopg2 is expected to
    # omit None-valued parameters and let libpq use its own fallbacks — confirm.
    password=os.environ.get("PGPASSWORD"),
    host=os.environ.get("PGHOST", "localhost"),
    port=os.environ.get("PGPORT", "5432"),
    database=os.environ.get("PGDATABASE", "dog_licenses_db"),
)


In [115]:
# Write each derived DataFrame to its own CSV file, leaving out the index column
csv_exports = [
    ('Resources/cleaned_NYC_dog_data.csv', NYC_dog_data_cleaned_df),
    ('Resources/NYC_dog_pop_df.csv', NYC_dog_pop_df),
    ('Resources/NYC_dog_genders.csv', gender_boroughs_df),
    ('Resources/NYC_dog_birth_months.csv', birth_month_boroughs_df),
    ('Resources/NYC_breed_names.csv', breeds_boroughs_df),
]

for csv_path, frame in csv_exports:
    frame.to_csv(csv_path, index=False)

In [27]:
# Smoke-test the database connection by reading the borough_dog_pop table into a DataFrame
query = "SELECT * FROM borough_dog_pop"
example_df = pd.read_sql(sql=query, con=sql_pgadmin)
example_df

Unnamed: 0,borough,dog_population
0,Bronx,12611
1,Brooklyn,30351
2,Manhattan,42607
3,Queens,25138
4,Staten Island,11155
