In [1]:
# Import dependencies

import pandas as pd
import numpy as np
from sqlalchemy import create_engine
import psycopg2

In [2]:
# Load the cleaned NYC dogs dataset and convert to Dataframe
dog_data_file_path = "Resources/NYC_dogs_clean_forclass.csv"
NYC_dog_data_cleaned = pd.read_csv(dog_data_file_path)
NYC_dog_data_df = pd.DataFrame(NYC_dog_data_cleaned)
NYC_dog_data_df.tail()

Unnamed: 0.1,Unnamed: 0,AnimalName,AnimalGender,AnimalBirthMonth,BreedName,Borough,ZipCode,CommunityDistrict,CensusTract,NTA,CityCouncilDistrict,CongressionalDistrict,StateSenatorialDistrict,LicenseIssuedDate,LicenseExpiredDate
117619,121857,ROCKY,M,6,Boston Terrier,Manhattan,10022,106.0,98.0,MN19,4.0,12.0,28.0,12/31/16,12/31/18
117620,121858,ROXY,F,10,"Collie, Border",Brooklyn,11219,311.0,250.0,BK28,38.0,10.0,22.0,12/31/16,9/18/18
117621,121859,VALENTINA,F,5,German Shepherd Crossbreed,Staten Island,10312,503.0,17008.0,SI48,51.0,11.0,24.0,12/31/16,12/31/17
117622,121860,VENUS,F,12,Yorkshire Terrier,Bronx,10455,201.0,79.0,BX34,8.0,15.0,29.0,12/31/16,6/6/17
117623,121861,WILLOW,F,3,Unknown,Manhattan,10025,107.0,195.0,MN09,7.0,10.0,31.0,12/31/16,1/15/18


In [3]:
income_data_file_path = "Resources/income_by_year.csv"
NYC_income_data_cleaned = pd.read_csv(income_data_file_path)
NYC_income_data_df = pd.DataFrame(NYC_income_data_cleaned)
NYC_income_data_df.tail()

Unnamed: 0,CensusTract,2014,2015,2016,Grand Total
1308,1579.01,"$78,641.00","$82,981.00","$93,466.00","$85,029.33"
1309,1579.02,"$89,395.00","$95,592.00","$103,125.00","$96,037.33"
1310,1579.03,"$81,173.00","$76,330.00","$82,148.00","$79,883.67"
1311,1617.0,"$88,125.00","$73,929.00","$76,045.00","$79,366.33"
1312,1621.0,"$74,861.00","$67,425.00","$65,114.00","$69,133.33"


In [4]:
# Round census tract column of the income data DataFrame to 1 decimal place to match NYC dog data DataFrame
NYC_income_data_df['CensusTract'] = NYC_income_data_df['CensusTract'].round(decimals=1)

NYC_income_data_df.head()

Unnamed: 0,CensusTract,2014,2015,2016,Grand Total
0,1.0,"$102,825.00","$99,372.50","$108,892.00","$103,696.50"
1,2.0,"$57,469.00","$57,005.33","$62,204.00","$58,892.78"
2,2.0,"$23,036.00","$20,521.00","$21,102.00","$21,553.00"
3,2.0,"$29,418.00","$29,684.00","$32,411.00","$30,504.33"
4,3.0,"$57,500.00","$59,688.00","$70,078.00","$62,422.00"


In [5]:
# Retrieve the (cleaned) length of NYC_dog_data_df
len(NYC_dog_data_df)

117624

In [6]:
# Analyze the data types of the cleaned columns 
NYC_dog_data_df.dtypes

Unnamed: 0                   int64
AnimalName                  object
AnimalGender                object
AnimalBirthMonth             int64
BreedName                   object
Borough                     object
ZipCode                      int64
CommunityDistrict          float64
CensusTract                float64
NTA                         object
CityCouncilDistrict        float64
CongressionalDistrict      float64
StateSenatorialDistrict    float64
LicenseIssuedDate           object
LicenseExpiredDate          object
dtype: object

In [7]:
# Find the total number of adopted animals in NYC boroughs
NYC_dog_data_df.groupby('Borough')['AnimalName'].count()

Borough
Bronx            12309
Brooklyn         28893
Manhattan        41288
Queens           24047
Staten Island    10852
Name: AnimalName, dtype: int64

In [8]:
# initialize lists 
data = [['Bronx', 12309], ['Brooklyn', 28893], ['Manhattan', 41288], ['Queens', 24047], ['Staten Island', 10852]]
  
# Create a new DataFrame for total dog population in each NYC borough
NYC_dog_pop_df = pd.DataFrame(data, columns = ['borough', 'dog_population'])

NYC_dog_pop_df

Unnamed: 0,borough,dog_population
0,Bronx,12309
1,Brooklyn,28893
2,Manhattan,41288
3,Queens,24047
4,Staten Island,10852


In [9]:
# Create a new df of dog gender and boroughs
gender_boroughs_df = NYC_dog_data_df.filter(['AnimalGender','Borough'], axis=1)
gender_boroughs_df

Unnamed: 0,AnimalGender,Borough
0,M,Brooklyn
1,M,Brooklyn
2,M,Bronx
3,F,Brooklyn
4,M,Bronx
...,...,...
117619,M,Manhattan
117620,F,Brooklyn
117621,F,Staten Island
117622,F,Bronx


In [10]:
# Find the number of male and female dogs adopted in NYC
gender_boroughs_df.groupby(['Borough', 'AnimalGender'])['AnimalGender'].count()

Borough        AnimalGender
Bronx          F                5391
               M                6937
Brooklyn       F               13166
               M               15784
Manhattan      F               19247
               M               22080
Queens         F               10675
               M               13479
Staten Island  F                4968
               M                5897
Name: AnimalGender, dtype: int64

In [11]:
# Create a dataframe based off dog birth months
birth_month_boroughs_df = NYC_dog_data_df.filter(['AnimalBirthMonth','Borough'], axis=1)

birth_month_boroughs_df

Unnamed: 0,AnimalBirthMonth,Borough
0,1,Brooklyn
1,10,Brooklyn
2,9,Bronx
3,8,Brooklyn
4,10,Bronx
...,...,...
117619,6,Manhattan
117620,10,Brooklyn
117621,5,Staten Island
117622,12,Bronx


In [12]:
# Find the total number of animals born each month
data = birth_month_boroughs_df.groupby(['Borough','AnimalBirthMonth'])['AnimalBirthMonth'].count()

# Create a new DataFrame for total dog population in each NYC borough
NYC_dog_birth_df = pd.DataFrame(data)

NYC_dog_birth_df

Unnamed: 0_level_0,Unnamed: 1_level_0,AnimalBirthMonth
Borough,AnimalBirthMonth,Unnamed: 2_level_1
Bronx,1,2346
Bronx,2,827
Bronx,3,916
Bronx,4,915
Bronx,5,873
Bronx,6,944
Bronx,7,887
Bronx,8,927
Bronx,9,900
Bronx,10,966


In [13]:
# Create a df of NYC dog breeds in boroughs
NYC_dog_breed_df = NYC_dog_data_df.filter(['BreedName','Borough'], axis=1)

NYC_dog_breed_df

Unnamed: 0,BreedName,Borough
0,Beagle,Brooklyn
1,Boxer,Brooklyn
2,Maltese,Bronx
3,Pug,Brooklyn
4,Maltese,Bronx
...,...,...
117619,Boston Terrier,Manhattan
117620,"Collie, Border",Brooklyn
117621,German Shepherd Crossbreed,Staten Island
117622,Yorkshire Terrier,Bronx


In [14]:
# Rename columns and create and updated dog birth df
updated_NYC_dog_birth_df = NYC_dog_birth_df.rename(columns={'AnimalBirthMonth': 'Total'})

updated_NYC_dog_birth_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Total
Borough,AnimalBirthMonth,Unnamed: 2_level_1
Bronx,1,2346
Bronx,2,827
Bronx,3,916
Bronx,4,915
Bronx,5,873
Bronx,6,944
Bronx,7,887
Bronx,8,927
Bronx,9,900
Bronx,10,966


In [15]:
# Find the top breeds adopted in NYC boroughs
NYC_dog_breed_df['BreedName'].value_counts()

Unknown                    16419
Yorkshire Terrier           7358
Shih Tzu                    6848
Chihuahua                   5554
Labrador Retriever          4135
                           ...  
Pyrenean Shepherd              1
Afghan Hound Crossbreed        1
Finnish Lapphund               1
Swedish Vallhund               1
Neapolitan Mastiff             1
Name: BreedName, Length: 299, dtype: int64

In [16]:
# Find the top types of breeds adopted in the Bronx
bronx_breeds = NYC_dog_breed_df[NYC_dog_breed_df['Borough'] == 'Bronx'].groupby('Borough')['BreedName'].value_counts()

# Convert to a pandas DataFrame
bronx_breeds_df = pd.DataFrame(data= bronx_breeds)

bronx_breeds_df

Unnamed: 0_level_0,Unnamed: 1_level_0,BreedName
Borough,BreedName,Unnamed: 2_level_1
Bronx,Unknown,1508
Bronx,Yorkshire Terrier,1183
Bronx,Shih Tzu,1083
Bronx,Chihuahua,845
Bronx,American Pit Bull Terrier/Pit Bull,718
Bronx,...,...
Bronx,Pug Crossbreed,1
Bronx,Rat Terrier Crossbreed,1
Bronx,Russell Terrier,1
Bronx,Thai Ridgeback,1


In [17]:
# Find the top types of breeds adopted in Brooklyn
brooklyn_breeds = NYC_dog_breed_df[NYC_dog_breed_df['Borough'] == 'Brooklyn'].groupby('Borough')['BreedName'].value_counts()

# Convert to a pandas DataFrame
brooklyn_breeds_df = pd.DataFrame(data= brooklyn_breeds)

brooklyn_breeds_df

Unnamed: 0_level_0,Unnamed: 1_level_0,BreedName
Borough,BreedName,Unnamed: 2_level_1
Brooklyn,Unknown,4017
Brooklyn,Yorkshire Terrier,1873
Brooklyn,Shih Tzu,1799
Brooklyn,Chihuahua,1420
Brooklyn,American Pit Bull Mix / Pit Bull Mix,1040
Brooklyn,...,...
Brooklyn,Spanish Water Dog,1
Brooklyn,Sussex Spaniel,1
Brooklyn,Swedish Vallhund,1
Brooklyn,Thai Ridgeback,1


In [18]:
# Find the top types of breeds adopted in Manhattan
manhattan_breeds = NYC_dog_breed_df[NYC_dog_breed_df['Borough'] == 'Manhattan'].groupby('Borough')['BreedName'].value_counts()

# Convert to a pandas DataFrame
manhattan_breeds_df = pd.DataFrame(data= manhattan_breeds)

manhattan_breeds_df

Unnamed: 0_level_0,Unnamed: 1_level_0,BreedName
Borough,BreedName,Unnamed: 2_level_1
Manhattan,Unknown,5568
Manhattan,Yorkshire Terrier,1986
Manhattan,Chihuahua,1874
Manhattan,Shih Tzu,1768
Manhattan,Labrador Retriever,1654
Manhattan,...,...
Manhattan,Polish Hound,1
Manhattan,Portuguese Sheepdog,1
Manhattan,Russian Wolfhound,1
Manhattan,Small Munsterlander Pointer,1


In [19]:
# Find the top types of breeds adopted in Queens
queens_breeds = NYC_dog_breed_df[NYC_dog_breed_df['Borough'] == 'Queens'].groupby('Borough')['BreedName'].value_counts()

# Convert to a pandas DataFrame
queens_breeds_df = pd.DataFrame(data= queens_breeds)

queens_breeds_df

Unnamed: 0_level_0,Unnamed: 1_level_0,BreedName
Borough,BreedName,Unnamed: 2_level_1
Queens,Unknown,3580
Queens,Yorkshire Terrier,1617
Queens,Shih Tzu,1493
Queens,Maltese,1116
Queens,Chihuahua,1068
Queens,...,...
Queens,Thai Ridgeback,1
Queens,Tibetan Mastiff,1
Queens,Treeing Walker Coonhound,1
Queens,Welsh Terrier,1


In [20]:
# Find the top types of breeds adopted in Staten Island
staten_island_breeds = NYC_dog_breed_df[NYC_dog_breed_df['Borough'] == 'Staten Island'].groupby('Borough')['BreedName'].value_counts()

# Convert to a pandas DataFrame
staten_island_breeds_df = pd.DataFrame(data= staten_island_breeds)

staten_island_breeds_df

Unnamed: 0_level_0,Unnamed: 1_level_0,BreedName
Borough,BreedName,Unnamed: 2_level_1
Staten Island,Unknown,1746
Staten Island,Shih Tzu,705
Staten Island,Yorkshire Terrier,699
Staten Island,Labrador Retriever,572
Staten Island,Maltese,388
Staten Island,...,...
Staten Island,St. Bernard,1
Staten Island,Standard Schnauzer,1
Staten Island,Tibetan Spaniel,1
Staten Island,Toy Fox Terrier,1


In [21]:
# Join the Dog Data DataFrame to the Income Data DataFrame
NYC_joined_data_df = NYC_dog_data_df.join(NYC_income_data_df.set_index('CensusTract'), how = 'left', on ='CensusTract')

NYC_joined_data_df.head()


Unnamed: 0.1,Unnamed: 0,AnimalName,AnimalGender,AnimalBirthMonth,BreedName,Borough,ZipCode,CommunityDistrict,CensusTract,NTA,CityCouncilDistrict,CongressionalDistrict,StateSenatorialDistrict,LicenseIssuedDate,LicenseExpiredDate,2014,2015,2016,Grand Total
0,0,SHADOW,M,1,Beagle,Brooklyn,11236,318.0,1014.0,BK50,46.0,8.0,19.0,12/29/14,1/30/16,"$70,833.00","$60,969.00","$60,625.00","$64,142.33"
1,1,ROCCO,M,10,Boxer,Brooklyn,11210,314.0,756.0,BK43,45.0,9.0,17.0,1/7/15,1/30/16,"$66,090.00","$62,935.00","$83,026.00","$70,683.67"
2,2,LUIGI,M,9,Maltese,Bronx,10464,210.0,516.0,BX10,13.0,14.0,34.0,1/17/15,2/2/16,"$68,259.00","$65,695.50","$70,019.50","$67,991.33"
2,2,LUIGI,M,9,Maltese,Bronx,10464,210.0,516.0,BX10,13.0,14.0,34.0,1/17/15,2/2/16,"$31,062.00","$34,241.00","$42,220.00","$35,841.00"
2,2,LUIGI,M,9,Maltese,Bronx,10464,210.0,516.0,BX10,13.0,14.0,34.0,1/17/15,2/2/16,"$38,802.00","$38,813.00","$49,321.00","$42,312.00"


In [22]:
# Connect to postgres/pgAdmin
sql_pgadmin = psycopg2.connect(user="postgres", password="", host="localhost", port="5432", database="dog_licenses_db")


In [23]:
# Converting NYC_joined_data_df to csv
NYC_joined_data_df.to_csv('Resources/NYC_joined_data_df.csv', index=False)

In [24]:
# Converting NYC_dog_pop_df to csv
NYC_dog_pop_df.to_csv('Resources/NYC_dog_pop_df.csv', index=False)

In [27]:
# Test connection to database
query = "SELECT * FROM borough_dog_pop"
example_df = pd.read_sql(query, sql_pgadmin)
example_df

Unnamed: 0,borough,dog_population
0,Bronx,12611
1,Brooklyn,30351
2,Manhattan,42607
3,Queens,25138
4,Staten Island,11155
