In [33]:
# Import dependencies

import os

import numpy as np
import pandas as pd
import psycopg2
from sqlalchemy import create_engine

In [7]:
# Load the dataset; pd.read_csv already returns a DataFrame, so no extra
# pd.DataFrame(...) conversion is needed.
file_path = "Resources/NYC_dogs_clean.csv"
NYC_dog_data_df = pd.read_csv(file_path)
# Alias kept so that any code still referring to the old name keeps working.
NYC_dog_data = NYC_dog_data_df
NYC_dog_data_df.head()

Unnamed: 0.1,Unnamed: 0,X,RowNumber,AnimalName,AnimalGender,AnimalBirthMonth,BreedName,Borough,ZipCode,CommunityDistrict,CensusTract2010,NTA,CityCouncilDistrict,CongressionalDistrict,StateSenatorialDistrict,LicenseIssuedDate,LicenseExpiredDate
0,1,1,1753,SHADOW,M,01/01/2000 12:00:00 AM,Beagle,Brooklyn,11236,318.0,1014.0,BK50,46.0,8.0,19.0,12/29/2014,01/30/2016
1,2,2,2415,ROCCO,M,10/01/2011 12:00:00 AM,Boxer,Brooklyn,11210,314.0,756.0,BK43,45.0,9.0,17.0,01/07/2015,01/30/2016
2,3,3,3328,LUIGI,M,09/01/2005 12:00:00 AM,Maltese,Bronx,10464,210.0,516.0,BX10,13.0,14.0,34.0,01/17/2015,02/02/2016
3,4,4,7537,PETUNIA,F,08/01/2013 12:00:00 AM,Pug,Brooklyn,11221,304.0,419.0,BK78,34.0,7.0,18.0,03/01/2015,03/28/2016
4,5,5,8487,ROMEO,M,10/01/2008 12:00:00 AM,Maltese,Bronx,10451,201.0,65.0,BX34,17.0,15.0,32.0,03/09/2015,03/09/2016


In [8]:
# Length (number of rows) of NYC_dog_data_df
NYC_dog_data_df.shape[0]

121862

In [9]:
# Column dtypes of NYC_dog_data_df, as inferred by pandas when the CSV was read
# (no explicit dtype mapping is passed to read_csv).
NYC_dog_data_df.dtypes

Unnamed: 0                   int64
X                            int64
RowNumber                    int64
AnimalName                  object
AnimalGender                object
AnimalBirthMonth            object
BreedName                   object
Borough                     object
ZipCode                      int64
CommunityDistrict          float64
CensusTract2010            float64
NTA                         object
CityCouncilDistrict        float64
CongressionalDistrict      float64
StateSenatorialDistrict    float64
LicenseIssuedDate           object
LicenseExpiredDate          object
dtype: object

In [10]:
# Narrow the full dataset down to the two columns used by the per-borough
# cells: animal gender and borough.
gender_borough_columns = ['AnimalGender', 'Borough']
boroughs_df = NYC_dog_data_df.filter(items=gender_borough_columns, axis='columns')
boroughs_df

Unnamed: 0,AnimalGender,Borough
0,M,Brooklyn
1,M,Brooklyn
2,M,Bronx
3,F,Brooklyn
4,M,Bronx
...,...,...
121857,M,Manhattan
121858,F,Brooklyn
121859,F,Staten Island
121860,F,Bronx


In [11]:
# Creating a new df of Bronx animal gender
# Exact equality is used instead of a regex substring test (str.contains), so
# only rows whose Borough is exactly 'Bronx' are selected; rows with a missing
# Borough compare False and are left out rather than breaking the mask.
bronx_df = boroughs_df[boroughs_df['Borough'] == 'Bronx']
bronx_df

Unnamed: 0,AnimalGender,Borough
2,M,Bronx
4,M,Bronx
9,M,Bronx
13,M,Bronx
16,F,Bronx
...,...,...
121829,F,Bronx
121834,F,Bronx
121843,F,Bronx
121846,M,Bronx


In [12]:
# Row count of the Bronx subset
bronx_df.shape[0]

12611

In [13]:
# Creating a new df of Brooklyn animal gender
# Exact equality is used instead of a regex substring test (str.contains), so
# only rows whose Borough is exactly 'Brooklyn' are selected; rows with a
# missing Borough compare False and are left out rather than breaking the mask.
brooklyn_df = boroughs_df[boroughs_df['Borough'] == 'Brooklyn']
brooklyn_df

Unnamed: 0,AnimalGender,Borough
0,M,Brooklyn
1,M,Brooklyn
3,F,Brooklyn
5,M,Brooklyn
8,F,Brooklyn
...,...,...
121839,M,Brooklyn
121840,M,Brooklyn
121847,F,Brooklyn
121851,M,Brooklyn


In [14]:
# Row count of the Brooklyn subset
brooklyn_df.shape[0]

30351

In [15]:
# Creating a new df of Manhattan animal gender
# Exact equality is used instead of a regex substring test (str.contains), so
# only rows whose Borough is exactly 'Manhattan' are selected; rows with a
# missing Borough compare False and are left out rather than breaking the mask.
manhattan_df = boroughs_df[boroughs_df['Borough'] == 'Manhattan']
manhattan_df

Unnamed: 0,AnimalGender,Borough
6,M,Manhattan
10,F,Manhattan
27,M,Manhattan
30,M,Manhattan
37,M,Manhattan
...,...,...
121836,M,Manhattan
121837,M,Manhattan
121853,F,Manhattan
121857,M,Manhattan


In [16]:
# Row count of the Manhattan subset
manhattan_df.shape[0]

42607

In [17]:
# Creating a new df of Queens animal gender
# Exact equality is used instead of a regex substring test (str.contains), so
# only rows whose Borough is exactly 'Queens' are selected; rows with a
# missing Borough compare False and are left out rather than breaking the mask.
queens_df = boroughs_df[boroughs_df['Borough'] == 'Queens']
queens_df

Unnamed: 0,AnimalGender,Borough
11,F,Queens
12,F,Queens
14,M,Queens
15,F,Queens
23,M,Queens
...,...,...
121848,F,Queens
121849,F,Queens
121854,M,Queens
121855,M,Queens


In [18]:
# Row count of the Queens subset
queens_df.shape[0]

25138

In [19]:
# Creating a new df of Staten Island animal gender
# Exact equality is used instead of a regex substring test (str.contains), so
# only rows whose Borough is exactly 'Staten Island' are selected; rows with a
# missing Borough compare False and are left out rather than breaking the mask.
staten_island_df = boroughs_df[boroughs_df['Borough'] == 'Staten Island']
staten_island_df

Unnamed: 0,AnimalGender,Borough
7,F,Staten Island
78,F,Staten Island
96,M,Staten Island
141,M,Staten Island
148,F,Staten Island
...,...,...
121814,M,Staten Island
121821,M,Staten Island
121850,F,Staten Island
121852,F,Staten Island


In [20]:
# Row count of the Staten Island subset
staten_island_df.shape[0]

11155

In [21]:
# Count the rows per borough directly from boroughs_df instead of hand-copying
# the totals from earlier cell outputs, so the table cannot drift from the data.
# sort_index() orders the boroughs alphabetically; rows with a missing Borough
# are not counted (value_counts drops NaN by default).
borough_counts = boroughs_df['Borough'].value_counts().sort_index()

# list of [borough, count] lists, with counts as plain Python ints
data = [[borough, int(count)] for borough, count in borough_counts.items()]

# Creating a new df total dog population in each borough
borough_dog_population_df = pd.DataFrame(data, columns=['Borough', 'Dog Population'])

# display dataframe
borough_dog_population_df


Unnamed: 0,Borough,Dog Population
0,Bronx,12611
1,Brooklyn,30351
2,Manhattan,42607
3,Queens,25138
4,Staten Island,11155


In [27]:
# Connecting to postgres/pgAdmin
# Connection settings are taken from the standard libpq environment variables
# (PGUSER, PGPASSWORD, PGHOST, PGPORT, PGDATABASE) so that credentials do not
# have to be stored in the notebook. The fallbacks are the values that were
# previously hard-coded here, so an unconfigured local setup behaves as before.
sql_pgadmin = psycopg2.connect(
    user=os.environ.get("PGUSER", "postgres"),
    password=os.environ.get("PGPASSWORD", ""),
    host=os.environ.get("PGHOST", "localhost"),
    port=os.environ.get("PGPORT", "5432"),
    database=os.environ.get("PGDATABASE", "mock_data"),
)


In [25]:
# Export the per-borough totals to CSV; the row index is left out of the file
borough_dog_population_df.to_csv(
    path_or_buf='Resources/borough_dog_population.csv',
    index=False,
)

In [26]:
# Export the gender/borough frame to CSV; the row index is left out of the file
boroughs_df.to_csv(
    path_or_buf='Resources/boroughs.csv',
    index=False,
)

In [30]:
# Smoke test for the database connection: read the borough column back from
# the borough_dog_pop table through the open connection.
# NOTE(review): borough_dog_pop is not created anywhere in this notebook;
# presumably it was loaded from the exported CSV outside the notebook - confirm.
query = "SELECT borough FROM borough_dog_pop"
sample_df = pd.read_sql(sql=query, con=sql_pgadmin)
sample_df

Unnamed: 0,borough
0,Bronx
1,Brooklyn
2,Manhattan
3,Queens
4,Staten Island
