# Brooklyn Home Sales - Data Plan
**by Group 4**


This plan illustrates how we will transform the original data and ingest it into the new PostgreSQL database. In order to do this, we will go through the following steps:
1. Connect to the PostgreSQL server and database
2. Load and inspect the original data
3. Transform the original data into necessary tables
4. Ingest the transformed data into the PostgreSQL database and the respective tables

First, we will import the required packages.

In [1]:
import pandas as pd
import numpy as np
from sqlalchemy import create_engine

We will now establish the connection with our PostgreSQL server and the database, as well as create the tables based on the schema we designed.

In [49]:
## Pass the connection string to a variable, conn_url
conn_url = 'postgresql://postgres:5ho7gvcd@f19server.apan5310.com:50204/team4_brooklyn'

# Create an engine that connects to PostgreSQL server
engine = create_engine(conn_url)

# Establish a connection
connection = engine.connect()

############################################################################################################
# Pass the SQL statements that create all tables
stmt = """
CREATE TABLE owners (
             owner_id int,
             owner_name	varchar(100) NOT NULL,
             owner_type	char(1),
             PRIMARY KEY (owner_id)
             );

CREATE TABLE addresses (
             address_id int,
             address varchar(100) NOT NULL,
             zipcode int,
             PRIMARY KEY (address_id)
             );
             

CREATE TABLE neighborhoods (
             neighborhood_id int,
             neighborhood varchar(100) NOT NULL,
             PRIMARY KEY (neighborhood_id)
             );

CREATE TABLE building_class_categories (
             building_class_category_id int,
             building_class_category varchar(100) NOT NULL,
             PRIMARY KEY (building_class_category_id)
             );

CREATE TABLE building_classes (
             building_class_id int,
             building_class char(2) NOT NULL,
             PRIMARY KEY (building_class_id)
);

CREATE TABLE building_class_categories_combinations (
             building_class_id int,
             building_class_category_id int,
             PRIMARY KEY (building_class_id, building_class_category_id),
             FOREIGN KEY (building_class_id) REFERENCES building_classes (building_class_id),
             FOREIGN KEY (building_class_category_id) REFERENCES building_class_categories (building_class_category_id) 
             );

CREATE TABLE tax_classes (
             tax_class_id int,
             tax_class varchar(10) NOT NULL,
             PRIMARY KEY (tax_class_id)
             );

CREATE TABLE community_districts (
             community_district_id int,
             community_district int NOT NULL,
             population int,
             area_sq_mi numeric(2,1),
             male_perc numeric(2,1),
             female_perc numeric(2,1),
             foreign_born_pop numeric(2,1),
             majority_race_pop varchar(10),
             access_to_parks int,
             commute_to_work_in_min numeric(2,1),
             education_bachelor_level numeric(2,1),
             CHECK (majority_race_pop IN ('White','Black','Hispanic','Other')),
             PRIMARY KEY (community_district_id)
             );

CREATE TABLE council_districts (
             city_council_district_id int,
             city_council_district int NOT NULL,
             PRIMARY KEY (city_council_district_id)
             );

CREATE TABLE school_districts (
             school_district_id int,
             school_district int NOT NULL,
             students_enrolled int,
             nr_schools int,
             school_dist_address_id int,
             PRIMARY KEY (school_district_id),
             FOREIGN KEY (school_dist_address_id) REFERENCES addresses (address_id)
             );

CREATE TABLE fire_companies (
             fire_company_id int,
             fire_company char(4) NOT NULL,
             fire_comp_address_id int,
             PRIMARY KEY (fire_company_id),
             FOREIGN KEY (fire_comp_address_id) REFERENCES addresses (address_id)
            );

CREATE TABLE police_precincts (
             police_precinct_id int,
             police_precinct int NOT NULL,
             police_prct_address_id int,
             phone_number varchar(12),
             PRIMARY KEY (police_precinct_id),
             FOREIGN KEY (police_prct_address_id) REFERENCES addresses (address_id)
             );

CREATE TABLE properties (
             property_id int,
             address_id int,
             neighborhood_id int NOT NULL,
             building_class_category_id int,
             building_class_id int,
             tax_class_id int,
             community_district_id int,
             city_council_district_id int,
             school_district_id int,
             fire_company_id int,
             police_precinct_id int,
             land_sqft numeric(10,1) NOT NULL,
             total_units int NOT NULL,
             res_units int NOT NULL,
             comm_units int NOT NULL,
             block int NOT NULL,
             lot int NOT NULL,
             PRIMARY KEY (property_id),
             FOREIGN KEY (neighborhood_id) REFERENCES neighborhoods (neighborhood_id),
             FOREIGN KEY (address_id) REFERENCES addresses (address_id),
             FOREIGN KEY (building_class_category_id) REFERENCES building_class_categories (building_class_category_id),
             FOREIGN KEY (building_class_id) REFERENCES building_classes (building_class_id),
             FOREIGN KEY (community_district_id) REFERENCES community_districts (community_district_id),
             FOREIGN KEY (city_council_district_id) REFERENCES council_districts (city_council_district_id),
             FOREIGN KEY (school_district_id) REFERENCES school_districts (school_district_id),
             FOREIGN KEY (fire_company_id) REFERENCES fire_companies (fire_company_id),
             FOREIGN KEY (police_precinct_id) REFERENCES police_precincts (police_precinct_id),
             FOREIGN KEY (tax_class_id) REFERENCES tax_classes (tax_class_id)
             );

CREATE TABLE property_coordinates (
             property_id int,
             x_cord int,
             y_cord int,
             PRIMARY KEY (property_id),
             FOREIGN KEY (property_id) REFERENCES properties (property_id)
             );

CREATE TABLE property_sales (
             sale_id int,
             property_id int NOT NULL,
             sale_price numeric (20,1) NOT NULL,
             sale_date date NOT NULL,
             year_of_sale numeric(4,0) NOT NULL,
             tax_class_id int,
             apartment_number varchar(20),
             gross_sqft numeric(20,1) NOT NULL,
             PRIMARY KEY (sale_id),
             FOREIGN KEY (property_id) REFERENCES properties (property_id),
             FOREIGN KEY (tax_class_id) REFERENCES tax_classes (tax_class_id)
             );

CREATE TABLE owns (
             owner_id int,
             property_id int,
             PRIMARY KEY (owner_id, property_id),
             FOREIGN KEY (owner_id) REFERENCES owners (owner_id),
             FOREIGN KEY (property_id) REFERENCES properties (property_id)
             );

"""

connection.execute(stmt)


<sqlalchemy.engine.result.ResultProxy at 0x7f30d0e82f60>

**Loading and Inspecting the Original Dataset**<br>
In this step, we will load the csv with the original dataset and look into its structure in order for us to accurately transform it into the necessary tables for the new relational database.

In [38]:
df=pd.read_csv("brooklyn.csv")

In [39]:
#add community_districts_info, school_district_info, fire_companies_info tables
cd_df=pd.read_csv('community_district_info.csv')
sd_df=pd.read_csv('school_districts_info.csv')
fc_df=pd.read_csv('Brooklyn_Fire_Stations.csv')
pc_df=pd.read_csv('police_precincts_info.csv')

#merge with original df
df = pd.merge(df, cd_df, how='left',
                    left_on='CD', right_on='community_district')
df = pd.merge(df, sd_df, how='left',
                    left_on='SchoolDist', right_on='school_district')
df = pd.merge(df, fc_df, how='left',
                    left_on='FireComp', right_on='fire_company')
df = pd.merge(df, pc_df, how='left',
                    left_on='PolicePrct', right_on='police_precinct')

In [9]:
df.head()

Unnamed: 0,address,total_units,land_sqft,residential_units,commercial_units,block,lot,sale_price,sale_date,year_of_sale,...,nr_schools,school_district_address,school_district_zipcode,fire_company,fire_company_address,fire_company_zipcode,police_precinct,police_precinct_address,police_precinct_zipcode,phone_number
0,21 CLARK STREET,1.0,20267.0,0.0,1.0,230.0,1.0,202500000.0,10/31/2017,2017.0,...,41.0,355 Park Ave,11238.0,E205,74 Middagh Street,11201.0,84.0,301 Gold Street,11201.0,718-875-6811
1,16 COURT STREET,102.0,12500.0,0.0,102.0,250.0,44.0,171000000.0,10/10/2017,2017.0,...,41.0,355 Park Ave,11238.0,E205,74 Middagh Street,11201.0,84.0,301 Gold Street,11201.0,718-875-6811
2,20 NORTH 12 STREET,0.0,60400.0,0.0,0.0,2287.0,16.0,160000000.0,4/19/2017,2017.0,...,38.0,215 Heyward St,11206.0,,,,94.0,100 Messerole Avenue,11222.0,718-383-3879
3,55 PROSPECT STREET,2.0,20704.0,0.0,2.0,63.0,1.0,138106368.0,3/31/2017,2017.0,...,41.0,355 Park Ave,11238.0,,,,84.0,301 Gold Street,11201.0,718-875-6811
4,90 SANDS STREET,1.0,21175.0,0.0,1.0,87.0,9.0,135000000.0,8/29/2017,2017.0,...,41.0,355 Park Ave,11238.0,,,,84.0,301 Gold Street,11201.0,718-875-6811


**Transforming Dataset into Tables**<br>
Based on our normalization plan and final schema design submitted in Checkpoint 3, we will now be creating the separate tables from the original data that we uploaded in the previous step. Once the data has been transformed to fit each table, we will load it up into the PosgreSQL database and the respective table.

### Owners Table

In [50]:
temp_own=df[['OwnerName','OwnerType']]
owners_df=temp_own.drop_duplicates(subset='OwnerName', keep='first').dropna()
owners_df.columns=['owner_name', 'owner_type']
owners_df.insert(0, 'owner_id', range(1, 1 + len(owners_df)))
owners_df.to_sql(name='owners', con=engine, if_exists='append', index=False)
print("Owners Table Loaded")

Owners Table Loaded


### Addresses Table

In [51]:
temp_add1=df[['address','zip_code']]
temp_add2=df[['school_district_address','school_district_zipcode']]
temp_add3=df[['police_precinct_address','police_precinct_zipcode']]
temp_add4=df[['fire_company_address','fire_company_zipcode']]

#drop duplicates
temp_add1=temp_add1.drop_duplicates()
temp_add2=temp_add2.drop_duplicates()
temp_add3=temp_add3.drop_duplicates()
temp_add4=temp_add4.drop_duplicates()

#drop nas
temp_add1=temp_add1.dropna()
temp_add2=temp_add2.dropna()
temp_add3=temp_add3.dropna()
temp_add4=temp_add4.dropna()

temp_add1.columns=['address','zipcode']
temp_add2.columns=['address','zipcode']
temp_add3.columns=['address','zipcode']
temp_add4.columns=['address','zipcode']

frames=[temp_add1, temp_add2,temp_add3,temp_add4]
address_df=pd.concat(frames)

address_df.insert(0, 'address_id', range(1, 1 + len(address_df)))
address_df.to_sql(name='addresses', con=engine, if_exists='append', index=False)
print('Address Table Loaded')

Address Table Loaded


### Neighborhood Table

In [52]:
temp_neigh=df[['neighborhood']]
temp_neigh=temp_neigh.drop_duplicates(keep='first')
temp_neigh=temp_neigh.dropna()
neighborhood_df=temp_neigh
neighborhood_df.columns=['neighborhood']
neighborhood_df.insert(0, 'neighborhood_id', range(1, 1 + len(neighborhood_df)))
neighborhood_df.to_sql(name='neighborhoods', con=engine, if_exists='append', index=False)
print('Neighborhood Table Loaded')

Neighborhood Table Loaded


### Building Class Categories Table

In [53]:
temp_bcc=df[['building_class_category']]
temp_bcc=temp_bcc.drop_duplicates(keep='first')
temp_bcc=temp_bcc.dropna()
building_class_category_df=temp_bcc
building_class_category_df.insert(0, 'building_class_category_id', range(1, 1 + len(building_class_category_df)))
building_class_category_df.to_sql(name='building_class_categories', con=engine, if_exists='append', index=False)
print('Building Class Category Table Loaded')

Building Class Category Table Loaded


### Building Classes Table

In [54]:
temp_bc=df[['building_class']]
temp_bc=temp_bc.drop_duplicates(keep='first')
temp_bc=temp_bc.dropna()
building_class_df=temp_bc
building_class_df.insert(0, 'building_class_id', range(1, 1 + len(building_class_df)))
building_class_df.to_sql(name='building_classes', con=engine, if_exists='append', index=False)
print('Building Class Table Loaded')

Building Class Table Loaded


### Building Class Category Combinations Table

In [55]:
temp_bcc_combo=df[['building_class','building_class_category']]
temp_bcc_combo=temp_bcc_combo.drop_duplicates(keep='first')
temp_bcc_combo=temp_bcc_combo.dropna()
building_class_categories_combinations_df=temp_bcc_combo
building_class_categories_combinations_df = pd.merge(building_class_categories_combinations_df,
                                                     building_class_df, how='left',
                                                     left_on='building_class', right_on='building_class')
building_class_categories_combinations_df = pd.merge(building_class_categories_combinations_df,
                                                     building_class_category_df, how='left',
                                                     left_on='building_class_category', right_on='building_class_category')
building_class_categories_combinations_df=building_class_categories_combinations_df[['building_class_id','building_class_category_id']]
building_class_categories_combinations_df.to_sql(name='building_class_categories_combinations', con=engine, if_exists='append', index=False)
print('Building_Class_Categories Combinations Table Loaded')

Building_Class_Categories Combinations Table Loaded


### Tax Class Table

In [56]:
temp_tax=df[['tax_class']]
temp_tax=temp_tax.drop_duplicates(keep='first')
temp_tax=temp_tax.dropna()
tax_class_df=temp_tax
tax_class_df.insert(0, 'tax_class_id', range(1, 1 + len(tax_class_df)))
tax_class_df.to_sql(name='tax_classes', con=engine, if_exists='append', index=False)
print('Tax Class Table Loaded')

Tax Class Table Loaded


### Community Districts Table

In [57]:
temp_cd=df[['community_district','population','area_sq_mi','male_perc','female_perc','foreign_born_pop',
            'majority_race_pop','access_to_parks','commute_to_work_in_min','education_bachelor_level']]
temp_cd=temp_cd.drop_duplicates(keep='first')
temp_cd=temp_cd.dropna()
community_district_df=temp_cd
community_district_df.insert(0, 'community_district_id', range(1, 1 + len(community_district_df)))
community_district_df["community_district"] = community_district_df["community_district"].astype(np.int64)

#turn values to numeric
community_district_df["community_district"] = community_district_df["community_district"].astype(np.int64)
community_district_df['population']=community_district_df['population'].astype(np.int64)
community_district_df.to_sql(name='community_districts', con=engine, if_exists='append', index=False)

print('Community District Table Loaded')

DataError: (psycopg2.errors.NumericValueOutOfRange) numeric field overflow
DETAIL:  A field with precision 2, scale 1 must round to an absolute value less than 10^1.

[SQL: INSERT INTO community_districts (community_district_id, community_district, population, area_sq_mi, male_perc, female_perc, foreign_born_pop, majority_race_pop, access_to_parks, commute_to_work_in_min, education_bachelor_level) VALUES (%(community_district_id)s, %(community_district)s, %(population)s, %(area_sq_mi)s, %(male_perc)s, %(female_perc)s, %(foreign_born_pop)s, %(majority_race_pop)s, %(access_to_parks)s, %(commute_to_work_in_min)s, %(education_bachelor_level)s)]
[parameters: ({'female_perc': 53.2, 'male_perc': 46.8, 'foreign_born_pop': 19.6, 'community_district': 302, 'commute_to_work_in_min': 35.3, 'access_to_parks': 99.0, 'area_sq_mi': 2.8, 'community_district_id': 1, 'education_bachelor_level': 61.1, 'majority_race_pop': 'White', 'population': 139000}, {'female_perc': 50.6, 'male_perc': 49.4, 'foreign_born_pop': 24.1, 'community_district': 301, 'commute_to_work_in_min': 34.5, 'access_to_parks': 97.0, 'area_sq_mi': 4.7, 'community_district_id': 2, 'education_bachelor_level': 47.8, 'majority_race_pop': 'White', 'population': 168000}, {'female_perc': 54.0, 'male_perc': 46.0, 'foreign_born_pop': 40.2, 'community_district': 316, 'commute_to_work_in_min': 47.5, 'access_to_parks': 77.0, 'area_sq_mi': 8.5, 'community_district_id': 3, 'education_bachelor_level': 31.1, 'majority_race_pop': 'Black', 'population': 212000}, {'female_perc': 50.5, 'male_perc': 49.5, 'foreign_born_pop': 32.9, 'community_district': 304, 'commute_to_work_in_min': 40.8, 'access_to_parks': 88.0, 'area_sq_mi': 2.0, 'community_district_id': 4, 'education_bachelor_level': 26.3, 'majority_race_pop': 'Hispanic', 'population': 135000}, {'female_perc': 52.4, 'male_perc': 47.6, 'foreign_born_pop': 16.5, 'community_district': 306, 'commute_to_work_in_min': 38.0, 'access_to_parks': 94.0, 'area_sq_mi': 3.1, 'community_district_id': 5, 'education_bachelor_level': 72.1, 'majority_race_pop': 'White', 'population': 119000}, {'female_perc': 49.5, 'male_perc': 50.5, 'foreign_born_pop': 29.1, 'community_district': 307, 'commute_to_work_in_min': 36.7, 'access_to_parks': 74.0, 'area_sq_mi': 3.6, 'community_district_id': 6, 'education_bachelor_level': 28.7, 'majority_race_pop': 'White', 'population': 164000}, {'female_perc': 53.2, 'male_perc': 46.8, 'foreign_born_pop': 41.6, 'community_district': 315, 'commute_to_work_in_min': 42.1, 'access_to_parks': 59.0, 'area_sq_mi': 2.9, 'community_district_id': 7, 'education_bachelor_level': 37.3, 'majority_race_pop': 'White', 'population': 164000}, {'female_perc': 54.8, 'male_perc': 45.2, 'foreign_born_pop': 41.1, 'community_district': 309, 'commute_to_work_in_min': 42.4, 'access_to_parks': 91.0, 'area_sq_mi': 1.6, 'community_district_id': 8, 'education_bachelor_level': 30.7, 'majority_race_pop': 'Black', 'population': 110000}  ... displaying 10 of 18 total bound parameter sets ...  {'female_perc': 56.6, 'male_perc': 43.4, 'foreign_born_pop': 52.2, 'community_district': 317, 'commute_to_work_in_min': 46.7, 'access_to_parks': 61.0, 'area_sq_mi': 3.4, 'community_district_id': 17, 'education_bachelor_level': 23.7, 'majority_race_pop': 'Black', 'population': 138000}, {'female_perc': 54.0, 'male_perc': 46.0, 'foreign_born_pop': 40.2, 'community_district': 318, 'commute_to_work_in_min': 47.5, 'access_to_parks': 77.0, 'area_sq_mi': 8.5, 'community_district_id': 18, 'education_bachelor_level': 31.1, 'majority_race_pop': 'Black', 'population': 212000})]
(Background on this error at: http://sqlalche.me/e/9h9h)

### Council Districts Table

In [58]:
temp_counc=df[['Council']]
temp_counc=temp_counc.drop_duplicates(keep='first')
temp_counc=temp_counc.dropna()
council_districts_df=temp_counc
council_districts_df.columns=['city_council_district']
council_districts_df.insert(0, 'city_council_district_id', range(1, 1 + len(council_districts_df)))
council_districts_df["city_council_district"] = council_districts_df["city_council_district"].astype(np.int64)
council_districts_df.to_sql(name='council_districts', con=engine, if_exists='append', index=False)
print('Council Districts Table Loaded')

Council Districts Table Loaded


### School Districts Table

In [59]:
temp_sc=df[['school_district','students_enrolled','nr_schools','school_district_address','school_district_zipcode']]
temp_sc=temp_sc.drop_duplicates(keep='first')
temp_sc=temp_sc.dropna()
school_districts_df=temp_sc
school_districts_df["school_district"] = school_districts_df["school_district"].astype(np.int64)
#address id added
school_districts_df = pd.merge(school_districts_df, address_df, how='left',
                         left_on='school_district_address', right_on='address')

school_districts_df.insert(0, 'school_district_id', range(1, 1 + len(school_districts_df)))
school_districts_df=school_districts_df[['school_district_id','school_district','students_enrolled',
                                        'nr_schools','address_id']]

school_districts_df.rename(columns={'address_id':'school_dist_address_id'},inplace=True)
school_districts_df.to_sql(name='school_districts', con=engine, if_exists='append', index=False)
print('School Districts Table Loaded')

School Districts Table Loaded


### Fire Companies Table

In [60]:
temp_fire=df[['fire_company','fire_company_address']]
temp_fire=temp_fire.drop_duplicates(keep='first')
temp_fire=temp_fire.dropna()
fire_companies_df=temp_fire

#add fire company address id
fire_companies_df = pd.merge(fire_companies_df, address_df, how='left',
                         left_on='fire_company_address', right_on='address')

fire_companies_df.insert(0, 'fire_company_id', range(1, 1 + len(fire_companies_df)))
fire_companies_df=fire_companies_df[['fire_company_id','fire_company','address_id']]
fire_companies_df.rename(columns={'address_id':'fire_comp_address_id'},inplace=True)
fire_companies_df.to_sql(name='fire_companies', con=engine, if_exists='append', index=False)
print('Fire Companies Table Loaded')

Fire Companies Table Loaded


### Police Precincts Table

In [61]:
temp_pol=df[['police_precinct','police_precinct_address','phone_number']]
temp_pol=temp_pol.drop_duplicates(keep='first')
temp_pol=temp_pol.dropna()
police_precincts_df=temp_pol
#add police precinct address id
police_precincts_df = pd.merge(police_precincts_df, address_df, how='left',
                         left_on='police_precinct_address', right_on='address')

police_precincts_df.insert(0, 'police_precinct_id', range(1, 1 + len(police_precincts_df)))
police_precincts_df=police_precincts_df[['police_precinct_id','police_precinct','address_id','phone_number']]
police_precincts_df.rename(columns={'address_id':'police_prct_address_id'},inplace=True)
police_precincts_df["police_precinct"] = police_precincts_df["police_precinct"].astype(np.int64)
police_precincts_df.to_sql(name='police_precincts', con=engine, if_exists='append', index=False)
print('Police Precincts Table Loaded')

Police Precincts Table Loaded


### Properties Table

In [62]:
df=df.dropna(how='all')
df=df.drop_duplicates(keep='first')

df2=df[['address','neighborhood','building_class_category',
                  'building_class','tax_class','CD','Council',
                  'SchoolDist','FireComp','PolicePrct','land_sqft','total_units',
                  'residential_units','commercial_units','block','lot','XCoord','YCoord',
                  'sale_price','sale_date','year_of_sale',
                  'apartment_number','gross_sqft','OwnerName']]

#ids to be added
#neighborhood
df3 = pd.merge(df2, neighborhood_df, how='left',
                         left_on='neighborhood', right_on='neighborhood')
#address
df4=pd.merge(df3, address_df, how='left',
                        left_on='address', right_on='address')

#building_class_category
df5=pd.merge(df4, building_class_category_df, how='left',
                        left_on='building_class_category', right_on='building_class_category')

#building_class
df6=pd.merge(df5, building_class_df, how='left',
                        left_on='building_class', right_on='building_class')

#tax_class
df7=pd.merge(df6, tax_class_df, how='left',
                        left_on='tax_class', right_on='tax_class')

#CD
df8=pd.merge(df7, community_district_df, how='left',
                    left_on='CD', right_on='community_district')

#Council
df9=pd.merge(df8, council_districts_df, how='left',
                        left_on='Council', right_on='city_council_district')

#SchoolDist
df10=pd.merge(df9, school_districts_df, how='left',
                        left_on='SchoolDist', right_on='school_district')

#fireComp
df11=pd.merge(df10, fire_companies_df, how='left',
                        left_on='FireComp', right_on='fire_company')


#PolicePrct
df12=pd.merge(df11, police_precincts_df, how='left',
                        left_on='PolicePrct', right_on='police_precinct')

#add property_id
df12.insert(0, 'property_id', range(1, 1 + len(df12)))


In [66]:
property_df=df12[['property_id','address_id', 'neighborhood_id', 'building_class_category_id',
                  'building_class_id','tax_class_id','community_district_id','city_council_district_id',
                  'school_district_id','fire_company_id','police_precinct_id','land_sqft','total_units',
                  'residential_units','commercial_units','block','lot']]

In [71]:
property_df.head()

Unnamed: 0,property_id,address_id,neighborhood_id,building_class_category_id,building_class_id,tax_class_id,community_district_id,city_council_district_id,school_district_id,fire_company_id,police_precinct_id,land_sqft,total_units,res_units,comm_units,block,lot
0,1,1,1,1,1.0,1.0,1.0,1.0,1.0,1.0,1.0,20267.0,1.0,0.0,1.0,230.0,1.0
1,2,2,1,2,2.0,1.0,1.0,1.0,1.0,1.0,1.0,12500.0,102.0,0.0,102.0,250.0,44.0
2,3,3,2,3,3.0,1.0,2.0,1.0,2.0,,2.0,60400.0,0.0,0.0,0.0,2287.0,16.0
3,4,4,3,2,4.0,1.0,1.0,1.0,1.0,,1.0,20704.0,2.0,0.0,2.0,63.0,1.0
4,5,5,3,1,1.0,1.0,1.0,1.0,1.0,,1.0,21175.0,1.0,0.0,1.0,87.0,9.0


In [68]:
property_df.rename(columns={'residential_units':'res_units','commercial_units':'comm_units'},inplace=True)
property_df.to_sql(name='properties', con=engine, if_exists='append', index=False)
print('Properties Table Loaded')

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  return super(DataFrame, self).rename(**kwargs)


IntegrityError: (psycopg2.errors.ForeignKeyViolation) insert or update on table "properties" violates foreign key constraint "properties_community_district_id_fkey"
DETAIL:  Key (community_district_id)=(1) is not present in table "community_districts".

[SQL: INSERT INTO properties (property_id, address_id, neighborhood_id, building_class_category_id, building_class_id, tax_class_id, community_district_id, city_council_district_id, school_district_id, fire_company_id, police_precinct_id, land_sqft, total_units, res_units, comm_units, block, lot) VALUES (%(property_id)s, %(address_id)s, %(neighborhood_id)s, %(building_class_category_id)s, %(building_class_id)s, %(tax_class_id)s, %(community_district_id)s, %(city_council_district_id)s, %(school_district_id)s, %(fire_company_id)s, %(police_precinct_id)s, %(land_sqft)s, %(total_units)s, %(res_units)s, %(comm_units)s, %(block)s, %(lot)s)]
[parameters: ({'total_units': 1.0, 'land_sqft': 20267.0, 'lot': 1.0, 'building_class_id': 1.0, 'city_council_district_id': 1.0, 'neighborhood_id': 1, 'block': 230.0, 'school_district_id': 1.0, 'building_class_category_id': 1, 'fire_company_id': 1.0, 'address_id': 1, 'comm_units': 1.0, 'tax_class_id': 1.0, 'community_district_id': 1.0, 'property_id': 1, 'police_precinct_id': 1.0, 'res_units': 0.0}, {'total_units': 102.0, 'land_sqft': 12500.0, 'lot': 44.0, 'building_class_id': 2.0, 'city_council_district_id': 1.0, 'neighborhood_id': 1, 'block': 250.0, 'school_district_id': 1.0, 'building_class_category_id': 2, 'fire_company_id': 1.0, 'address_id': 2, 'comm_units': 102.0, 'tax_class_id': 1.0, 'community_district_id': 1.0, 'property_id': 2, 'police_precinct_id': 1.0, 'res_units': 0.0}, {'total_units': 0.0, 'land_sqft': 60400.0, 'lot': 16.0, 'building_class_id': 3.0, 'city_council_district_id': 1.0, 'neighborhood_id': 2, 'block': 2287.0, 'school_district_id': 2.0, 'building_class_category_id': 3, 'fire_company_id': None, 'address_id': 3, 'comm_units': 0.0, 'tax_class_id': 1.0, 'community_district_id': 2.0, 'property_id': 3, 'police_precinct_id': 2.0, 'res_units': 0.0}, {'total_units': 2.0, 'land_sqft': 20704.0, 'lot': 1.0, 'building_class_id': 4.0, 'city_council_district_id': 1.0, 'neighborhood_id': 3, 'block': 63.0, 'school_district_id': 1.0, 'building_class_category_id': 2, 'fire_company_id': None, 'address_id': 4, 'comm_units': 2.0, 'tax_class_id': 1.0, 'community_district_id': 1.0, 'property_id': 4, 'police_precinct_id': 1.0, 'res_units': 0.0}, {'total_units': 1.0, 'land_sqft': 21175.0, 'lot': 9.0, 'building_class_id': 1.0, 'city_council_district_id': 1.0, 'neighborhood_id': 3, 'block': 87.0, 'school_district_id': 1.0, 'building_class_category_id': 1, 'fire_company_id': None, 'address_id': 5, 'comm_units': 1.0, 'tax_class_id': 1.0, 'community_district_id': 1.0, 'property_id': 5, 'police_precinct_id': 1.0, 'res_units': 0.0}, {'total_units': 5.0, 'land_sqft': 14800.0, 'lot': 1.0, 'building_class_id': 4.0, 'city_council_district_id': 1.0, 'neighborhood_id': 3, 'block': 77.0, 'school_district_id': 1.0, 'building_class_category_id': 2, 'fire_company_id': None, 'address_id': 6, 'comm_units': 5.0, 'tax_class_id': 1.0, 'community_district_id': 1.0, 'property_id': 6, 'police_precinct_id': 1.0, 'res_units': 0.0}, {'total_units': 3.0, 'land_sqft': 21218.0, 'lot': 5.0, 'building_class_id': 4.0, 'city_council_district_id': 1.0, 'neighborhood_id': 3, 'block': 87.0, 'school_district_id': 1.0, 'building_class_category_id': 2, 'fire_company_id': None, 'address_id': 7, 'comm_units': 3.0, 'tax_class_id': 1.0, 'community_district_id': 1.0, 'property_id': 7, 'police_precinct_id': 1.0, 'res_units': 0.0}, {'total_units': 2.0, 'land_sqft': 20600.0, 'lot': 1.0, 'building_class_id': 4.0, 'city_council_district_id': 1.0, 'neighborhood_id': 3, 'block': 76.0, 'school_district_id': 1.0, 'building_class_category_id': 2, 'fire_company_id': None, 'address_id': 8, 'comm_units': 2.0, 'tax_class_id': 1.0, 'community_district_id': 1.0, 'property_id': 8, 'police_precinct_id': 1.0, 'res_units': 0.0}  ... displaying 10 of 23972 total bound parameter sets ...  {'total_units': 0.0, 'land_sqft': 2390.0, 'lot': 3.0, 'building_class_id': 3.0, 'city_council_district_id': 5.0, 'neighborhood_id': 44, 'block': 3189.0, 'school_district_id': None, 'building_class_category_id': 3, 'fire_company_id': None, 'address_id': 18974, 'comm_units': 0.0, 'tax_class_id': 1.0, 'community_district_id': 4.0, 'property_id': 23971, 'police_precinct_id': 6.0, 'res_units': 0.0}, {'total_units': 1.0, 'land_sqft': 0.0, 'lot': 1090.0, 'building_class_id': 107.0, 'city_council_district_id': None, 'neighborhood_id': 44, 'block': 3328.0, 'school_district_id': None, 'building_class_category_id': 28, 'fire_company_id': None, 'address_id': 18975, 'comm_units': 0.0, 'tax_class_id': 1.0, 'community_district_id': None, 'property_id': 23972, 'police_precinct_id': None, 'res_units': 0.0})]
(Background on this error at: http://sqlalche.me/e/gkpj)

### Property Coordinates Table

In [72]:
property_coordinates_df=df12[['property_id','XCoord','YCoord']]
property_coordinates_df.columns=['property_id','x_cord','y_cord']
property_coordinates_df.to_sql(name='property_coordinates', con=engine, if_exists='append', index=False)
print('Property Coordinates Table Loaded')

IntegrityError: (psycopg2.errors.ForeignKeyViolation) insert or update on table "property_coordinates" violates foreign key constraint "property_coordinates_property_id_fkey"
DETAIL:  Key (property_id)=(1) is not present in table "properties".

[SQL: INSERT INTO property_coordinates (property_id, x_cord, y_cord) VALUES (%(property_id)s, %(x_cord)s, %(y_cord)s)]
[parameters: ({'y_cord': 193713.0, 'x_cord': 985622.0, 'property_id': 1}, {'y_cord': 191977.0, 'x_cord': 986784.0, 'property_id': 2}, {'y_cord': 202986.0, 'x_cord': 995151.0, 'property_id': 3}, {'y_cord': 194630.0, 'x_cord': 987524.0, 'property_id': 4}, {'y_cord': 194154.0, 'x_cord': 987811.0, 'property_id': 5}, {'y_cord': 194462.0, 'x_cord': 987744.0, 'property_id': 6}, {'y_cord': 194159.0, 'x_cord': 987708.0, 'property_id': 7}, {'y_cord': 194475.0, 'x_cord': 987516.0, 'property_id': 8}  ... displaying 10 of 23972 total bound parameter sets ...  {'y_cord': 196667.0, 'x_cord': 1005816.0, 'property_id': 23971}, {'y_cord': None, 'x_cord': None, 'property_id': 23972})]
(Background on this error at: http://sqlalche.me/e/gkpj)

### Property Sales Table

In [48]:
temp_sales=df12[['property_id','sale_price','sale_date','year_of_sale','tax_class',
               'apartment_number','gross_sqft']]

sales_df=pd.merge(temp_sales, tax_class_df, how='left',
                 left_on='tax_class', right_on='tax_class')

sales_df=sales_df[['property_id','sale_price','sale_date','year_of_sale','tax_class_id',
               'apartment_number','gross_sqft']]

sales_df.insert(0, 'sale_id', range(1, 1 + len(sales_df)))
sales_df.to_sql(name='property_sales', con=engine, if_exists='append', index=False)
print('Property Sales Table Loaded')

### Owns Table

In [50]:
owns_df=df12[['OwnerName','property_id']]

owns_df=pd.merge(owns_df, owners_df, how='left',
                left_on='OwnerName', right_on='owner_name')

owns_df=owns_df[['owner_id','property_id']].dropna()
owns_df.to_sql(name='owns', con=engine, if_exists='append', index=False)
print('Owns Table Loaded')

Now that we have successfully uploaded all the data into our PostgreSQL database, we will move over to PostgreSQL to determine if everything has been uploaded correctly in order for us to proceed with more in-depth data analysis of the Brooklyn home sales dataset.