# ETL Project - Create tables

In [1]:
#The following code takes the Extract and Transform from all datasets and combines them
# to create the tables to load in PostGres

In [3]:
# Import Dependencies
import pandas as pd
import psycopg2

# Python SQL toolkit and Object Relational Mapper
import sqlalchemy
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, func, inspect,join
#from secret import username, password
from sqlalchemy.types import Integer, Text, String, Float, DateTime

# Get Transformed Data into DataFrames

In [4]:
#Load Crime Data - Table 1/8
crime_data_df=pd.read_csv("ET_Files/crime_data.csv")
del crime_data_df["Unnamed: 0"]
crime_data_df.head()

Unnamed: 0,Incident,Date,Hour,NIBRS_Class,NIBRS_Description,Offense_Count,Premise,Block_Range,Street_Name,Street_Type,City,Zip_Code
0,20519,1/1/2019,0,13A,Aggravated Assault,1,"Residence, Home (Includes Apartment)",4034,OSBY,DR,HOUSTON,77025
1,20519,1/1/2019,0,23H,All other larceny,1,"Residence, Home (Includes Apartment)",4034,OSBY,DR,HOUSTON,77025
2,20519,1/1/2019,0,290,"Destruction, damage, vandalism",1,"Residence, Home (Includes Apartment)",4034,OSBY,DR,HOUSTON,77025
3,20519,1/1/2019,0,35A,"Drug, narcotic violations",1,"Residence, Home (Includes Apartment)",4034,OSBY,DR,HOUSTON,77025
4,34819,1/1/2019,0,290,"Destruction, damage, vandalism",1,"Residence, Home (Includes Apartment)",4065,SILVERWOOD,DR,HOUSTON,77025


In [5]:
crime_data_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 22373 entries, 0 to 22372
Data columns (total 12 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   Incident           22373 non-null  int64 
 1   Date               22373 non-null  object
 2   Hour               22373 non-null  int64 
 3   NIBRS_Class        22373 non-null  object
 4   NIBRS_Description  22373 non-null  object
 5   Offense_Count      22373 non-null  int64 
 6   Premise            22373 non-null  object
 7   Block_Range        22249 non-null  object
 8   Street_Name        22373 non-null  object
 9   Street_Type        20152 non-null  object
 10  City               22373 non-null  object
 11  Zip_Code           22373 non-null  int64 
dtypes: int64(4), object(8)
memory usage: 2.0+ MB


In [6]:
# Build Table 2 - properties

In [7]:
#Load HCAD Data
hcad_data_df=pd.read_csv("ET_Files/properties_hcad_extract.csv")
del hcad_data_df["Unnamed: 0"]
hcad_data_df=hcad_data_df.rename(columns={'full_addr':'address'})
hcad_data_df.head()

Unnamed: 0,account,zip_code,neighborhood_code,acreage,new_owner_date,address,sq_ft,dup_acct
0,21750000003,77002,8400.07,0.0405,2009-10-13 00:00:00.000,"2202 CAROLINE ST HOUSTON, TEXAS 77002",1944.0,False
1,21750000013,77002,8400.01,0.3099,1995-08-02 00:00:00.000,"2218 CAROLINE ST HOUSTON, TEXAS 77002",2471.0,False
2,21750000018,77002,8400.07,0.0348,2018-06-29 00:00:00.000,"2204 CAROLINE ST HOUSTON, TEXAS 77002",1944.0,False
3,21750000019,77002,8400.07,0.0395,2001-01-01 00:00:00.000,"2206 CAROLINE ST HOUSTON, TEXAS 77002",1944.0,False
4,41320000003,77006,8316.03,0.1378,2014-12-12 00:00:00.000,"105 STRATFORD ST HOUSTON, TEXAS 77006",2576.0,False


In [8]:
hcad_data_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 32591 entries, 0 to 32590
Data columns (total 8 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   account            32591 non-null  int64  
 1   zip_code           32591 non-null  int64  
 2   neighborhood_code  32591 non-null  float64
 3   acreage            32591 non-null  float64
 4   new_owner_date     32591 non-null  object 
 5   address            32591 non-null  object 
 6   sq_ft              32583 non-null  float64
 7   dup_acct           32591 non-null  bool   
dtypes: bool(1), float64(3), int64(2), object(2)
memory usage: 1.8+ MB


In [9]:
#Load flood and school data
flood_school_df=pd.read_csv("ET_Files/flood_data_and_school_id.csv")
# del flood_school_df["Unnamed: 0"]
flood_school_df=flood_school_df.rename(columns={'Address':'address'})
flood_school_df['address']=flood_school_df['address'].str.upper()
flood_school_df.head()

Unnamed: 0,address,Latitude,Longitude,Flood_Description,Flood_Zone,Zip_Code,school_id,school_type
0,"2202 CAROLINE ST HOUSTON, TEXAS 77002",29.74614,-95.36987,AREA OF MINIMAL FLOOD HAZARD,X,77002,101912110,Elementary
1,"2204 CAROLINE ST HOUSTON, TEXAS 77002",29.74619,-95.36996,AREA OF MINIMAL FLOOD HAZARD,X,77002,101912110,Elementary
2,"2206 CAROLINE ST HOUSTON, TEXAS 77002",29.74624,-95.37004,AREA OF MINIMAL FLOOD HAZARD,X,77002,101912058,Elementary
3,"2251 AUSTIN ST HOUSTON, TEXAS 77002",29.7453,-95.36882,AREA OF MINIMAL FLOOD HAZARD,X,77002,101912110,Elementary
4,"2255 AUSTIN ST HOUSTON, TEXAS 77002",29.74525,-95.36874,AREA OF MINIMAL FLOOD HAZARD,X,77002,101912110,Elementary


In [10]:
flood_school_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 69690 entries, 0 to 69689
Data columns (total 8 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   address            69690 non-null  object 
 1   Latitude           69690 non-null  float64
 2   Longitude          69690 non-null  float64
 3   Flood_Description  57480 non-null  object 
 4   Flood_Zone         69690 non-null  object 
 5   Zip_Code           69690 non-null  int64  
 6   school_id          69690 non-null  int64  
 7   school_type        69690 non-null  object 
dtypes: float64(2), int64(2), object(4)
memory usage: 4.3+ MB


In [11]:
#Create dataframe for properties: merging hcad_data_df and flood_school_df on address - Table 2/8
properties_df=pd.merge(hcad_data_df,flood_school_df,on="address")
properties_df=properties_df.rename(columns={'Flood_Description':'flood_description',
                                            'Latitude':'latitude',
                                            'Longitude':'longitude'
                                           })
properties_df.head()

Unnamed: 0,account,zip_code,neighborhood_code,acreage,new_owner_date,address,sq_ft,dup_acct,latitude,longitude,flood_description,Flood_Zone,Zip_Code,school_id,school_type
0,21750000003,77002,8400.07,0.0405,2009-10-13 00:00:00.000,"2202 CAROLINE ST HOUSTON, TEXAS 77002",1944.0,False,29.74614,-95.36987,AREA OF MINIMAL FLOOD HAZARD,X,77002,101912110,Elementary
1,21750000003,77002,8400.07,0.0405,2009-10-13 00:00:00.000,"2202 CAROLINE ST HOUSTON, TEXAS 77002",1944.0,False,29.74614,-95.36987,AREA OF MINIMAL FLOOD HAZARD,X,77002,101912467,Middle
2,21750000003,77002,8400.07,0.0405,2009-10-13 00:00:00.000,"2202 CAROLINE ST HOUSTON, TEXAS 77002",1944.0,False,29.74614,-95.36987,AREA OF MINIMAL FLOOD HAZARD,X,77002,101912025,High
3,21750000013,77002,8400.01,0.3099,1995-08-02 00:00:00.000,"2218 CAROLINE ST HOUSTON, TEXAS 77002",2471.0,False,29.74605,-95.37014,AREA OF MINIMAL FLOOD HAZARD,X,77002,101912110,Elementary
4,21750000013,77002,8400.01,0.3099,1995-08-02 00:00:00.000,"2218 CAROLINE ST HOUSTON, TEXAS 77002",2471.0,False,29.74605,-95.37014,AREA OF MINIMAL FLOOD HAZARD,X,77002,101912463,Middle


In [12]:
properties_df=properties_df.reindex(columns= ['account','latitude','longitude','address','zip_code', 'neighborhood_code','acreage','new_owner_date',
                                                      'sq_ft','school_id','school_type','flood_description'])
properties_df.head()

Unnamed: 0,account,latitude,longitude,address,zip_code,neighborhood_code,acreage,new_owner_date,sq_ft,school_id,school_type,flood_description
0,21750000003,29.74614,-95.36987,"2202 CAROLINE ST HOUSTON, TEXAS 77002",77002,8400.07,0.0405,2009-10-13 00:00:00.000,1944.0,101912110,Elementary,AREA OF MINIMAL FLOOD HAZARD
1,21750000003,29.74614,-95.36987,"2202 CAROLINE ST HOUSTON, TEXAS 77002",77002,8400.07,0.0405,2009-10-13 00:00:00.000,1944.0,101912467,Middle,AREA OF MINIMAL FLOOD HAZARD
2,21750000003,29.74614,-95.36987,"2202 CAROLINE ST HOUSTON, TEXAS 77002",77002,8400.07,0.0405,2009-10-13 00:00:00.000,1944.0,101912025,High,AREA OF MINIMAL FLOOD HAZARD
3,21750000013,29.74605,-95.37014,"2218 CAROLINE ST HOUSTON, TEXAS 77002",77002,8400.01,0.3099,1995-08-02 00:00:00.000,2471.0,101912110,Elementary,AREA OF MINIMAL FLOOD HAZARD
4,21750000013,29.74605,-95.37014,"2218 CAROLINE ST HOUSTON, TEXAS 77002",77002,8400.01,0.3099,1995-08-02 00:00:00.000,2471.0,101912463,Middle,AREA OF MINIMAL FLOOD HAZARD


In [13]:
properties_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 61266 entries, 0 to 61265
Data columns (total 12 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   account            61266 non-null  int64  
 1   latitude           61266 non-null  float64
 2   longitude          61266 non-null  float64
 3   address            61266 non-null  object 
 4   zip_code           61266 non-null  int64  
 5   neighborhood_code  61266 non-null  float64
 6   acreage            61266 non-null  float64
 7   new_owner_date     61266 non-null  object 
 8   sq_ft              61257 non-null  float64
 9   school_id          61266 non-null  int64  
 10  school_type        61266 non-null  object 
 11  flood_description  48786 non-null  object 
dtypes: float64(5), int64(3), object(4)
memory usage: 6.1+ MB


In [14]:
#Create df of zipcodes - Table 3/8
zip_code=hcad_data_df['zip_code'].unique()
zip_code_df=pd.DataFrame(zip_code)
zip_code_df=zip_code_df.rename(columns={0:'zip_code'})
zip_code_df

Unnamed: 0,zip_code
0,77002
1,77006
2,77019
3,77098
4,77054
5,77005
6,77025
7,77027
8,77030


In [15]:
zip_code_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9 entries, 0 to 8
Data columns (total 1 columns):
 #   Column    Non-Null Count  Dtype
---  ------    --------------  -----
 0   zip_code  9 non-null      int64
dtypes: int64(1)
memory usage: 200.0 bytes


In [16]:
#Load Flood Zones Description - Table 4/8
flood_zone_df=pd.read_csv("ET_Files/flood_zones_description.csv")
del flood_zone_df["Unnamed: 0"]
flood_zone_df.head()

Unnamed: 0,Flood_Description,Flood_Zone
0,0.2 PCT ANNUAL CHANCE FLOOD HAZARD,X
1,AREA OF MINIMAL FLOOD HAZARD,X
2,FLOODWAY,AE
3,High-Risk Flood Zone,AE


In [17]:
flood_zone_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 2 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   Flood_Description  4 non-null      object
 1   Flood_Zone         4 non-null      object
dtypes: object(2)
memory usage: 192.0+ bytes


In [18]:
#Load School  - Table 5/8
school_district_df=pd.read_csv("ET_Files/school_districts.csv")
school_district_df.head(30)

Unnamed: 0,district_id,district_name
0,1,CYPRESS-FAIRBANKS ISD
1,2,CHANNELVIEW ISD
2,3,GOOSE CREEK CISD
3,4,ALIEF ISD
4,5,HOUSTON ISD
5,6,ALDINE ISD
6,7,SPRING ISD
7,8,HUMBLE ISD
8,9,PASADENA ISD
9,10,CROSBY ISD


In [19]:
school_district_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19 entries, 0 to 18
Data columns (total 2 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   district_id    19 non-null     int64 
 1   district_name  19 non-null     object
dtypes: int64(1), object(1)
memory usage: 432.0+ bytes


In [20]:
#Load school ratings  - Table 6/8
school_ratings_df=pd.read_csv("ET_Files/school_ratings.csv")
school_ratings_df=school_ratings_df.reindex(columns= ['school_id','school_type','name', 'address','city','zip_code',
                                                      'district_id','latitude','longitude','school_rating'])
school_ratings_df.head()

Unnamed: 0,school_id,school_type,name,address,city,zip_code,district_id,latitude,longitude,school_rating
0,101907107,Elementary,ADAM ELEMENTARY,11303 HONEYGROVE LN,HOUSTON,77065,1,29.926556,-95.603242,85
1,101905043,Middle,AGUIRRE JUNIOR HIGH,15726 WALLISVILLE RD,HOUSTON,77049,2,29.809586,-95.156563,85
2,101911101,Elementary,ALAMO ELEMENTARY,6100 N MAIN,BAYTOWN,77521,3,29.79278,-94.963885,95
3,101903045,Middle,ALBRIGHT MIDDLE,6315 WINKLEMAN,HOUSTON,77083,4,29.709561,-95.654675,95
4,101912102,Elementary,ALCOTT ELEMENTARY,5859 BELLFORT,HOUSTON,77033,5,29.667765,-95.329295,85


In [21]:
school_ratings_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 822 entries, 0 to 821
Data columns (total 10 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   school_id      822 non-null    int64  
 1   school_type    822 non-null    object 
 2   name           822 non-null    object 
 3   address        822 non-null    object 
 4   city           822 non-null    object 
 5   zip_code       822 non-null    int64  
 6   district_id    822 non-null    int64  
 7   latitude       822 non-null    float64
 8   longitude      822 non-null    float64
 9   school_rating  822 non-null    int64  
dtypes: float64(2), int64(4), object(4)
memory usage: 64.3+ KB


In [22]:
#Load neighborhoods - Table 7/8
neighborhoods_df=pd.read_csv("ET_Files/neighborhoods_hcad_extract.csv")
del neighborhoods_df["Unnamed: 0"]
del neighborhoods_df["neighborhood_group"]
neighborhoods_df.head()

Unnamed: 0,neighborhood_code,neighborhood
0,0.02,TEST CONDO NBHD
1,0.5,VALUATION TEST NBHD
2,1.0,"SETTLERS VILLAGE 1,1R/P & 2"
3,1.01,"SETTLERS VILLAGE SEC. 3, 3R/P, 5 ,& 5 R/P"
4,1.04,VILLAGES ON GRANT


In [23]:
neighborhoods_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12362 entries, 0 to 12361
Data columns (total 2 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   neighborhood_code  12362 non-null  float64
 1   neighborhood       12362 non-null  object 
dtypes: float64(1), object(1)
memory usage: 193.3+ KB


In [24]:
#Load appraisal - Table 8/8
appraisal_df=pd.read_csv("ET_Files/appraisal_hcad_extract.csv")
del appraisal_df['Unnamed: 0']
appraisal_df=appraisal_df.reindex(columns= ['account','land_value', 'total_appraised_value',
                                            'total_market_value','tax_year'])
appraisal_df.head()

Unnamed: 0,account,land_value,total_appraised_value,total_market_value,tax_year
0,21750000003,88956.0,299000.0,299000.0,2019
1,21750000013,675000.0,815000.0,815000.0,2019
2,21750000018,84537.0,296400.0,296400.0,2019
3,21750000019,88236.0,299803.0,299803.0,2019
4,41320000003,385000.0,732461.0,732461.0,2019


In [25]:
appraisal_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 61464 entries, 0 to 61463
Data columns (total 5 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   account                61464 non-null  int64  
 1   land_value             61464 non-null  float64
 2   total_appraised_value  61464 non-null  float64
 3   total_market_value     61464 non-null  float64
 4   tax_year               61464 non-null  int64  
dtypes: float64(3), int64(2)
memory usage: 2.3 MB


## Create DataFrames for Each table in the ERD

## Load Data in SQL PostGres

In [27]:
#Connect to local database
rds_connection_string = f'{postgres}:{Apex2020!!}@localhost:5432/Houston_Real_Estate_db'
engine = create_engine(f'postgresql://{rds_connection_string}')

SyntaxError: f-string: invalid conversion character: expected 's', 'r', or 'a' (<ipython-input-27-8132563699cd>, line 2)

In [27]:
# check for table names. Make sure databse is empty if code needs to be re-run
engine.table_names()

[]

In [None]:
# #Drop all tables
# conn = psycopg2.connect(database="Houston_Real_Estate_db", user='postgres', password='password', host='127.0.0.1', port= '5432')

# #Setting auto commit false
# conn.autocommit = True

# #Creating a cursor object using the cursor() method
# cursor = conn.cursor()

# #Droping EMPLOYEE table if already exists
# cursor.execute("DROP TABLE crime")
# print("Table crime dropped... ")
# cursor.execute("DROP TABLE properties")
# print("Table properties dropped... ")
# cursor.execute("DROP TABLE zip_code")
# print("Table zip_code dropped... ")
# cursor.execute("DROP TABLE flood_zone")
# print("Table flood_zone dropped... ")
# cursor.execute("DROP TABLE appraisal")
# print("Table appraisal dropped... ")
# cursor.execute("DROP TABLE neighborhoods")
# print("Table neighborhoods dropped... ")
# cursor.execute("DROP TABLE school")
# print("Table school dropped... ")
# cursor.execute("DROP TABLE school_district")
# print("Table school_district dropped... ")

# #Commit your changes in the database
# conn.commit()

# #Closing the connection
# conn.close()

In [28]:
#Load Crime Table
crime_data_df.to_sql(name='crime', con=engine, if_exists='replace', index=False, chunksize=10000)

In [29]:
# Confirm that data has been added
pd.read_sql_query('select * from crime', con=engine).head()

Unnamed: 0,Incident,Date,Hour,NIBRS_Class,NIBRS_Description,Offense_Count,Premise,Block_Range,Street_Name,Street_Type,City,Zip_Code
0,20519,1/1/2019,0,13A,Aggravated Assault,1,"Residence, Home (Includes Apartment)",4034,OSBY,DR,HOUSTON,77025
1,20519,1/1/2019,0,23H,All other larceny,1,"Residence, Home (Includes Apartment)",4034,OSBY,DR,HOUSTON,77025
2,20519,1/1/2019,0,290,"Destruction, damage, vandalism",1,"Residence, Home (Includes Apartment)",4034,OSBY,DR,HOUSTON,77025
3,20519,1/1/2019,0,35A,"Drug, narcotic violations",1,"Residence, Home (Includes Apartment)",4034,OSBY,DR,HOUSTON,77025
4,34819,1/1/2019,0,290,"Destruction, damage, vandalism",1,"Residence, Home (Includes Apartment)",4065,SILVERWOOD,DR,HOUSTON,77025


In [30]:
# Load properties table
properties_df.to_sql(name='properties', con=engine, if_exists='replace', index=False)

In [31]:
# Confirm that data has been added
pd.read_sql_query('select * from properties', con=engine).head()

Unnamed: 0,account,latitude,longitude,address,zip_code,neighborhood_code,acreage,new_owner_date,sq_ft,school_id,school_type,flood_description
0,21750000003,29.74614,-95.36987,"2202 CAROLINE ST HOUSTON, TEXAS 77002",77002,8400.07,0.0405,2009-10-13 00:00:00.000,1944.0,101912110,Elementary,AREA OF MINIMAL FLOOD HAZARD
1,21750000003,29.74614,-95.36987,"2202 CAROLINE ST HOUSTON, TEXAS 77002",77002,8400.07,0.0405,2009-10-13 00:00:00.000,1944.0,101912467,Middle,AREA OF MINIMAL FLOOD HAZARD
2,21750000003,29.74614,-95.36987,"2202 CAROLINE ST HOUSTON, TEXAS 77002",77002,8400.07,0.0405,2009-10-13 00:00:00.000,1944.0,101912025,High,AREA OF MINIMAL FLOOD HAZARD
3,21750000013,29.74605,-95.37014,"2218 CAROLINE ST HOUSTON, TEXAS 77002",77002,8400.01,0.3099,1995-08-02 00:00:00.000,2471.0,101912110,Elementary,AREA OF MINIMAL FLOOD HAZARD
4,21750000013,29.74605,-95.37014,"2218 CAROLINE ST HOUSTON, TEXAS 77002",77002,8400.01,0.3099,1995-08-02 00:00:00.000,2471.0,101912463,Middle,AREA OF MINIMAL FLOOD HAZARD


In [32]:
#Load Zip Codes
zip_code_df.to_sql(name='zip_code', con=engine, if_exists='replace', index=False)

In [33]:
# Confirm that data has been added
pd.read_sql_query('select * from zip_code', con=engine).head()

Unnamed: 0,zip_code
0,77002
1,77006
2,77019
3,77098
4,77054


In [34]:
#Load Flood Description Table 
flood_zone_df.to_sql(name='flood_zone', con=engine, if_exists='replace', index=False, chunksize=10000)

In [35]:
# Confirm that data has been added
pd.read_sql_query('select * from flood_zone', con=engine).head()

Unnamed: 0,Flood_Description,Flood_Zone
0,0.2 PCT ANNUAL CHANCE FLOOD HAZARD,X
1,AREA OF MINIMAL FLOOD HAZARD,X
2,FLOODWAY,AE
3,High-Risk Flood Zone,AE


In [36]:
#Load Appraisal Table
appraisal_df.to_sql(name='appraisal', con=engine, if_exists='replace', index=False)

In [37]:
# Confirm that data has been added
pd.read_sql_query('select * from appraisal', con=engine).head()

Unnamed: 0,account,land_value,total_appraised_value,total_market_value,tax_year
0,21750000003,88956.0,299000.0,299000.0,2019
1,21750000013,675000.0,815000.0,815000.0,2019
2,21750000018,84537.0,296400.0,296400.0,2019
3,21750000019,88236.0,299803.0,299803.0,2019
4,41320000003,385000.0,732461.0,732461.0,2019


In [38]:
#Load Neighborhoods Table
neighborhoods_df.to_sql(name='neighborhoods', con=engine, if_exists='replace', index=False, chunksize=10000)

In [39]:
# Confirm that data has been added
pd.read_sql_query('select * from neighborhoods', con=engine).head()

Unnamed: 0,neighborhood_code,neighborhood
0,0.02,TEST CONDO NBHD
1,0.5,VALUATION TEST NBHD
2,1.0,"SETTLERS VILLAGE 1,1R/P & 2"
3,1.01,"SETTLERS VILLAGE SEC. 3, 3R/P, 5 ,& 5 R/P"
4,1.04,VILLAGES ON GRANT


In [40]:
#Load School Table
school_ratings_df.to_sql(name='school', con=engine, if_exists='replace', index=False, chunksize=10000)

In [41]:
# Confirm that data has been added
pd.read_sql_query('select * from school', con=engine).head()

Unnamed: 0,school_id,school_type,name,address,city,zip_code,district_id,latitude,longitude,school_rating
0,101907107,Elementary,ADAM ELEMENTARY,11303 HONEYGROVE LN,HOUSTON,77065,1,29.926556,-95.603242,85
1,101905043,Middle,AGUIRRE JUNIOR HIGH,15726 WALLISVILLE RD,HOUSTON,77049,2,29.809586,-95.156563,85
2,101911101,Elementary,ALAMO ELEMENTARY,6100 N MAIN,BAYTOWN,77521,3,29.79278,-94.963885,95
3,101903045,Middle,ALBRIGHT MIDDLE,6315 WINKLEMAN,HOUSTON,77083,4,29.709561,-95.654675,95
4,101912102,Elementary,ALCOTT ELEMENTARY,5859 BELLFORT,HOUSTON,77033,5,29.667765,-95.329295,85


In [42]:
#Load School Districts Table
school_district_df.to_sql(name='school_district', con=engine, if_exists='replace', index=False, chunksize=10000)

In [43]:
# Confirm that data has been added
pd.read_sql_query('select * from school_district', con=engine).head()

Unnamed: 0,district_id,district_name
0,1,CYPRESS-FAIRBANKS ISD
1,2,CHANNELVIEW ISD
2,3,GOOSE CREEK CISD
3,4,ALIEF ISD
4,5,HOUSTON ISD
