# Use `sqlalchemy` to pull data out of SQL into a `pandas` dataframe

You would use this approach over `psycopg2` when you want the query result returned as a `pandas.DataFrame`.


In [1]:
import os
from urllib.parse import quote_plus

import pandas as pd
import sqlalchemy

In [3]:
# Connection settings. Each value can be overridden through the standard libpq
# environment variables so real credentials never have to be saved in the
# notebook; the literals below are only placeholder defaults.
MY_DB = os.environ.get("PGDATABASE", "teaching_bucket")
UN = os.environ.get("PGUSER", "aaron")
PW = os.environ.get("PGPASSWORD", "my_password")
HOST = os.environ.get("PGHOST", "localhost")
PORT = os.environ.get("PGPORT", "5432")

# Percent-encode the user name and password: characters such as "@", ":" or "/"
# would otherwise be read as URL delimiters and corrupt the connection URI.
db_uri = f"postgresql://{quote_plus(UN)}:{quote_plus(PW)}@{HOST}:{PORT}/{MY_DB}"

### Use `pd.read_sql()` to get the query result from the database into a dataframe

In [4]:
query = "SELECT * FROM nyc_stop_frisk_2018 LIMIT 200;"

# `create_engine` returns an Engine (it manages a pool of connections), not a
# single connection, so it gets its own name.
engine = sqlalchemy.create_engine(db_uri)

try:
    # The `with` block closes the connection even if the query fails.
    with engine.connect() as connection:
        df = pd.read_sql(query, connection)
finally:
    # Always release the engine's pooled connections, even when the query raised.
    engine.dispose()

### Now you can do whatever you want to the dataset using the `pandas` library

In this example I'm using the `.describe()` function to generate some summary statistics.

I then transpose it with `.T` (swapping rows and columns) to make it easier to read.

In [7]:
# Summary statistics, transposed so that each described column becomes a row.
df.describe().transpose()

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
index,200.0,139.255,572.182668,0.0,49.75,99.5,149.25,8150.0
stop_frisk_id,200.0,140.255,572.182668,1.0,50.75,100.5,150.25,8151.0
year2,200.0,2018.0,0.0,2018.0,2018.0,2018.0,2018.0,2018.0
issuing_officer_command_code,200.0,262.84,314.156915,1.0,46.0,94.0,446.0,864.0
supervising_officer_command_code,200.0,268.925,316.161541,1.0,47.5,97.0,483.5,881.0
observed_duration_minutes,200.0,1.805,2.836761,0.0,1.0,1.0,2.0,30.0
stop_duration_minutes,200.0,9.89,16.665431,0.0,5.0,7.0,10.0,225.0
stop_location_precinct,200.0,64.385,33.896336,1.0,40.0,63.0,95.5,121.0
stop_location_x,200.0,1005618.595,21640.965108,931731.0,996858.0,1004284.5,1014129.25,1057847.0
stop_location_y,200.0,205246.15,30546.168998,149463.0,182136.5,201975.0,234900.25,262492.0
