In [None]:
run etl.py drop

In [None]:
import psycopg2
import pandas as pd
import configparser
import matplotlib.pyplot as plt

In [None]:
# Read warehouse connection settings from dwh.cfg.
config = configparser.ConfigParser()
if not config.read('dwh.cfg'):
    # ConfigParser.read() silently ignores files it cannot open; without this
    # check a missing file only shows up later as KeyError: 'CLUSTER'.
    raise FileNotFoundError("dwh.cfg was not found or could not be read")

# The DSN is filled positionally, so the [CLUSTER] section must list its values
# in the order: host, dbname, user, password, port.
cluster_values = config['CLUSTER'].values()
conn = psycopg2.connect("host={} dbname={} user={} password={} port={}".format(*cluster_values))
cur = conn.cursor()


In [None]:
# Load check: one row per table with its row count.
# Every branch carries a distinct table_nm literal, so UNION ALL returns the
# same rows as UNION without the de-duplication step; ORDER BY makes the
# output order stable between runs.
sql_chk_load_cnt = """
select count(*) as total, 'd_age_group' as table_nm from d_age_group
union all
select count(*) as total, 'd_area' as table_nm from d_area
union all
select count(*) as total, 'd_coordinates' as table_nm from d_coordinates
union all
select count(*) as total, 'd_gender_type' as table_nm from d_gender_type
union all
select count(*) as total, 'd_jurisdiction' as table_nm from d_jurisdiction
union all
select count(*) as total, 'd_race_type' as table_nm from d_race_type
union all
select count(*) as total, 'd_us_cities' as table_nm from d_us_cities
union all
select count(*) as total, 'f_crime_data' as table_nm from f_crime_data
order by table_nm;
"""

# Crime counts per suspect race for crime_location 'NYC' during 2012.
# The date filter is a half-open range: identical to BETWEEN for a date column,
# and it also keeps rows later than midnight on 2012-12-31 if the column is a
# timestamp (column type is not visible here).
# NOTE(review): suspect_race_id = 10 is presumably the "unknown" entry of
# d_race_type — confirm against the dimension data.
sql_crime_by_race_and_year = """
select count(*) total,
       case when suspect_race_id = 10 then 'UNKNOWN' else race end as suspect_race,
       to_char(rpt_date_of_crime,'YYYY') as Year
from f_crime_data f
    left join d_race_type d
        on d.id = suspect_race_id
where crime_location = 'NYC'
      and rpt_date_of_crime >= '2012-01-01'
      and rpt_date_of_crime < '2013-01-01'
group by suspect_race_id, race, to_char(rpt_date_of_crime,'YYYY')
"""

# Crime counts per location, area/boro and year for 2014 through 2016.
# Grouping uses select-list ordinals so the rows are grouped by the CASE result.
# A bare `area_or_boro` in GROUP BY is resolved to the input column (PostgreSQL
# rule; presumably the same on Redshift), which would leave NULL and ' ' as two
# separate groups that both display as 'N/A'.
sql_crime_by_area_and_year = """
select count(*) as total,
       crime_location,
       case
        when area_or_boro is null then 'N/A'
        when area_or_boro = ' ' then 'N/A'
        else area_or_boro
       end as area_or_boro,
       to_char(rpt_date_of_crime,'YYYY') as Year
from f_crime_data f
where rpt_date_of_crime >= '2014-01-01'
      and rpt_date_of_crime < '2017-01-01'
group by 2, 3, 4
order by 4,1
"""

## Data check


In [None]:
# Show the per-table row counts returned by the load-check query.
pd.read_sql(sql=sql_chk_load_cnt, con=conn)

## Crime by suspect race in NYC, 2012

In [None]:
# Show crime counts grouped by suspect race and year.
pd.read_sql(sql=sql_crime_by_race_and_year, con=conn)

## Total crime count by area/borough and year, 2014-2016

In [None]:
# Show crime counts grouped by location, area/boro and year.
pd.read_sql(sql=sql_crime_by_area_and_year, con=conn)

## Crime counts by type in LA, 2017-2018

In [None]:
# Crime counts per crime description and year for crime_location 'LAX',
# covering 2017 through 2018, ordered by year and then by count.
# The year filter is a half-open date range on the column itself; it selects the
# same rows as comparing to_char(rpt_date_of_crime,'YYYY') between '2017' and
# '2018', without applying a function to the column inside the WHERE clause.
sql_type_of_crimes = """select
count(*) total,
crime_desc,
to_char(rpt_date_of_crime,'YYYY') as Year
from f_crime_data
where crime_location ='LAX'
    and rpt_date_of_crime >= '2017-01-01'
    and rpt_date_of_crime < '2019-01-01'
group by crime_desc,
to_char(rpt_date_of_crime,'YYYY')
order by 3,1"""

In [None]:
# Show crime counts per crime description and year.
pd.read_sql(sql=sql_type_of_crimes, con=conn)