In [1]:
import numpy as np 
import pandas as pd

from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, func

from flask import Flask, render_template, redirect, jsonify, json, request

# NOTE: Add to criteria for Heroku Deployment
import pickle 

In [5]:
# Connect to the SQLite file; the path is relative to the working directory.
engine = create_engine("sqlite:///birthdata.sqlite")

# Let automap build mapped classes from the schema already in the database.
# NOTE(review): `reflect=True` is reported as deprecated by newer SQLAlchemy
# releases in favour of `autoload_with=engine` -- confirm the installed
# version before switching.
Base = automap_base()
Base.prepare(engine, reflect=True)

# Module-level handles to the reflected tables.
# county / national / outcomes
County = Base.classes.county
National = Base.classes.national
Outcomes = Base.classes.outcomes

# clinic tables: state and county level, 2010 and 2015
State2010 = Base.classes.clinic2010_state
State2015 = Base.classes.clinic2015_state
County2010 = Base.classes.clinic2010_county
County2015 = Base.classes.clinic2015_county

# county_pop / county_svi
Countypop = Base.classes.county_pop
CountySVI = Base.classes.county_svi

In [6]:
# Open an ORM session bound to the engine; every `session.query(...)` call
# in this notebook goes through this object.
session = Session(engine)

In [7]:
# National table restricted to the 15-19 age group, oldest year first:
#   resultsUS    -> distinct (year, US rate) pairs
#   resultsState -> all (state, year, state rate) rows (state-specific data)
# Use of distinct: https://stackoverflow.com/questions/48102501/remove-duplicates-from-sqlalchemy-query-using-set
resultsUS = (
    session.query(National.year, National.us_rate)
    .filter(National.age_group == "15-19 years")
    .order_by(National.year.asc())
    .distinct()
)
resultsState = (
    session.query(National.state, National.year, National.state_rate)
    .filter(National.age_group == "15-19 years")
    .order_by(National.year.asc())
    .all()
)

# Convert each result set into its own list of plain dictionaries.
USData = [{"rate": rate, "year": year} for year, rate in resultsUS]
stateData = [
    {"rate": rate, "state": state, "year": year}
    for state, year, rate in resultsState
]

In [96]:
#stateData

In [113]:
def _distinct_rates(age_group, *columns):
    """Build a query for distinct rows of `columns` from the national table.

    The rows are limited to the given `age_group` label and ordered by
    ascending year. The query is returned un-executed; iterating it runs it.
    Use of distinct: https://stackoverflow.com/questions/48102501/remove-duplicates-from-sqlalchemy-query-using-set
    """
    return (
        session.query(*columns)
        .filter(National.age_group == age_group)
        .order_by(National.year.asc())
        .distinct()
    )


# State-level birth rate by year for the 15-17 and 18-19 age groups.
resultsBirthRate1517 = _distinct_rates(
    "15-17 years", National.year, National.state_rate, National.state
)
resultsBirthRate1819 = _distinct_rates(
    "18-19 years", National.year, National.state_rate, National.state
)
# Overall US birth rate by year for the same two age groups.
resultsUSBirthRate1517 = _distinct_rates("15-17 years", National.year, National.us_rate)
resultsUSBirthRate1819 = _distinct_rates("18-19 years", National.year, National.us_rate)

# Store separate lists of dictionaries (state level).
birthRate1517 = [
    {"rate": rate, "year": year, "state": state}
    for year, rate, state in resultsBirthRate1517
]
birthRate1819 = [
    {"rate": rate, "year": year, "state": state}
    for year, rate, state in resultsBirthRate1819
]

# Overall US dictionaries.
USBirthRate1517 = [{"rate": rate, "year": year} for year, rate in resultsUSBirthRate1517]
USBirthRate1819 = [{"rate": rate, "year": year} for year, rate in resultsUSBirthRate1819]

In [114]:
# Display the 18-19 state-level records (a list of rate/year/state dictionaries).
birthRate1819

[{'rate': 84.8, 'year': 2003, 'state': 'Alabama'},
 {'rate': 78.5, 'year': 2003, 'state': 'Alaska'},
 {'rate': 102.5, 'year': 2003, 'state': 'Arizona'},
 {'rate': 103.2, 'year': 2003, 'state': 'Arkansas'},
 {'rate': 69.4, 'year': 2003, 'state': 'California'},
 {'rate': 70.7, 'year': 2003, 'state': 'Colorado'},
 {'rate': 43.1, 'year': 2003, 'state': 'Connecticut'},
 {'rate': 65.6, 'year': 2003, 'state': 'Delaware'},
 {'rate': 47.5, 'year': 2003, 'state': 'District of Columbia'},
 {'rate': 75.2, 'year': 2003, 'state': 'Florida'},
 {'rate': 90.4, 'year': 2003, 'state': 'Georgia'},
 {'rate': 70.7, 'year': 2003, 'state': 'Hawaii'},
 {'rate': 67.2, 'year': 2003, 'state': 'Idaho'},
 {'rate': 67.5, 'year': 2003, 'state': 'Illinois'},
 {'rate': 72.8, 'year': 2003, 'state': 'Indiana'},
 {'rate': 51.7, 'year': 2003, 'state': 'Iowa'},
 {'rate': 69.8, 'year': 2003, 'state': 'Kansas'},
 {'rate': 85.9, 'year': 2003, 'state': 'Kentucky'},
 {'rate': 90.9, 'year': 2003, 'state': 'Louisiana'},
 {'rate': 

In [16]:
# Fetch the national table without any age-group filter, oldest year first,
# and load the rows into a DataFrame with short column names.
results = (
    session.query(
        National.year,
        National.age_group,
        National.us_rate,
        National.state,
        National.state_rate,
    )
    .order_by(National.year.asc())
    .all()
)

df = pd.DataFrame(
    results,
    columns=['year', 'age', 'usrate', 'state', 'staterate'],
)
df


Unnamed: 0,year,age,usrate,state,staterate
0,2003,15-17 years,22.2,Alabama,28.5
1,2003,15-17 years,22.2,Alaska,19.8
2,2003,15-17 years,22.2,Arizona,36.2
3,2003,15-17 years,22.2,Arkansas,30.4
4,2003,15-17 years,22.2,California,21.5
...,...,...,...,...,...
2491,2018,18-19 years,32.3,Virginia,26.2
2492,2018,18-19 years,32.3,Washington,25.0
2493,2018,18-19 years,32.3,West Virginia,50.4
2494,2018,18-19 years,32.3,Wisconsin,24.4


In [83]:
# Keep only the rows of the combined 15-19 age group; the overall US and
# state-specific yearly series are derived from this subset.
linedf = df[df["age"].eq('15-19 years')]
linedf

Unnamed: 0,year,age,usrate,state,staterate
52,2003,15-19 years,41.1,Alabama,51.4
53,2003,15-19 years,41.1,Alaska,41.2
54,2003,15-19 years,41.1,Arizona,61.9
55,2003,15-19 years,41.1,Arkansas,59.3
56,2003,15-19 years,41.1,California,40.1
...,...,...,...,...,...
2439,2018,15-19 years,17.4,Virginia,14.3
2440,2018,15-19 years,17.4,Washington,12.7
2441,2018,15-19 years,17.4,West Virginia,25.4
2442,2018,15-19 years,17.4,Wisconsin,13.0


In [97]:
# One (rate, year) row per distinct pair for the 15-19 group: drop the
# repeats, renumber the index from 0, and expose the US rate as "rate".
USdatadf = (
    linedf[['usrate', 'year']]
    .drop_duplicates()
    .reset_index(drop=True)
    .rename(columns={"usrate": "rate"})
)
# Serialise as a JSON array of row objects.
USData = USdatadf.to_json(orient="records")


In [98]:
# Distinct (rate, state, year) rows for the 15-19 group: drop the repeats,
# renumber the index from 0, and expose the state rate as "rate".
stateDatadf = (
    linedf[['staterate', 'state', 'year']]
    .drop_duplicates()
    .reset_index(drop=True)
    .rename(columns={'staterate': 'rate'})
)
# Serialise as a JSON array of row objects.
stateData = stateDatadf.to_json(orient="records")


In [102]:
# Re-display the full national frame; no new query runs here. The 15-17 and
# 18-19 age groups are sliced out of this frame.
df

Unnamed: 0,year,age,usrate,state,staterate
0,2003,15-17 years,22.2,Alabama,28.5
1,2003,15-17 years,22.2,Alaska,19.8
2,2003,15-17 years,22.2,Arizona,36.2
3,2003,15-17 years,22.2,Arkansas,30.4
4,2003,15-17 years,22.2,California,21.5
...,...,...,...,...,...
2491,2018,18-19 years,32.3,Virginia,26.2
2492,2018,18-19 years,32.3,Washington,25.0
2493,2018,18-19 years,32.3,West Virginia,50.4
2494,2018,18-19 years,32.3,Wisconsin,24.4


In [126]:
# State-level rates per age group: filter on the age label, keep the three
# columns of interest, and drop repeated rows.
df1517 = (
    df[df["age"] == '15-17 years'][['staterate', 'year', 'state']]
    .drop_duplicates()
)
df1819 = (
    df[df["age"] == '18-19 years'][['staterate', 'year', 'state']]
    .drop_duplicates()
)

# US-level rates per age group: the same filter, reduced to distinct
# (usrate, year) pairs.
df1517us = (
    df[df["age"] == '15-17 years'][['usrate', 'year']]
    .drop_duplicates()
)
df1819us = (
    df[df["age"] == '18-19 years'][['usrate', 'year']]
    .drop_duplicates()
)


In [127]:
# Renumber the index from 0 and expose the state rate as "rate", then
# serialise the 15-17 state-level frame as a JSON array of row objects.
df1517 = (
    df1517
    .reset_index(drop=True)
    .rename(columns={'staterate': 'rate'})
)
birthRate1517 = df1517.to_json(orient="records")


In [128]:
# Renumber the index from 0 and expose the state rate as "rate", then
# serialise the 18-19 state-level frame as a JSON array of row objects.
df1819 = (
    df1819
    .reset_index(drop=True)
    .rename(columns={'staterate': 'rate'})
)
birthRate1819 = df1819.to_json(orient="records")


In [131]:
# US-level 15-17 frame: renumber the index, expose the US rate as "rate",
# and serialise as a JSON array of row objects.
df1517us = (
    df1517us
    .reset_index(drop=True)
    .rename(columns={'usrate': 'rate'})
)
USBirthRate1517 = df1517us.to_json(orient="records")

# Same steps for the US-level 18-19 frame.
df1819us = (
    df1819us
    .reset_index(drop=True)
    .rename(columns={'usrate': 'rate'})
)
USBirthRate1819 = df1819us.to_json(orient="records")

In [133]:
# Display the JSON string produced for the US-level 15-17 series.
USBirthRate1517

'[{"rate":22.2,"year":2003},{"rate":21.8,"year":2004},{"rate":21.1,"year":2005},{"rate":21.6,"year":2006},{"rate":21.7,"year":2007},{"rate":21.1,"year":2008},{"rate":19.6,"year":2009},{"rate":17.3,"year":2010},{"rate":15.4,"year":2011},{"rate":14.1,"year":2012},{"rate":12.3,"year":2013},{"rate":10.9,"year":2014},{"rate":9.9,"year":2015},{"rate":8.8,"year":2016},{"rate":7.9,"year":2017},{"rate":7.2,"year":2018}]'