## Funding Provided Predictive Model 

Building a predictive model to see if selected demographic factor will lead to if the area will get funded for broadband development

1. Find out the blocks that do not have broad band access for the year 2014, 2015, 2016, 2017
2. Find out the block that are funded to have broadband access in the next year
3. Aggregate census feature tables for all the blocks that do not have broad band access

In [1]:
import pandas as pd
import numpy as np

In [10]:
query14 = """
SELECT all_block.block_code as no_deployment
FROM broadband.census_block all_block
left join broadband.fcc2014_latest fcc2014
on all_block.block_code = fcc2014.block_code
where fcc2014.block_code is null 
"""

query15 = """
SELECT all_block.block_code as no_deployment
FROM broadband.census_block all_block
left join broadband.fcc2015_latest fcc2015
on all_block.block_code = fcc2015.block_code
where fcc2015.block_code is null 
"""

query16 = """
SELECT all_block.block_code as no_deployment
FROM broadband.census_block all_block
left join broadband.fcc2016_latest fcc2016
on all_block.block_code = fcc2016.block_code
where fcc2016.block_code is null 
"""

query17 = """
SELECT all_block.block_code as no_deployment
FROM broadband.census_block all_block
left join broadband.fcc2017_latest fcc2017
on all_block.block_code = fcc2017.block_code
where fcc2017.block_code is null 
"""

no_deployment14 = pd.read_gbq(query14, project_id='fccdsicapstone-218522', dialect='standard')
no_deployment15 = pd.read_gbq(query15, project_id='fccdsicapstone-218522', dialect='standard')
no_deployment16 = pd.read_gbq(query16, project_id='fccdsicapstone-218522', dialect='standard')
no_deployment17 = pd.read_gbq(query17, project_id='fccdsicapstone-218522', dialect='standard')

In [22]:
fund_query15 = """

SELECT noAccess14.no_deployment as block_code,
CASE WHEN fund.census_block is null THEN 0 ELSE 1 END AS funded_label
FROM
(SELECT all_block.block_code as no_deployment
FROM broadband.census_block all_block
left join broadband.fcc2014_latest fcc2014
on all_block.block_code = fcc2014.block_code
where fcc2014.block_code is null ) noAccess14
LEFT Join (SELECT * FROM broadband.fund WHERE year = 2015) as fund
ON noAccess14.no_deployment = fund.census_block

"""
fund_query16 = """

SELECT noAccess15.no_deployment as block_code,
CASE WHEN fund.census_block is null THEN 0 ELSE 1 END AS funded_label
FROM
(SELECT all_block.block_code as no_deployment
FROM broadband.census_block all_block
left join broadband.fcc2015_latest fcc2015
on all_block.block_code = fcc2015.block_code
where fcc2015.block_code is null ) noAccess15
LEFT Join (SELECT * FROM broadband.fund WHERE year = 2016) as fund
ON noAccess15.no_deployment = fund.census_block

"""
fund_query17 = """

SELECT noAccess16.no_deployment as block_code,
CASE WHEN fund.census_block is null THEN 0 ELSE 1 END AS funded_label
FROM
(SELECT all_block.block_code as no_deployment
FROM broadband.census_block all_block
left join broadband.fcc2016_latest fcc2016
on all_block.block_code = fcc2016.block_code
where fcc2016.block_code is null ) noAccess16
LEFT Join (SELECT * FROM broadband.fund WHERE year = 2017) as fund
ON noAccess16.no_deployment = fund.census_block

"""
funded_block_15 = pd.read_gbq(fund_query15, project_id='fccdsicapstone-218522', dialect='standard')
funded_block_16 = pd.read_gbq(fund_query16, project_id='fccdsicapstone-218522', dialect='standard')
funded_block_17 = pd.read_gbq(fund_query17, project_id='fccdsicapstone-218522', dialect='standard')

In [23]:
print(funded_block_15.shape)
print(funded_block_16.shape)
print(funded_block_17.shape)

(2493724, 2)
(2316072, 2)
(2197169, 2)


In [25]:
##Aggregate up to Block_group Level 

bg_query15 = """

SELECT SUBSTR(A.block_code, 1, 12)as block_group_code, 
CASE WHEN SUM(A.funded_label)>=2 THEN 1 ELSE 0 END as label
FROM 

(SELECT noAccess14.no_deployment as block_code,
CASE WHEN fund.census_block is null THEN 0 ELSE 1 END AS funded_label
FROM
(SELECT all_block.block_code as no_deployment
FROM broadband.census_block all_block
left join broadband.fcc2014_latest fcc2014
on all_block.block_code = fcc2014.block_code
where fcc2014.block_code is null ) noAccess14
LEFT Join (SELECT * FROM broadband.fund WHERE year = 2015) as fund
ON noAccess14.no_deployment = fund.census_block) A

GROUP BY 1

"""
bg_15 = pd.read_gbq(bg_query15, project_id='fccdsicapstone-218522', dialect='standard')

In [48]:
bg_acs_query15 = """

SELECT labeltable.block_group_code, acsgeo.population, acsgeo.perc_urban, acsgeo.elevation_mean, acsgeo.two_or_more_races, 
acsgeo.educational_attainment_total, acsgeo.median_household_income_in_the_past_12_months, acsgeo.owner_occupied, 
acsgeo.health_insurance_total, labeltable.label

FROM 

(SELECT SUBSTR(A.block_code, 1, 12)as block_group_code, 
CASE WHEN SUM(A.funded_label)>=2 THEN 1 ELSE 0 END as label
FROM 
(SELECT noAccess14.no_deployment as block_code,
CASE WHEN fund.census_block is null THEN 0 ELSE 1 END AS funded_label
FROM
(SELECT all_block.block_code as no_deployment
FROM broadband.census_block all_block
left join broadband.fcc2014_latest fcc2014
on all_block.block_code = fcc2014.block_code
where fcc2014.block_code is null ) noAccess14
LEFT Join (SELECT * FROM broadband.fund WHERE year = 2015) as fund
ON noAccess14.no_deployment = fund.census_block) A
GROUP BY 1) labeltable

LEFT JOIN

broadband.all_block_groups_acs_geo_agg_2014 acsgeo

ON labeltable.block_group_code = acsgeo.bg_code

"""


bg_acs_query16 = """

SELECT labeltable.block_group_code, acsgeo.population, acsgeo.perc_urban, acsgeo.elevation_mean, acsgeo.two_or_more_races, 
acsgeo.educational_attainment_total, acsgeo.median_household_income_in_the_past_12_months, acsgeo.owner_occupied, 
acsgeo.health_insurance_total, labeltable.label

FROM 

(SELECT SUBSTR(A.block_code, 1, 12)as block_group_code, 
CASE WHEN SUM(A.funded_label)>=2 THEN 1 ELSE 0 END as label
FROM 
(SELECT noAccess15.no_deployment as block_code,
CASE WHEN fund.census_block is null THEN 0 ELSE 1 END AS funded_label
FROM
(SELECT all_block.block_code as no_deployment
FROM broadband.census_block all_block
left join broadband.fcc2015_latest fcc2015
on all_block.block_code = fcc2015.block_code
where fcc2015.block_code is null ) noAccess15
LEFT Join (SELECT * FROM broadband.fund WHERE year = 2016) as fund
ON noAccess15.no_deployment = fund.census_block) A
GROUP BY 1) labeltable

LEFT JOIN

broadband.all_block_groups_acs_geo_agg_2015 acsgeo

ON labeltable.block_group_code = acsgeo.bg_code

"""

bg_acs_query17 = """

SELECT labeltable.block_group_code, acsgeo.population, acsgeo.perc_urban, acsgeo.elevation_mean, acsgeo.two_or_more_races, 
acsgeo.educational_attainment_total, acsgeo.median_household_income_in_the_past_12_months, acsgeo.owner_occupied, 
acsgeo.health_insurance_total, labeltable.label

FROM 

(SELECT SUBSTR(A.block_code, 1, 12)as block_group_code, 
CASE WHEN SUM(A.funded_label)>=2 THEN 1 ELSE 0 END as label
FROM 
(SELECT noAccess16.no_deployment as block_code,
CASE WHEN fund.census_block is null THEN 0 ELSE 1 END AS funded_label
FROM
(SELECT all_block.block_code as no_deployment
FROM broadband.census_block all_block
left join broadband.fcc2016_latest fcc2016
on all_block.block_code = fcc2016.block_code
where fcc2016.block_code is null ) noAccess16
LEFT Join (SELECT * FROM broadband.fund WHERE year = 2017) as fund
ON noAccess16.no_deployment = fund.census_block) A
GROUP BY 1) labeltable

LEFT JOIN

broadband.all_block_groups_acs_geo_agg_2016 acsgeo

ON labeltable.block_group_code = acsgeo.bg_code

"""

bg_acs_15 = pd.read_gbq(bg_acs_query15, project_id='fccdsicapstone-218522', dialect='standard')
bg_acs_16 = pd.read_gbq(bg_acs_query16, project_id='fccdsicapstone-218522', dialect='standard')
bg_acs_17 = pd.read_gbq(bg_acs_query17, project_id='fccdsicapstone-218522', dialect='standard')

In [49]:
data_15 = bg_acs_15[bg_acs_15['median_household_income_in_the_past_12_months'].notnull()]
data_16 = bg_acs_16[bg_acs_16['median_household_income_in_the_past_12_months'].notnull()]
data_17 = bg_acs_17[bg_acs_17['median_household_income_in_the_past_12_months'].notnull()]

Unnamed: 0,block_group_code,population,perc_urban,elevation_mean,two_or_more_races,educational_attainment_total,median_household_income_in_the_past_12_months,owner_occupied,health_insurance_total,label
33,245102505001,0.0,0.950000,10.569927,0.0,0.0,-666666666,0.0,0.0,0
34,330159800111,0.0,0.885714,66.240020,0.0,0.0,-666666666,0.0,0.0,0
35,240059800001,0.0,0.969112,3.773541,0.0,0.0,-666666666,0.0,0.0,0
36,120119800001,0.0,0.043478,3.735059,0.0,0.0,-666666666,0.0,0.0,0
37,060014090002,0.0,0.947368,3.747324,0.0,0.0,-666666666,0.0,0.0,0
38,440050402002,0.0,0.951220,11.806344,0.0,0.0,-666666666,0.0,0.0,0
39,060310017015,0.0,0.002045,245.034009,0.0,0.0,-666666666,0.0,0.0,0
40,060590219241,0.0,0.010753,261.805242,0.0,0.0,-666666666,0.0,0.0,0
41,260999821001,0.0,0.939655,170.048066,0.0,0.0,-666666666,0.0,0.0,0
42,511539801001,0.0,0.009009,70.611329,0.0,0.0,-666666666,0.0,0.0,0
