In [3]:
import pandas as pd
import ipywidgets as widgets
from IPython.display import display


# Do not truncate columns when a DataFrame is displayed.
pd.set_option('display.max_columns', None)

import warnings

# Silence every Python warning for the rest of the session.
# NOTE(review): this blanket filter also hides pandas warnings (e.g. chained
# assignment warnings) — consider narrowing it to specific categories.
warnings.filterwarnings("ignore")

# Check Block Group to Project
This robustness check examines whether the suitability scores of the projects that were actually built measure up to the block-group-level suitability scores.

This lets us gauge how good a statistic the county-level suitability score is when analyzing project-level factors.

In [4]:
# Block-group-level suitability scores and the FIPS lookup table.
# Identifier columns are read as strings (presumably to keep leading zeros — TODO confirm).
# NOTE(review): both paths are absolute and machine-specific; a configurable
# base directory would make the notebook runnable elsewhere.
suitability_block_group = pd.read_csv(
    "/Users/jack/Library/CloudStorage/GoogleDrive-limjackailjk@gmail.com/My Drive/Solar PV Lab/NIMBY Project/Solar NIMBY Final/Solar-NIMBY/regression/cleaned data bg/block_group_suitability_scores.csv",
    dtype={"GEOID": str},
)
FIPS = pd.read_csv(
    "/Users/jack/Library/CloudStorage/GoogleDrive-limjackailjk@gmail.com/My Drive/Solar PV Lab/NIMBY Project/Solar NIMBY Final/Solar-NIMBY/data cleaning/data/FIPS.csv",
    dtype={"FIPS State": str, "FIPS County": str},
)

# Attach the FIPS columns to every block group by matching on state and county name.
suitability_block_group = suitability_block_group.merge(
    FIPS,
    on=["State", "County Name"],
    how="left",
)

# Project-level suitability data; all geographic identifiers are kept as strings.
suitability_proj = pd.read_csv(
    "data/suitability data/suitability_project_level.csv",
    dtype={
        "GEOID": str,
        'STATEFP': str,
        'COUNTYFP': str,
        'TRACTCE': str,
        'BLKGRPCE': str,
    },
)

In [7]:
# Suitability factors that can be checked
factors = [
    'GHI',
    'Protected_Land',
    'Habitat',
    'Slope',
    'Population_Density',
    'Distance_to_Substation',
    'Land_Cover',
]


def create_analysis_df(factor, no_matches, asc=True, list_view=True, project_size='all'):
    """Match the top-ranked block groups for ``factor`` to the projects located in them.

    Block groups from the module-level ``suitability_block_group`` frame are ranked
    by ``factor`` and walked in rank order. A block group counts as a match when at
    least one row of the module-level ``suitability_proj`` frame (after the size
    filter) has the same GEOID. The walk stops once ``no_matches`` block groups
    have matched. The module-level ``FIPS`` frame and ``factors`` list are read too.

    Parameters
    ----------
    factor : str
        Column of ``suitability_block_group`` used for the ranking; the mean view
        also reads the same column from the project data.
    no_matches : int
        Maximum number of matched block groups to report. Values <= 0 yield an
        empty result.
    asc : bool, default True
        Sort direction of the ranking; rank 1 is the smallest value when True and
        the largest value when False.
    list_view : bool, default True
        True returns one row per matched project; False returns one row per
        matched block group with the mean project value of ``factor``.
    project_size : str, default 'all'
        'small' keeps projects with Wattage < 5, 'medium' 5 <= Wattage < 25 and
        'large' Wattage >= 25. Any other value applies no filter.

    Returns
    -------
    pandas.DataFrame
        List view: project rows with state/county names from ``FIPS`` and the
        rank of their block group (index restarts at 0 for each block group).
        Mean view: block-group rows, indexed by their original row labels, with
        ``Project_Mean_<factor>`` and ``block group rank`` columns.
        When nothing matches, an empty frame with the expected columns.
    """
    # Wattage cut-offs for the size buckets (same units as the 'Wattage' column).
    small_upper, medium_upper = 5, 25

    list_columns = ['State', 'County Name', 'GEOID', 'STATEFP', 'COUNTYFP', 'TRACTCE',
                    'BLKGRPCE', 'Wattage', 'block group rank'] + factors
    mean_columns = ['State', 'County Name', 'GEOID', factor,
                    f"Project_Mean_{factor}", "block group rank"]

    # Rank the block groups by the factor; rows with missing values are dropped
    # after sorting, so ranks are positions among the complete rows only.
    ranked_block_groups = suitability_block_group.sort_values(factor, ascending=asc)
    ranked_block_groups = ranked_block_groups[['State', 'County Name', 'GEOID', factor]].dropna()

    projects = suitability_proj[['GEOID', 'STATEFP', 'COUNTYFP', 'TRACTCE', 'BLKGRPCE', 'Wattage'] + factors]
    wattage = projects['Wattage']
    if project_size == 'small':
        projects = projects[wattage < small_upper]
    elif project_size == 'medium':
        projects = projects[(wattage >= small_upper) & (wattage < medium_upper)]
    elif project_size == 'large':
        projects = projects[wattage >= medium_upper]

    # Hash-based membership test instead of scanning the GEOID column per block group.
    project_geoids = set(projects['GEOID'])

    matches = []        # per-project frames (list view) or per-block-group records (mean view)
    match_labels = []   # original row labels of the matched block groups (mean view index)
    for rank, (row_label, bg_row) in enumerate(ranked_block_groups.iterrows(), start=1):
        # Stop once enough block groups have matched; checking first also makes
        # no_matches <= 0 return nothing.
        if len(matches) >= no_matches:
            break

        geoid = bg_row['GEOID']
        if geoid not in project_geoids:
            continue

        matched_projects = projects[projects['GEOID'] == geoid]

        if list_view:
            # assign() returns a new frame, so the filtered slice is never mutated.
            ranked_projects = matched_projects.assign(**{"block group rank": rank})
            # Look up the state and county names of the projects.
            named_projects = ranked_projects.merge(
                FIPS,
                left_on=["STATEFP", "COUNTYFP"],
                right_on=["FIPS State", "FIPS County"],
                how="left",
            )
            matches.append(named_projects[list_columns].reset_index(drop=True))
        else:
            record = bg_row.to_dict()
            record[f"Project_Mean_{factor}"] = matched_projects[factor].mean()
            record["block group rank"] = rank
            matches.append(record)
            match_labels.append(row_label)

    if list_view:
        # pd.concat raises on an empty list, so return an empty frame explicitly.
        if not matches:
            return pd.DataFrame(columns=list_columns)
        return pd.concat(matches)

    if not matches:
        return pd.DataFrame(columns=mean_columns)
    return pd.DataFrame(matches, index=match_labels, columns=mean_columns)

In [8]:
# Dropdown choosing the suitability factor to inspect
dropdown = widgets.Dropdown(options=factors, description='Select DF:')

# Dropdown choosing the project-size bucket
dropdown_project_size = widgets.Dropdown(options=['all', 'small', 'medium', 'large'], description='Select Project Size:')

# Integer input for the number of matches to show
no_matches = widgets.IntText(value=10, description='No of Matches:', disabled=False)

# Button that triggers the display, and the output area the result is rendered into
button = widgets.Button(description="Display")
output = widgets.Output()

def on_button_clicked(b):
    """Button callback: clear the output area and render the analysis table in it.

    ``b`` is the clicked button passed in by the widget framework; it is not used.
    The factor, match count and project size are read from the module-level widgets.
    """
    with output:
        # Remove the previous table before computing and showing the new one.
        output.clear_output()
        analysis_df = create_analysis_df(
            dropdown.value,
            no_matches.value,
            project_size=dropdown_project_size.value,
        )
        display(analysis_df)
        
# Run the analysis whenever the button is clicked.
button.on_click(on_button_clicked)

# Render the controls top to bottom, followed by the output area.
for control in (dropdown, no_matches, dropdown_project_size, button, output):
    display(control)

Dropdown(description='Select DF:', options=('GHI', 'Protected_Land', 'Habitat', 'Slope', 'Population_Density',…

IntText(value=10, description='No of Matches:')

Dropdown(description='Select Project Size:', options=('all', 'small', 'medium', 'large'), value='all')

Button(description='Display', style=ButtonStyle())

Output()