In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the 2023 land-parcel table into `land_df`; the path is relative to the
# notebook's working directory.
land_df = pd.read_csv("LandParcels.2023.csv")

In [None]:
# Show the column names so the available fields can be inspected.
land_df.columns

Index(['Unnamed: 0', 'Land_Parcel_ID', 'property_N', 'unit_N', 'unit_N_orig',
       'AV_LAND', 'AV_BLDG', 'AV_TOTAL', 'LAND_SF', 'GROSS_AREA',
       'LIVING_AREA', 'LU', 'OWN_OCC', 'NUM_FLOORS', 'YR_BUILT', 'YR_REMOD',
       'owner_address', 'comcenter', 'medhos', 'supermkt', 'parking', 'vacant',
       'rel', 'lib', 'bps', 'police', 'fire', 'private', 'mbta_stop',
       'college', 'hlth_cntr', 'sub_house', 'X', 'Y', 'TLID', 'Blk_ID_10',
       'BG_ID_10', 'CT_ID_10', 'Blk_ID_20', 'BG_ID_20', 'CT_ID_20', 'NSA_Name',
       'BRA_PD', 'full_address'],
      dtype='object')

In [None]:
# Count rows per distinct BRA_PD value (largest group first).
land_df["BRA_PD"].value_counts()

BRA_PD
South Dorchester        11975
Roxbury                  9887
West Roxbury             9690
Hyde Park                8506
Roslindale               7422
Allston/Brighton         7416
Mattapan                 6710
South Boston             6506
East Boston              6394
Jamaica Plain            6023
North Dorchester         4156
South End                3193
Charlestown              3026
Back Bay/Beacon Hill     2917
Central                  2550
Fenway/Kenmore           1349
Name: count, dtype: int64

In [None]:
# Amenity indicator columns, in priority order: the first flagged column wins.
columns = ['comcenter', 'medhos', 'supermkt', 'lib', 'bps', 'police']

def calculate_use(row):
    """Return the name of the first amenity column that is flagged on ``row``.

    Parameters
    ----------
    row : mapping-like (e.g. a ``pandas.Series`` for one parcel)
        Must be indexable by every name in ``columns``.

    Returns
    -------
    str
        The first entry of ``columns`` whose value on ``row`` is present and
        truthy, or ``"OTHER"`` when none is.
    """
    for loc in columns:
        value = row[loc]
        # NaN is truthy in Python, so a missing flag would otherwise be
        # reported as a present amenity; treat missing values as "not flagged".
        if pd.notna(value) and value:
            return loc
    return "OTHER"

# Label every row with the value calculate_use returns for it (an amenity
# column name or "OTHER"); axis=1 passes one row at a time.
land_df["USE"] = land_df.apply(calculate_use, axis=1)

In [None]:
# One scatter panel per neighborhood: faint grey points for parcels whose USE is
# "OTHER", with the amenity parcels drawn on top in their palette colour.
neighborhoods = land_df["BRA_PD"].dropna().unique()

# Size the grid from the data instead of hard-coding 3x6, so that more than 18
# neighborhoods cannot overflow the axes array.  For 16 neighborhoods this
# still yields the original 3x6 grid and (20, 25) figure.
n_cols = 3
n_rows = max(1, -(-len(neighborhoods) // n_cols))  # ceiling division
fig, axs = plt.subplots(ncols=n_cols, nrows=n_rows, sharey=False,
                        figsize=(20, 25 * n_rows / 6), dpi=300, squeeze=False)
params = {'mbta_stop': 'firebrick','bps': 'cornflowerblue','comcenter': 'chocolate',
          'medhos': 'palegreen','supermkt': 'indigo','rel': 'forestgreen',
          'lib': 'orchid', 'private': 'gold', 
          'hlth_cntr': 'lightblue',
          'OTHER': 'gray', 'police': 'yellow'}

# axs.ravel() walks the grid row by row, i.e. panel i sits at (i // 3, i % 3).
for ax, neighborhood in zip(axs.ravel(), neighborhoods):
    in_neighborhood = land_df[land_df["BRA_PD"] == neighborhood]
    is_other = in_neighborhood["USE"] == "OTHER"
    sub_df_1 = in_neighborhood[~is_other]   # parcels with an amenity
    sub_df_2 = in_neighborhood[is_other]    # everything else (background)
    sns.scatterplot(data=sub_df_2, x="X", y="Y", hue="USE", ax=ax, alpha=0.05, palette=params)
    sns.scatterplot(data=sub_df_1, x="X", y="Y", hue="USE", ax=ax, palette=params)
    ax.set_title(neighborhood)

# Hide the panels that have no neighborhood assigned to them.
for ax in axs.ravel()[len(neighborhoods):]:
    ax.set_visible(False)

plt.tight_layout()

In [None]:
# LU codes grouped into coarse land-use categories.
res_1_fam = ["R1"]
res_2_fam = ["R2"]
res_3_plus = ["R3", "R4", "RL - RL", "A", "AH"]
condos = ["CD", "CC", "CM", "CP"]
industrial = ["I"]
exempt = ["E", "EA"]
mixed = ["RC"]
commercial = ["C", "RC", "CL"]


def calculate_land_use(row):
    """Map a row's ``LU`` code to a coarse land-use category name.

    The categories are tested in a fixed order and the first one containing
    the code wins.  "RC" is listed under both ``mixed`` and ``commercial``;
    because ``mixed`` is tested first, "RC" is always reported as "mixed".

    Returns the category name as a string, or ``None`` when the code belongs
    to none of the lists.
    """
    code = row["LU"]
    # Built per call so the current contents of the module-level lists are used.
    ordered_groups = (
        ("res_1_fam", res_1_fam),
        ("res_2_fam", res_2_fam),
        ("res_3_plus", res_3_plus),
        ("condos", condos),
        ("industrial", industrial),
        ("exempt", exempt),
        ("mixed", mixed),
        ("commercial", commercial),
    )
    for label, codes in ordered_groups:
        if code in codes:
            return label
    return None

# Store the category calculate_land_use returns for each row; rows whose LU code
# is in none of the lists get no category (the function returns None).
land_df["LAND_USE"] = land_df.apply(calculate_land_use, axis=1)

In [None]:
# One panel per neighborhood: parcels coloured by LAND_USE, with the parcels that
# carry an amenity (USE != "OTHER") over-plotted in black.
neighborhoods = land_df["BRA_PD"].dropna().unique()

# Size the grid from the data instead of hard-coding 3x6, so that more than 18
# neighborhoods cannot overflow the axes array.  For 16 neighborhoods this
# still yields the original 3x6 grid and (20, 25) figure.
n_cols = 3
n_rows = max(1, -(-len(neighborhoods) // n_cols))  # ceiling division
fig, axs = plt.subplots(ncols=n_cols, nrows=n_rows, sharey=False,
                        figsize=(20, 25 * n_rows / 6), dpi=300, squeeze=False)
params = {'res_1_fam': 'firebrick','res_2_fam': 'cornflowerblue','res_3_plus': 'chocolate',
          'condos': 'palegreen','commercial': 'indigo','industrial': 'gray',
          'exempt': 'orchid', 'mixed': 'lightblue'}
params_2 = {True: 'black'}

for ax, neighborhood in zip(axs.ravel(), neighborhoods):
    sub_df = land_df[land_df["BRA_PD"] == neighborhood]
    # .assign returns a new frame, so no value is written into a filtered slice
    # (the original .loc assignment triggered SettingWithCopyWarning).
    sub_df_2 = sub_df[sub_df["USE"] != "OTHER"].assign(IS_PS=lambda d: d["USE"] != "OTHER")
    sns.scatterplot(data=sub_df, x="X", y="Y", hue="LAND_USE", ax=ax, alpha=0.25, palette=params)
    # Draw the overlay from the amenity subset itself rather than pairing x/y
    # from one frame with hue from another and relying on index alignment.
    if not sub_df_2.empty:
        sns.scatterplot(data=sub_df_2, x="X", y="Y", hue="IS_PS", ax=ax, alpha=1, palette=params_2)
    ax.set_title(neighborhood)

# Hide the panels that have no neighborhood assigned to them.
for ax in axs.ravel()[len(neighborhoods):]:
    ax.set_visible(False)

plt.tight_layout()

In [None]:
census_data = pd.read_csv("ACS_1519_BLKGRP.csv")

def apply_census(row, column):
    """Look up ``column`` in ``census_data`` for the row's ``BG_ID_10``.

    Returns the value from the first census record whose ``BG_ID_10`` equals
    ``row["BG_ID_10"]``, or ``None`` when no record matches.  A ``column`` that
    does not exist in ``census_data`` raises ``KeyError`` instead of silently
    producing ``None`` for every row.
    """
    matches = census_data.loc[census_data["BG_ID_10"] == row["BG_ID_10"], column]
    return matches.iloc[0] if len(matches) else None

# Build one lookup keyed by BG_ID_10 and map it onto the parcels, instead of
# scanning the whole census table once per parcel for each of four columns.
# Keeping the first record per key mirrors apply_census; records without a key
# are dropped so that parcels with a missing BG_ID_10 stay unmatched.
_census_by_bg = (
    census_data
    .dropna(subset=["BG_ID_10"])
    .drop_duplicates(subset="BG_ID_10", keep="first")
    .set_index("BG_ID_10")
)
_census_columns = {
    "med_income": "MedHouseIncome",
    "white": "White",
    "black": "Black",
    "asian": "Asian",
}
for _target, _source in _census_columns.items():
    # Parcels whose BG_ID_10 has no census record end up with a missing value.
    land_df[_target] = land_df["BG_ID_10"].map(_census_by_bg[_source])

In [None]:
# Compare two neighborhoods on one axes: parcels coloured by LAND_USE and sized
# by med_income, with amenity parcels (USE != "OTHER") over-plotted as black squares.
neighborhood1 = "Mattapan"
neighborhood2 = "North Dorchester"
fig, axs = plt.subplots(ncols=1, nrows=1, sharey=False, figsize=(20, 15), dpi=300)
params = {'res_1_fam': 'firebrick','res_2_fam': 'cornflowerblue','res_3_plus': 'chocolate',
          'condos': 'palegreen','commercial': 'indigo','industrial': 'gray',
          'exempt': 'orchid', 'mixed': 'lightblue'}
# Defined here so the cell does not depend on a palette left over from another cell.
params_2 = {True: 'black'}

in_pair = land_df["BRA_PD"].isin([neighborhood1, neighborhood2])
sub_df = land_df[in_pair]
# size is taken from the same subset as x/y (the original passed the full
# land_df["med_income"], mixing two differently-sized frames).
sns.scatterplot(data=sub_df, x="X", y="Y", hue="LAND_USE", size="med_income",
                alpha=0.5, palette=params, sizes=(.1, 400), ax=axs)

# `&` binds tighter than `|`, so the original `A | B & C` kept every row of the
# first neighborhood regardless of USE; combine the neighborhood mask first.
sub_df_2 = land_df[in_pair & (land_df["USE"] != "OTHER")].assign(
    IS_PS=lambda d: d["USE"] != "OTHER"
)
# `marker` sets the glyph for all points; `markers` only applies together with `style=`.
if not sub_df_2.empty:
    sns.scatterplot(data=sub_df_2, x="X", y="Y", hue="IS_PS", alpha=1,
                    palette=params_2, marker='s', s=150, ax=axs)
axs.set_title(f"{neighborhood1} & {neighborhood2}")

plt.tight_layout()

In [None]:
# City-wide view: a 4% sample of parcels coloured by the "black" census column
# (shown as percent_black) and sized by med_income, with every amenity parcel
# (USE != "OTHER") over-plotted as a diamond coloured by amenity type.
fig, axs = plt.subplots(ncols=1, nrows=1, sharey=False, figsize=(20, 15), dpi=300)

params_ps = {'mbta_stop': 'firebrick','bps': 'cornflowerblue','comcenter': 'chocolate',
          'medhos': 'palegreen','supermkt': 'white','rel': 'forestgreen',
          'lib': 'orchid', 'private': 'gold', 
          'hlth_cntr': 'lightblue',
          'OTHER': 'gray', 'police': 'yellow'}

# The USE column is renamed only so that this text appears as the legend title.
amenity_legend_title = "AMENITIES vs INCOME and RACE"
# assign/rename return new frames, so nothing is written into a filtered slice.
sub_df_2 = (
    land_df[land_df["USE"] != "OTHER"]
    .assign(IS_PS=lambda d: d["USE"] != "OTHER")
    .rename(columns={"USE": amenity_legend_title})
)

# Fixed seed so the sampled background (and therefore the figure) is reproducible.
SAMPLE_SEED = 0
temp_df = (
    land_df.sample(frac=0.04, random_state=SAMPLE_SEED)
    .rename(columns={"black": "percent_black"})
)

sns.scatterplot(data=temp_df, x="X", y="Y", hue="percent_black", size="med_income",
                alpha=0.35, sizes=(.01, 500), edgecolor='black', ax=axs)

# Plot the amenity layer from its own frame instead of pairing land_df x/y with
# a hue column from sub_df_2 and relying on index alignment.
sns.scatterplot(data=sub_df_2, x="X", y="Y", hue=amenity_legend_title, alpha=1,
                palette=params_ps, edgecolor='black', marker='D', ax=axs)