In [9]:
import pandas as pd
import numpy as np

In [10]:

# Summarise arrests per neighborhood, split into under-18 and 18-and-over,
# and write the result to summaryPittArrest.csv.
df = pd.read_csv("pittsburghArrest.csv")

# Keep only the two columns used below and drop rows missing either one.
df = df[["AGE", "INCIDENTNEIGHBORHOOD"]].dropna(subset=["AGE", "INCIDENTNEIGHBORHOOD"])

# errors="coerce" turns unparseable ages into NaN so they can be dropped too.
df = df.assign(AGE=pd.to_numeric(df["AGE"], errors="coerce")).dropna(subset=["AGE"])

# Vectorized age bucketing (same rule as before: strictly below 18 is "<18").
df = df.assign(AGE_GROUP=np.where(df["AGE"] < 18, "<18", "18+"))

# One row per neighborhood, one column per age group.
# unstack() only creates columns for groups that actually occur, so reindex
# guarantees both columns exist (filled with 0) instead of raising KeyError
# further down when one group is absent from the data.
grouped = (
    df.groupby(["INCIDENTNEIGHBORHOOD", "AGE_GROUP"])
    .size()
    .unstack(fill_value=0)
    .reindex(columns=["<18", "18+"], fill_value=0)
)

# "Local" shares: each age group's share of the arrests within the neighborhood.
# Total_Count is never 0 because every row of `grouped` comes from at least one arrest.
grouped["Total_Count"] = grouped["<18"] + grouped["18+"]
grouped["Under_18_Percent_Local"] = grouped["<18"] / grouped["Total_Count"]
grouped["Over_18_Percent_Local"] = grouped["18+"] / grouped["Total_Count"]

# "Global" shares: the neighborhood's share of all arrests in that age group.
# NOTE: these are fractions in [0, 1], not percentages, despite the column names.
# If an age group has no arrests at all, its global share is NaN (0 / 0).
total_under_18 = grouped["<18"].sum()
total_over_18 = grouped["18+"].sum()

grouped["Under_18_Percent_Global"] = grouped["<18"] / total_under_18
grouped["Over_18_Percent_Global"] = grouped["18+"] / total_over_18

result = grouped.reset_index().rename(columns={
    "INCIDENTNEIGHBORHOOD": "NEIGHBORHOOD",
    "<18": "Under_18_Count",
    "18+": "Over_18_Count"
})

# Fix the output column order explicitly.
result = result[
    ["NEIGHBORHOOD", "Under_18_Count", "Over_18_Count", "Total_Count",
     "Under_18_Percent_Local", "Over_18_Percent_Local",
     "Under_18_Percent_Global", "Over_18_Percent_Global"]
]

result.to_csv("summaryPittArrest.csv", index=False)


In [11]:
# Count, per neighborhood, entries whose type is "park" versus any other type,
# and write the result to summaryPittPark.csv.
df = pd.read_csv("pittsburghParks.csv")

# Only these two columns are used; rows missing either are discarded.
df = df[["type", "neighborhood"]].dropna(subset=["type", "neighborhood"])

# Label every row; the match ignores case and surrounding whitespace.
df["IS_PARK"] = [
    "Park" if kind.strip().lower() == "park" else "Other"
    for kind in df["type"]
]

# One row per neighborhood, one column per label.
grouped = df.groupby(["neighborhood", "IS_PARK"]).size().unstack(fill_value=0)

# unstack() omits a label that never occurs, so make sure both columns exist.
for label in ("Park", "Other"):
    if label not in grouped.columns:
        grouped[label] = 0

# Each neighborhood's share of the overall totals (fractions, not percentages).
total_park = grouped["Park"].sum()
total_other = grouped["Other"].sum()

grouped["Park_Global_Percent"] = grouped["Park"].div(total_park)
grouped["Other_Global_Percent"] = grouped["Other"].div(total_other)

# Rename to the output schema and fix the column order in one step.
result = grouped.reset_index().rename(
    columns={
        "neighborhood": "NEIGHBORHOOD",
        "Park": "Park_Count",
        "Other": "Non_Park_Count",
    }
)[["NEIGHBORHOOD", "Park_Count", "Non_Park_Count", "Park_Global_Percent", "Other_Global_Percent"]]

result.to_csv("summaryPittPark.csv", index=False)


In [12]:

# Count facilities per neighborhood and facility type, plus each neighborhood's
# share of every type, and write the result to summaryPittFacilities.csv.
df = pd.read_csv("pittsburghFacility.csv")

# Only these two columns are used; rows missing either are discarded.
df = df[["type", "neighborhood"]].dropna(subset=["type", "neighborhood"])

all_types = df["type"].unique()

# Rows: neighborhoods. Columns: facility types. Values: number of facilities.
pivot_table = df.groupby(["neighborhood", "type"]).size().unstack(fill_value=0)

# Total per type across all neighborhoods, and each neighborhood's share of it.
type_totals = pivot_table.sum()
type_percents = pivot_table / type_totals

# Build "<type>_Count" and "<type>_Percent" columns side by side.
counts_by_type = pivot_table.add_suffix("_Count")
shares_by_type = type_percents.add_suffix("_Percent")
count_cols = list(counts_by_type.columns)
percent_cols = list(shares_by_type.columns)

final_df = pd.concat([counts_by_type, shares_by_type], axis=1)

# Total number of facilities in the neighborhood, over all types.
final_df["Total_Facilities"] = counts_by_type.sum(axis=1)

final_df = final_df.reset_index().rename(columns={"neighborhood": "NEIGHBORHOOD"})

# Output order: key, total, all count columns, then all share columns.
final_df = final_df[["NEIGHBORHOOD", "Total_Facilities"] + count_cols + percent_cols]

final_df.to_csv("summaryPittFacilities.csv", index=False)


In [13]:

# Aggregate step records per neighborhood, treating records that have steps
# separately from records with zero steps, and write summaryPittSteps.csv.
df = pd.read_csv("pittsburghSteps.csv")

# Only these three columns are used; rows missing any of them are discarded.
df = df[["neighborhood", "length", "number_of_steps"]].dropna(
    subset=["neighborhood", "length", "number_of_steps"]
)

# Split on the step count. Rows with a negative count fall into neither subset.
df_with_steps = df.loc[df["number_of_steps"].gt(0)]
df_no_steps = df.loc[df["number_of_steps"].eq(0)]

# Per neighborhood: total length and total steps over records that have steps.
grouped_steps = df_with_steps.groupby("neighborhood").agg(
    Total_Length_With_Steps=("length", "sum"),
    Total_Steps=("number_of_steps", "sum"),
)

# Total_Steps is > 0 for every row here, so this division is always defined.
grouped_steps = grouped_steps.assign(
    Length_per_Step=grouped_steps["Total_Length_With_Steps"] / grouped_steps["Total_Steps"]
)

# Per neighborhood: total length over records with zero steps.
grouped_no_steps = (
    df_no_steps.groupby("neighborhood")[["length"]]
    .sum()
    .rename(columns={"length": "Zero_Steps_Total_Length"})
)

# Outer join keeps neighborhoods present in only one subset; their missing
# values (including Length_per_Step) become 0.
final_df = (
    grouped_steps.join(grouped_no_steps, how="outer")
    .fillna(0)
    .reset_index()
    .rename(columns={"neighborhood": "NEIGHBORHOOD"})
)

final_df.to_csv("summaryPittSteps.csv", index=False)

In [14]:
import pandas as pd

# Combine the four per-neighborhood summaries into one table and write it to
# summaryPitt_ALL_MERGED.csv.
arrest_df = pd.read_csv("summaryPittArrest.csv")
park_df = pd.read_csv("summaryPittPark.csv")
fac_df = pd.read_csv("summaryPittFacilities.csv")
step_df = pd.read_csv("summaryPittSteps.csv")

# Neighborhood names present in each summary.
arrest_neigh, fac_neigh, step_neigh, park_neigh = (
    set(frame["NEIGHBORHOOD"]) for frame in (arrest_df, fac_df, step_df, park_df)
)

# A neighborhood is kept only if it appears in all three core summaries
# (arrests, facilities, steps). The park summary is optional.
core_common = arrest_neigh.intersection(fac_neigh, step_neigh)

# Report every neighborhood that appears in some, but not all, core summaries.
# NOTE(review): names are compared as exact strings. The recorded output lists
# several spellings of Mount Oliver, which suggests the source datasets do not
# share one naming scheme - consider normalizing names before this step.
all_core = arrest_neigh.union(fac_neigh, step_neigh)
missing_core = all_core.difference(core_common)
if missing_core:
    print("The following neighborhood is missing in the dataset(Arrest/Facilities/Steps), and will not be counted:")
    for name in sorted(missing_core):
        print("-", name)

# Restrict every summary to the common neighborhoods.
arrest_df, fac_df, step_df, park_df = (
    frame[frame["NEIGHBORHOOD"].isin(core_common)]
    for frame in (arrest_df, fac_df, step_df, park_df)
)

# Inner joins for the core summaries; a left join for parks so neighborhoods
# without park data are kept, with their park columns filled with 0.
merged = (
    arrest_df.merge(fac_df, on="NEIGHBORHOOD")
    .merge(step_df, on="NEIGHBORHOOD")
    .merge(park_df, on="NEIGHBORHOOD", how="left")
    .fillna(0)
)

merged.to_csv("summaryPitt_ALL_MERGED.csv", index=False)


The following neighborhood is missing in the dataset(Arrest/Facilities/Steps), and will not be counted:
- Allegheny West
- Arlington
- Arlington Heights
- Central North Side
- Chartiers City
- Chateau
- East Carnegie
- Esplen
- Fairywood
- Friendship
- Golden Triangle/Civic Arena
- Hays
- Homewood South
- Homewood West
- Mount Oliver
- Mount Oliver Borough
- Mt. Oliver
- Mt. Oliver Boro
- Mt. Oliver Neighborhood
- New Homestead
- North Shore
- Northview Heights
- Outside City
- Outside County
- Outside State
- Ridgemont
- South Shore
- Spring Garden
- St. Clair
- Summer Hill
- Swisshelm Park
- Troy Hill-Herrs Island
