In [151]:
import sys;
sys.path.append("../talk_business")
from talk_business.utils.sql import neighborhood_explorer as ne

import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio

pio.templates.default="plotly"

In [None]:
def plot_donut(data: pd.DataFrame) -> go.Figure:
    """Return a donut chart from a Series."""
    fig = go.Figure(
        data=[
            go.Pie(
                labels=data["RACE_GROUPS"],
                values=data["population"],
                hole=0.5,
                textinfo="label+percent",
                sort=False,
                direction="clockwise",
            )
        ]
    )
    fig.update_layout(
        margin=dict(l=0, r=0, t=70, b=0),
        showlegend=False,
        title=dict(
            x=0.5,
            xanchor="center",
        ),
        height=300,
    )
    return fig

In [None]:
DISTRIBUTIONS = [
    "AGE_GROUPS",
    "ENROLLMENT_GROUPS",
    "FAMILY_INCOME_GROUPS",
    "OCCUPATION_GROUPS",
    "RACE_GROUPS",
    "RENT_GROUPS",
]

## Age

In [166]:


LABELS = {
    "AGE_GROUPS": "Age",
    "ENROLLMENT_GROUPS": "School Enrollment",
    "FAMILY_INCOME_GROUPS": "Family Income (Yearly)",
    "OCCUPATION_GROUPS": "Occupation",
    "RACE_GROUPS": "Race",
    "RENT_GROUPS": "Rent (Monthly)",
    "population": "Population",
    "age_category": "Age Group",
    "income_groups": "Family Income",
}

AGE_CATEGORY = {
    "00-05": "Children",
    "05-10": "Children",
    "10-14": "Adolescents",
    "15-19": "Adolescents",
    "20-24": "Young Adults",
    "25-29": "Young Adults",
    "30-34": "Adults",
    "35-39": "Adults",
    "40-44": "Adults",
    "45-49": "Adults",
    "50-54": "Adults",
    "55-59": "Adults",
    "60-64": "Older adults",
    "65-69": "Older adults",
    "70-74": "Older adults",
    "75-79": "Older adults",
    "80-84": "Older adults",
    "85-100": "Older adults",
}

FAMILY_INCOME_GROUPPED = {
    "0-10000": "$0-$25,000",
    "10000-14999": "$0-$25,000",
    "15000-19999": "$0-$25,000",
    "20000-24999": "$0-$25,000",
    "25000-29999": "$25,000-$50,000",
    "30000-34999": "$25,000-$50,000",
    "35000-39999": "$25,000-$50,000",
    "40000-44999": "$25,000-$50,000",
    "45000-49999": "$25,000-$50,000",
    "50000-59999": "$50,000-$75,000",
    "60000-74999": "$50,000-$75,000",
    "75000-99999": "$75,000-$100,000",
    "100000-124999": "$100,000-$125,000",
    "125000-149999": "$125,000-$150,000",
    "150000-199999": "$150,000-$200,000",
    "200000-": "+$200,000",
}

OCCUPATION_MAPPER = {
    "1": "Building and grounds cleaning and maintenance occupations",
    "2": "Computer engineering and science occupations",
    "3": "Construction and extraction occupations",
    "4": "Education legal community service arts and media occupations",
    "5": "Farming fishing and forestry occupations",
    "6": "Food preparation and serving related occupations",
    "7": "Healthcare practitioners and technical occupations",
    "8": "Healthcare support occupations",
    "9": "Installation maintenance and repair occupations",
    "10": "Management business and financial occupations",
    "11": "Material moving occupations",
    "12": "Office and administrative support occupations",
    "13": "Personal care and service occupations",
    "14": "Production occupations",
    "15": "Protective service occupations",
    "16": "Sales and related occupations",
    "17": "Transportation occupations",
}

RACE_GROUPS_MAPPER = {
    "WHITE": "WHITE",
    "BLACK_OR_AFRICAN_AMERICAN": "BLACK_OR_AFRICAN_AMERICAN",
    "AMERICAN_INDIAN_AND_ALASKA_NATIVE": "SOME_OTHER_RACE",
    "ASIAN": "ASIAN",
    "NATIVE_HAWAIIAN_AND_OTHER_PACIFIC_ISLANDER_ALONE": "SOME_OTHER_RACE",
    "SOME_OTHER_RACE": "SOME_OTHER_RACE",
    "TWO_OR_MORE_RACES": "SOME_OTHER_RACE",
}

RENT_MAPPER = {
    "0-100": "$0-$500",
    "100-149": "$0-$500",
    "150-199": "$0-$500",
    "200-249": "$0-$500",
    "250-299": "$0-$500",
    "300-349": "$0-$500",
    "350-399": "$0-$500",
    "400-449": "$0-$500",
    "450-499": "$0-$500",
    "500-549": "$500-$1,000",
    "550-599": "$500-$1,000",
    "600-649": "$500-$1,000",
    "650-699": "$500-$1,000",
    "700-749": "$500-$1,000",
    "750-799": "$500-$1,000",
    "800-899": "$500-$1,000",
    "900-999": "$500-$1,000",
    "1000-1249": "$1,000-$1,500",
    "1250-1499": "$1,000-$1,500",
    "1500-1999": "$1,500-$2,000",
    "2000-2499": "$2,000-$2,500",
    "2500-2999": "$2,500-$3,000",
    "3000-3499": "$3,000-$3,500",
    "3500-": "+$3500",
}

In [56]:
def plot_age_distribution(age_distribution: pd.DataFrame) -> tuple[go.Figure, go.Figure]:
    fig = px.bar(
        age_distribution, x="AGE_GROUPS", y="population", text_auto=",.0f", color="age_category", labels=LABELS
    )
    fig.update_layout(xaxis_type="category")

    summary = age_distribution.groupby("age_category")["population"].sum().reset_index()
    summary_fig = px.bar(summary, x="age_category", y="population", text_auto=",.0f", labels=LABELS)
    return fig, summary_fig
    

In [None]:
def plot_enrollment(enrollment_distribution: pd.DataFrame):
    return px.bar(
        enrollment_distribution,
        x="ENROLLMENT_GROUPS",
        y="population",
        text_auto=",.0f",
        labels=LABELS,
    )

In [167]:
def plot_distribution(style: str):
    if style == "AGE_GROUPS":
        import pandas as pd

        distribution = ne.get_distribution("AGE_GROUPS", "Mott Haven-Port Morris")
        age_distribution = distribution.assign(
            age_category=lambda df: df["AGE_GROUPS"].map(AGE_CATEGORY),
        ).assign(
            age_category=lambda df: pd.Categorical(
                df["age_category"], categories=df.age_category.unique(), ordered=True
            )
        )

        return plot_age_distribution(age_distribution)

    elif style == "ENROLLMENT_GROUPS":
        enrollment_distribution = ne.get_distribution(
            "ENROLLMENT_GROUPS", "Mott Haven-Port Morris"
        )
        enrollment_distribution = enrollment_distribution.assign(
            ENROLLMENT_GROUPS=lambda df: pd.Categorical(
                df["ENROLLMENT_GROUPS"],
                categories=[
                    "ELEMENTARY_SCHOOL",
                    "MIDDLE_SCHOOL",
                    "HIGH_SCHOOL",
                    "HIGHER_EDUCATION",
                ],
                ordered=True,
            )
        ).sort_values("ENROLLMENT_GROUPS")
        plot_enrollment(enrollment_distribution)
    elif style == "FAMILY_INCOME_GROUPS":
        family_income_distribution = distribution = ne.get_distribution(
            "FAMILY_INCOME_GROUPS", "Mott Haven-Port Morris"
        )
        family_income_distribution = (
            family_income_distribution.assign(
                income_groups=lambda df: pd.Categorical(
                    df["FAMILY_INCOME_GROUPS"].map(FAMILY_INCOME_GROUPPED),
                    categories=pd.Series(FAMILY_INCOME_GROUPPED).unique(),
                    ordered=True,
                )
            )
            .groupby("income_groups", as_index=False)["population"]
            .sum()
        )
        fig = px.bar(
            distribution, x="income_groups", y="population", text_auto=",.0f", labels=LABELS
        )
        fig.update_layout(xaxis_type="category")
        return fig

    elif style == "OCCUPATION_GROUPS":
        occupation_dist = ne.get_distribution("OCCUPATION_GROUPS", "Mott Haven-Port Morris")
        top_occupation = (
            occupation_dist.nlargest(10, "population")
            .sort_values("population", ascending=True)
            .assign(
                OCCUPATION_GROUPS=lambda df: df["OCCUPATION_GROUPS"]
                .map(OCCUPATION_MAPPER)
                .str.replace(" occupations", "")
            )
        )

        fig = px.bar(
            top_occupation,
            y="OCCUPATION_GROUPS",
            x="population",
            text_auto=",.0f",
            labels=LABELS,
            orientation="h",
        )
        fig.update_layout(margin=dict(l=200, r=0, t=0, b=40))
        return fig

    elif style == "RACE_GROUPS":
        race_distribution = ne.get_distribution("RACE_GROUPS", "Mott Haven-Port Morris")
        race_distribution = (
            race_distribution.assign(
                RACE_GROUPS=lambda df: df["RACE_GROUPS"].map(RACE_GROUPS_MAPPER)
            )
            .groupby("RACE_GROUPS", as_index=False)["population"]
            .sum()
            .sort_values("population", ascending=False)
        )
        return plot_donut(race_distribution)
    elif style == "RENT_GROUPS":
        rent_groups = ne.get_distribution("RENT_GROUPS", "Mott Haven-Port Morris")
        rent_distribution = (
            rent_groups.assign(
                RENT_GROUPS=lambda df: pd.Categorical(
                    df["RENT_GROUPS"].map(RENT_MAPPER),
                    categories=pd.Series(RENT_MAPPER).unique(),
                    ordered=True,
                )
            )
            .groupby("RENT_GROUPS", as_index=False)["population"]
            .sum()
            .sort_values("population", ascending=False)
            .sort_values("RENT_GROUPS")
        )
        fig px.bar(
            rent_distribution, x="RENT_GROUPS", y="population", text_auto=",.0f", labels=LABELS
        )
    return fig

IndentationError: expected an indented block after function definition on line 1 (1167895496.py, line 2)

2023-04-19 18:47:27.404 query: [SELECT * FROM PERSONAL.PUBLIC.RENT_GROUPS WHERE NTANAME = 'Mott Haven-Port Morri...]
2023-04-19 18:47:28.792 query execution done
2023-04-19 18:47:28.792 Number of results in first chunk: 1
