In [27]:
import os
import sys

# Make the local `talk_business` checkout importable. The location can be
# overridden with the TALK_BUSINESS_ROOT environment variable so the notebook is
# not tied to a single machine; the original absolute path stays the fallback.
_PROJECT_ROOT = os.environ.get(
    "TALK_BUSINESS_ROOT", "/Users/joselondono/Documents/projects/talk-business"
)
# Only insert when the checkout is not already first on the path, so re-running
# this cell does not keep stacking duplicate entries.
if sys.path[:1] != [_PROJECT_ROOT]:
    sys.path.insert(0, _PROJECT_ROOT)
from talk_business.utils.sql import neighborhood_explorer as ne
from talk_business.utils.plots import neighborhood as nplot
from talk_business.utils.transformers import neighborhood as ntransform

import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
import pandas as pd

# Notebook-wide Plotly setting: select the "plotly" template as the default.
pio.templates.default = "plotly"


## Age

In [8]:


# Display labels keyed by distribution table name (upper-case keys) or by
# derived column name (lower-case keys).
# NOTE(review): not referenced by the cells visible here -- presumably consumed
# by plotting code elsewhere; confirm before removing.
LABELS = dict(
    AGE_GROUPS="Age",
    ENROLLMENT_GROUPS="School Enrollment",
    FAMILY_INCOME_GROUPS="Family Income (Yearly)",
    OCCUPATION_GROUPS="Occupation",
    RACE_GROUPS="Race",
    RENT_GROUPS="Rent (Monthly)",
    population="Population",
    age_category="Age Group",
    income_groups="Family Income",
)

# Maps each AGE_GROUPS bucket to a broader life-stage label. Written as
# (label, buckets) groups and flattened, so the grouping is visible at a glance;
# the resulting key order is the same ascending bucket order.
AGE_CATEGORY = {
    bucket: stage
    for stage, buckets in (
        ("Children", ("00-05", "05-10")),
        ("Adolescents", ("10-14", "15-19")),
        ("Young Adults", ("20-24", "25-29")),
        ("Adults", ("30-34", "35-39", "40-44", "45-49", "50-54", "55-59")),
        ("Older adults", ("60-64", "65-69", "70-74", "75-79", "80-84", "85-100")),
    )
    for bucket in buckets
}

# Collapses the fine-grained FAMILY_INCOME_GROUPS brackets into wider display
# brackets. Written as (label, source brackets) groups and flattened; key order
# follows ascending income.
FAMILY_INCOME_GROUPPED = {
    bracket: label
    for label, brackets in (
        ("$0-$25,000", ("0-10000", "10000-14999", "15000-19999", "20000-24999")),
        (
            "$25,000-$50,000",
            ("25000-29999", "30000-34999", "35000-39999", "40000-44999", "45000-49999"),
        ),
        ("$50,000-$75,000", ("50000-59999", "60000-74999")),
        ("$75,000-$100,000", ("75000-99999",)),
        ("$100,000-$125,000", ("100000-124999",)),
        ("$125,000-$150,000", ("125000-149999",)),
        ("$150,000-$200,000", ("150000-199999",)),
        ("+$200,000", ("200000-",)),
    )
    for bracket in brackets
}

# Occupation code (as a string, "1" through "17") -> occupation title. The
# codes are consecutive, so they are generated from each title's position.
OCCUPATION_MAPPER = {
    str(code): title
    for code, title in enumerate(
        (
            "Building and grounds cleaning and maintenance occupations",
            "Computer engineering and science occupations",
            "Construction and extraction occupations",
            "Education legal community service arts and media occupations",
            "Farming fishing and forestry occupations",
            "Food preparation and serving related occupations",
            "Healthcare practitioners and technical occupations",
            "Healthcare support occupations",
            "Installation maintenance and repair occupations",
            "Management business and financial occupations",
            "Material moving occupations",
            "Office and administrative support occupations",
            "Personal care and service occupations",
            "Production occupations",
            "Protective service occupations",
            "Sales and related occupations",
            "Transportation occupations",
        ),
        start=1,
    )
}

# Folds the smaller race categories into "SOME_OTHER_RACE"; WHITE,
# BLACK_OR_AFRICAN_AMERICAN and ASIAN keep their own label.
RACE_GROUPS_MAPPER = {
    race: race
    if race in ("WHITE", "BLACK_OR_AFRICAN_AMERICAN", "ASIAN")
    else "SOME_OTHER_RACE"
    for race in (
        "WHITE",
        "BLACK_OR_AFRICAN_AMERICAN",
        "AMERICAN_INDIAN_AND_ALASKA_NATIVE",
        "ASIAN",
        "NATIVE_HAWAIIAN_AND_OTHER_PACIFIC_ISLANDER_ALONE",
        "SOME_OTHER_RACE",
        "TWO_OR_MORE_RACES",
    )
}

# Collapses the fine-grained RENT_GROUPS brackets into wider display brackets.
# Written as (label, source brackets) groups and flattened; key order follows
# ascending rent.
# NOTE(review): the open-ended label "+$3500" lacks the thousands separator the
# other labels use; left unchanged here because it is a displayed value.
RENT_MAPPER = {
    bracket: label
    for label, brackets in (
        (
            "$0-$500",
            (
                "0-100",
                "100-149",
                "150-199",
                "200-249",
                "250-299",
                "300-349",
                "350-399",
                "400-449",
                "450-499",
            ),
        ),
        (
            "$500-$1,000",
            (
                "500-549",
                "550-599",
                "600-649",
                "650-699",
                "700-749",
                "750-799",
                "800-899",
                "900-999",
            ),
        ),
        ("$1,000-$1,500", ("1000-1249", "1250-1499")),
        ("$1,500-$2,000", ("1500-1999",)),
        ("$2,000-$2,500", ("2000-2499",)),
        ("$2,500-$3,000", ("2500-2999",)),
        ("$3,000-$3,500", ("3000-3499",)),
        ("+$3500", ("3500-",)),
    )
    for bracket in brackets
}

# Enrollment stages listed from earliest to latest schooling; handed to
# ntransform.resort_categories as the desired category order.
SORTED_ENROLLMENT = [
    "ELEMENTARY_SCHOOL", "MIDDLE_SCHOOL",
    "HIGH_SCHOOL", "HIGHER_EDUCATION",
]

In [36]:
def plot_distribution(style: str, neighborhood: str = "Mott Haven-Port Morris"):
    """Fetch one demographic distribution for a neighborhood and plot it.

    Parameters
    ----------
    style : str
        Which distribution to plot. One of ``"AGE_GROUPS"``,
        ``"ENROLLMENT_GROUPS"``, ``"FAMILY_INCOME_GROUPS"``,
        ``"OCCUPATION_GROUPS"``, ``"RACE_GROUPS"`` or ``"RENT_GROUPS"``. The
        same string is passed to ``ne.get_distribution`` as its first argument.
    neighborhood : str, optional
        Neighborhood name passed to ``ne.get_distribution`` as its second
        argument. Defaults to ``"Mott Haven-Port Morris"``.

    Returns
    -------
    The object returned by ``nplot.distribution`` (or ``nplot.plot_donut`` for
    ``"RACE_GROUPS"``) -- presumably a Plotly figure; confirm against
    ``talk_business.utils.plots``.

    Raises
    ------
    ValueError
        If ``style`` is not one of the supported names.
    """
    supported = (
        "AGE_GROUPS",
        "ENROLLMENT_GROUPS",
        "FAMILY_INCOME_GROUPS",
        "OCCUPATION_GROUPS",
        "RACE_GROUPS",
        "RENT_GROUPS",
    )
    # Reject unknown names before any query is issued, rather than silently
    # returning None.
    if style not in supported:
        raise ValueError(
            f"Unknown distribution style {style!r}; "
            f"expected one of: {', '.join(supported)}"
        )

    # Every branch starts from the same lookup, keyed by the style name.
    # NOTE(review): `neighborhood` is forwarded as-is -- confirm that
    # ne.get_distribution binds it as a query parameter before passing
    # untrusted input.
    data = ne.get_distribution(style, neighborhood)

    if style == "AGE_GROUPS":
        # Add a coarser life-stage column that is used to colour the bars.
        data = data.assign(
            age_category=lambda df: ntransform.map_to_category(
                df["AGE_GROUPS"], AGE_CATEGORY
            )
        )
        return nplot.distribution(
            data,
            "AGE_GROUPS",
            title="Age Distribution",
            color="age_category",
        )

    if style == "ENROLLMENT_GROUPS":
        data = data.pipe(
            ntransform.resort_categories, "ENROLLMENT_GROUPS", SORTED_ENROLLMENT
        )
        return nplot.distribution(
            data,
            "ENROLLMENT_GROUPS",
            title="Stage of Studies",
        )

    if style == "FAMILY_INCOME_GROUPS":
        data = ntransform.aggregate(
            data, "FAMILY_INCOME_GROUPS", FAMILY_INCOME_GROUPPED
        )
        # NOTE(review): this plots the "income_groups" column, whereas the rent
        # branch plots the same column name it aggregated on -- confirm which
        # column ntransform.aggregate actually produces.
        return nplot.distribution(
            data,
            "income_groups",
            title="Family Income Distribution",
        )

    if style == "OCCUPATION_GROUPS":
        # The trailing 10 matches the "Top 10" in the title.
        data = ntransform.parse_occupation(data, OCCUPATION_MAPPER, 10)
        return nplot.distribution(
            data, "OCCUPATION_GROUPS", title="Top 10 Common Jobs", orient="h"
        )

    if style == "RACE_GROUPS":
        data = ntransform.aggregate(data, "RACE_GROUPS", RACE_GROUPS_MAPPER, False)
        return nplot.plot_donut(data, "RACE_GROUPS", "Racial Profile")

    # Only "RENT_GROUPS" can reach this point.
    data = ntransform.aggregate(data, "RENT_GROUPS", RENT_MAPPER)
    return nplot.distribution(
        data,
        "RENT_GROUPS",
        title="Monthly Rent Distribution",
    )

In [37]:
plot_distribution("RENT_GROUPS")

2023-04-19 20:07:35.129 query: [SELECT * FROM PERSONAL.PUBLIC.RENT_GROUPS WHERE NTANAME = 'Mott Haven-Port Morri...]
2023-04-19 20:07:35.345 query execution done
2023-04-19 20:07:35.345 Number of results in first chunk: 1
