In [None]:
def pearson_correlation_scatterplot(df, cols,
                                    title="Pearson Correlation Scatterplot",
                                    xlabel=None, ylabel=None):
    """
    Draw a scatterplot of two DataFrame columns annotated with their Pearson correlation.

    Rows where either column is missing are excluded before the coefficient is
    computed, so a single NaN no longer turns the whole result into NaN.

    Parameters:
      df : pd.DataFrame
          The input DataFrame containing the data.
      cols : list
          Exactly two column names; the first is plotted on x, the second on y.
      title : str, optional
          Plot title. Default is "Pearson Correlation Scatterplot".
      xlabel : str, optional
          Label for the x-axis. If None, the first column name from cols is used.
      ylabel : str, optional
          Label for the y-axis. If None, the second column name from cols is used.

    Returns:
      corr_coef : float
          The Pearson correlation coefficient of the pairwise-complete rows.
      p_value : float
          The p-value reported by scipy.stats.pearsonr.

    Raises:
      ValueError: if cols does not contain exactly two entries.
    """
    if len(cols) != 2:
        raise ValueError("Exactly two columns must be provided for Pearson correlation.")

    x_col, y_col = cols
    x, y = df[x_col], df[y_col]

    # Keep only rows observed in both columns (pairwise-complete cases).
    complete = x.notna() & y.notna()
    x, y = x[complete], y[complete]

    corr_coef, p_value = pearsonr(x, y)

    # Fall back to the column names for unlabeled axes.
    xlabel = x_col if xlabel is None else xlabel
    ylabel = y_col if ylabel is None else ylabel

    plt.figure(figsize=(8, 6))
    plt.scatter(x, y, alpha=0.7, c='blue', edgecolors='w', s=100)
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)

    # Annotate in axes coordinates so the box stays in the top-left corner
    # regardless of the data range.
    plt.text(0.05, 0.95, f"Pearson r = {corr_coef:.3f}\np-value = {p_value:.3g}",
             transform=plt.gca().transAxes,
             verticalalignment='top',
             bbox=dict(boxstyle="round", facecolor="wheat", alpha=0.5))

    plt.grid(True)
    plt.tight_layout()
    plt.show()

    return corr_coef, p_value

In [None]:
def spearman_correlation_scatterplot(df, cols,
                                    title="spearman rank Correlation Scatterplot",
                                    xlabel=None, ylabel=None):
    """
    Draw a scatterplot of two DataFrame columns annotated with their Spearman rank correlation.

    Rows where either column is missing are excluded before the coefficient is
    computed, so a single NaN no longer turns the whole result into NaN.

    Parameters:
      df : pd.DataFrame
          The input DataFrame containing the data.
      cols : list
          Exactly two column names; the first is plotted on x, the second on y.
      title : str, optional
          Plot title. Default is "spearman rank Correlation Scatterplot".
      xlabel : str, optional
          Label for the x-axis. If None, the first column name from cols is used.
      ylabel : str, optional
          Label for the y-axis. If None, the second column name from cols is used.

    Returns:
      corr_coef : float
          The Spearman rank correlation coefficient of the pairwise-complete rows.
      p_value : float
          The p-value reported by scipy.stats.spearmanr.

    Raises:
      ValueError: if cols does not contain exactly two entries.
    """
    if len(cols) != 2:
        raise ValueError("Exactly two columns must be provided for Spearman correlation.")

    x_col, y_col = cols
    x, y = df[x_col], df[y_col]

    # Keep only rows observed in both columns (pairwise-complete cases).
    complete = x.notna() & y.notna()
    x, y = x[complete], y[complete]

    corr_coef, p_value = spearmanr(x, y)

    # Fall back to the column names for unlabeled axes.
    xlabel = x_col if xlabel is None else xlabel
    ylabel = y_col if ylabel is None else ylabel

    plt.figure(figsize=(8, 6))
    plt.scatter(x, y, alpha=0.7, c='green', edgecolors='w', s=100)
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)

    # Annotate in axes coordinates so the box stays in the top-left corner
    # regardless of the data range.
    plt.text(0.05, 0.95, f"Spearman r = {corr_coef:.3f}\np-value = {p_value:.3g}",
             transform=plt.gca().transAxes,
             verticalalignment='top',
             bbox=dict(boxstyle="round", facecolor="lavender", alpha=0.5))

    plt.grid(True)
    plt.tight_layout()
    plt.show()

    return corr_coef, p_value

In [None]:
def extract_and_upweight_visits(df):
    """
    Add 'weekday_visits' and 'weekend_visits' columns derived from 'POPULARITY_BY_DAY'.

    'POPULARITY_BY_DAY' cells that are strings are parsed with ast.literal_eval
    and written back to the column. Each parsed dict is expected to map day
    names ("Monday" ... "Sunday") to counts; days absent from a dict count as 0.
    Cells that are not dicts after parsing (e.g. None/NaN) yield NaN totals
    instead of raising.

    NOTE: despite the name, no upweighting is applied at the moment; the
    multiplication by an 'upweighting_factor' column is disabled.

    Parameters:
      df : pd.DataFrame with a 'POPULARITY_BY_DAY' column. Modified in place.

    Returns:
      The same DataFrame object, with the two columns added.
    """
    weekdays = ("Monday", "Tuesday", "Wednesday", "Thursday", "Friday")
    weekend_days = ("Saturday", "Sunday")

    def parse(value):
        return ast.literal_eval(value) if isinstance(value, str) else value

    def total_over(days):
        def total(popularity):
            if not isinstance(popularity, dict):
                # Missing record: report "unknown" rather than crash or claim 0 visits.
                return float("nan")
            return sum(popularity.get(day, 0) for day in days)
        return total

    df["POPULARITY_BY_DAY"] = df["POPULARITY_BY_DAY"].apply(parse)
    df["weekday_visits"] = df["POPULARITY_BY_DAY"].apply(total_over(weekdays))
    df["weekend_visits"] = df["POPULARITY_BY_DAY"].apply(total_over(weekend_days))

    return df

In [None]:
!git clone https://github.com/comradeshawty/patterns.git
%cd patterns

Cloning into 'patterns'...
remote: Enumerating objects: 367, done.[K
remote: Counting objects: 100% (71/71), done.[K
remote: Compressing objects: 100% (71/71), done.[K
remote: Total 367 (delta 38), reused 0 (delta 0), pack-reused 296 (from 2)[K
Receiving objects: 100% (367/367), 2.89 MiB | 6.41 MiB/s, done.
Resolving deltas: 100% (199/199), done.
/content/patterns/patterns


In [None]:
def compute_income_segregation(df, cbg_gdf):
    """
    Compute income segregation per POI and experienced income segregation per CBG
    using four income groups.

    POI level: each key of a row's 'adjusted_cbg_visitors' dict is converted to
    an int CBG id and mapped to an income quartile through cbg_gdf. Visitor
    counts are summed per quartile and turned into proportions p_q. The score is

         Sα = (2/3) * Σ_q |p_q − 0.25|

    POIs with no visitors from a CBG of known quartile get Sα = NaN and an
    all-zero proportions dict.

    CBG level: for CBG b and POI α, the weight τ(b,α) is b's count at α divided
    by b's total count over all POIs. The exposure of b to group q is
    Σ_α τ(b,α) * p_q(α), accumulated over POIs that have valid proportions,
    then rescaled to sum to 1. The score is

         Si = (2/3) * Σ_q |exposure_q − 0.25|

    Parameters:
      df      : DataFrame with an 'adjusted_cbg_visitors' column of dicts
                {cbg: visitor count}. Keys that cannot be converted to int are
                skipped; cells that are not dicts are treated as empty.
      cbg_gdf : DataFrame/GeoDataFrame with a 'cbg' column and an
                'income_quantile' column holding labels in
                {"low", "lower_middle", "upper_middle", "high"}. CBGs with a
                missing or unrecognised label are ignored. Not modified.

    Returns:
      Tuple (df_out, cbg_out):
        df_out  : copy of df with columns 'Sα' (POI score) and
                  'quartile_proportions' (dict label -> proportion).
        cbg_out : copy of cbg_gdf with 'cbg' converted to int (leading zeros
                  stripped) and a column 'Si' (NaN for CBGs without exposure).
    """
    labels = ("low", "lower_middle", "upper_middle", "high")
    income_label_to_quartile = {label: rank for rank, label in enumerate(labels, start=1)}

    # Work on a copy so the caller's frame is left untouched.
    cbg_gdf = cbg_gdf.copy()
    cbg_gdf["cbg"] = cbg_gdf["cbg"].astype(str).str.lstrip("0").astype(int)

    # Labels that are missing/unknown map to NaN, which would turn every mapped
    # value into a float and break array indexing below; drop them and force ints.
    quartile_by_cbg = cbg_gdf.set_index("cbg")["income_quantile"].map(income_label_to_quartile)
    cbg_income_map = quartile_by_cbg.dropna().astype(int).to_dict()

    def parse_visitors(visitor_dict):
        """Return [(cbg_int, count), ...], skipping keys that are not int-like."""
        if not isinstance(visitor_dict, dict):
            return []
        pairs = []
        for cbg, count in visitor_dict.items():
            try:
                pairs.append((int(cbg), count))
            except (TypeError, ValueError, OverflowError):
                continue
        return pairs

    parsed_rows = [parse_visitors(cell) for cell in df["adjusted_cbg_visitors"]]

    # First pass: total visits of every CBG across all POIs.
    total_visits_per_cbg = {}
    for pairs in parsed_rows:
        for cbg_int, count in pairs:
            total_visits_per_cbg[cbg_int] = total_visits_per_cbg.get(cbg_int, 0) + count

    # Second pass: POI proportions/scores and per-CBG exposure accumulation.
    poi_segregation_scores = []
    quartile_proportions_list = []
    cbg_exposure = {}
    for pairs in parsed_rows:
        quartile_counts = np.zeros(len(labels), dtype=float)
        for cbg_int, count in pairs:
            quartile = cbg_income_map.get(cbg_int)
            if quartile is not None:
                quartile_counts[quartile - 1] += count  # quartiles are 1-based

        total = quartile_counts.sum()
        if total == 0:
            poi_segregation_scores.append(np.nan)
            quartile_proportions_list.append(dict.fromkeys(labels, 0))
            continue

        proportions = quartile_counts / total
        poi_segregation_scores.append((2/3) * np.sum(np.abs(proportions - 0.25)))
        quartile_proportions_list.append(dict(zip(labels, proportions)))

        for cbg_int, count in pairs:
            global_total = total_visits_per_cbg.get(cbg_int, 0)
            if global_total == 0:
                continue
            # Share of this CBG's visits made to the current POI.
            contribution = (count / global_total) * proportions
            if cbg_int in cbg_exposure:
                cbg_exposure[cbg_int] += contribution
            else:
                cbg_exposure[cbg_int] = np.array(contribution, dtype=float)

    df = df.copy()
    df['Sα'] = poi_segregation_scores
    df['quartile_proportions'] = quartile_proportions_list

    experienced_income_segregation = {}
    for cbg_int, exposure_array in cbg_exposure.items():
        exposure_sum = exposure_array.sum()
        if exposure_sum == 0:
            experienced_income_segregation[cbg_int] = np.nan
        else:
            normalized_exposure = exposure_array / exposure_sum
            experienced_income_segregation[cbg_int] = (2/3) * np.sum(np.abs(normalized_exposure - 0.25))

    cbg_gdf['Si'] = cbg_gdf['cbg'].map(experienced_income_segregation)

    return df, cbg_gdf

In [None]:
def merge_2010_to_2020_block_groups(expanded_df, crosswalk_df, cbg_col='cbg'):
    """
    Left-join a 2010 -> 2020 block-group crosswalk onto expanded_df.

    Rows are matched on expanded_df[cbg_col] == crosswalk_df['bg2010ge']; the
    crosswalk column 'bg2020ge' is exposed as 'cbg_2020'. When the merged result
    has a 'geometry' column it is wrapped in a GeoDataFrame with CRS EPSG:4326.

    Parameters:
      expanded_df  : DataFrame/GeoDataFrame containing cbg_col.
      crosswalk_df : DataFrame with 'bg2010ge' and 'bg2020ge' columns.
      cbg_col      : name of the 2010 block-group column in expanded_df.

    Returns:
      The merged frame (inputs are not modified).
    """
    merged = expanded_df.merge(
        crosswalk_df,
        how='left',
        left_on=cbg_col,
        right_on='bg2010ge',
    )
    merged = merged.rename(columns={'bg2020ge': 'cbg_2020'})

    if 'geometry' not in merged.columns:
        return merged
    return gpd.GeoDataFrame(merged, geometry='geometry', crs="EPSG:4326")

In [None]:
def extract_unique_cbg_keys(df):
    """
    Collect every CBG key that appears in the 'adjusted_cbg_visitors' column.

    Each cell is expected to be a dict keyed by CBG id. Keys are converted to
    int; keys that cannot be converted (non-numeric strings, None, ...) are
    skipped, and cells that are not dicts (e.g. None/NaN) are ignored.

    Parameters:
      df : DataFrame with an 'adjusted_cbg_visitors' column.

    Returns:
      set of int CBG ids found across all rows.
    """
    unique_keys = set()
    for visitors in df['adjusted_cbg_visitors']:
        if not isinstance(visitors, dict):
            continue
        for key in visitors:
            try:
                unique_keys.add(int(key))
            except (TypeError, ValueError):
                continue
    return unique_keys

def count_common_cbgs(cbg_gdf,crosswalk_df, unique_cbg_keys):
    """
    Compare two collections of CBG ids against the crosswalk's 2020 GEOIDs.

    Both cbg_gdf['cbg'] and crosswalk_df['GEOID_BLKGRP_20'] are cast to int
    before comparison.

    Parameters:
      cbg_gdf         : frame with a 'cbg' column.
      crosswalk_df    : frame with a 'GEOID_BLKGRP_20' column.
      unique_cbg_keys : set of int CBG ids (e.g. from visitor dictionaries).

    Returns:
      Tuple of four items, in this order:
        - number of cbg_gdf CBGs that are also 2020 GEOIDs,
        - the set of those CBGs,
        - the set of unique_cbg_keys that are also 2020 GEOIDs,
        - the size of that second set.
    """
    gdf_ids = set(cbg_gdf['cbg'].astype(int))
    geoids_2020 = set(crosswalk_df['GEOID_BLKGRP_20'].astype(int))

    shared_with_gdf = gdf_ids & geoids_2020
    shared_with_keys = unique_cbg_keys.intersection(geoids_2020)

    return len(shared_with_gdf), shared_with_gdf, shared_with_keys, len(shared_with_keys)

In [None]:
def compute_quintile_income_segregation(df, cbg_gdf):
    """
    Compute income segregation per POI and experienced income segregation per CBG
    using five income groups.

    POI level: each key of a row's 'adjusted_cbg_visitors' dict is converted to
    an int CBG id and mapped to an income quintile through cbg_gdf. Visitor
    counts are summed per quintile and turned into proportions p_q. The score is

         Sα_q = (5/8) * Σ_q |p_q − 0.2|

    POIs with no visitors from a CBG of known quintile get Sα_q = NaN and an
    all-zero proportions dict.

    CBG level: for CBG b and POI α, the weight τ(b,α) is b's count at α divided
    by b's total count over all POIs. The exposure of b to group q is
    Σ_α τ(b,α) * p_q(α), accumulated over POIs that have valid proportions,
    then rescaled to sum to 1. The score is

         Si_q = (5/8) * Σ_q |exposure_q − 0.2|

    Parameters:
      df      : DataFrame with an 'adjusted_cbg_visitors' column of dicts
                {cbg: visitor count}. Keys that cannot be converted to int are
                skipped; cells that are not dicts are treated as empty.
      cbg_gdf : DataFrame/GeoDataFrame with a 'cbg' column and an
                'income_quintile' column holding labels in
                {"low", "lower_middle", "middle", "upper_middle", "high"}.
                CBGs with a missing or unrecognised label are ignored.
                Not modified.

    Returns:
      Tuple (df_out, cbg_out):
        df_out  : copy of df with columns 'Sα_q' (POI score) and
                  'quintile_proportions' (dict label -> proportion).
        cbg_out : copy of cbg_gdf with 'cbg' converted to int (leading zeros
                  stripped) and a column 'Si_q' (NaN for CBGs without exposure).
    """
    labels = ("low", "lower_middle", "middle", "upper_middle", "high")
    income_label_to_quintile = {label: rank for rank, label in enumerate(labels, start=1)}

    # Work on a copy so the caller's frame is left untouched.
    cbg_gdf = cbg_gdf.copy()
    cbg_gdf["cbg"] = cbg_gdf["cbg"].astype(str).str.lstrip("0").astype(int)

    # Labels that are missing/unknown map to NaN, which would turn every mapped
    # value into a float and break array indexing below; drop them and force ints.
    quintile_by_cbg = cbg_gdf.set_index("cbg")["income_quintile"].map(income_label_to_quintile)
    cbg_income_map = quintile_by_cbg.dropna().astype(int).to_dict()

    def parse_visitors(visitor_dict):
        """Return [(cbg_int, count), ...], skipping keys that are not int-like."""
        if not isinstance(visitor_dict, dict):
            return []
        pairs = []
        for cbg, count in visitor_dict.items():
            try:
                pairs.append((int(cbg), count))
            except (TypeError, ValueError, OverflowError):
                continue
        return pairs

    parsed_rows = [parse_visitors(cell) for cell in df["adjusted_cbg_visitors"]]

    # First pass: total visits of every CBG across all POIs.
    total_visits_per_cbg = {}
    for pairs in parsed_rows:
        for cbg_int, count in pairs:
            total_visits_per_cbg[cbg_int] = total_visits_per_cbg.get(cbg_int, 0) + count

    # Second pass: POI proportions/scores and per-CBG exposure accumulation.
    poi_segregation_scores = []
    quintile_proportions_list = []
    cbg_exposure = {}
    for pairs in parsed_rows:
        quintile_counts = np.zeros(len(labels), dtype=float)
        for cbg_int, count in pairs:
            quintile = cbg_income_map.get(cbg_int)
            if quintile is not None:
                quintile_counts[quintile - 1] += count  # quintiles are 1-based

        total = quintile_counts.sum()
        if total == 0:
            poi_segregation_scores.append(np.nan)
            quintile_proportions_list.append(dict.fromkeys(labels, 0))
            continue

        proportions = quintile_counts / total
        poi_segregation_scores.append((5/8) * np.sum(np.abs(proportions - 0.2)))
        quintile_proportions_list.append(dict(zip(labels, proportions)))

        for cbg_int, count in pairs:
            global_total = total_visits_per_cbg.get(cbg_int, 0)
            if global_total == 0:
                continue
            # Share of this CBG's visits made to the current POI.
            contribution = (count / global_total) * proportions
            if cbg_int in cbg_exposure:
                cbg_exposure[cbg_int] += contribution
            else:
                cbg_exposure[cbg_int] = np.array(contribution, dtype=float)

    df = df.copy()
    df['Sα_q'] = poi_segregation_scores
    df['quintile_proportions'] = quintile_proportions_list

    experienced_income_segregation = {}
    for cbg_int, exposure_array in cbg_exposure.items():
        exposure_sum = exposure_array.sum()
        if exposure_sum == 0:
            experienced_income_segregation[cbg_int] = np.nan
        else:
            normalized_exposure = exposure_array / exposure_sum
            experienced_income_segregation[cbg_int] = (5/8) * np.sum(np.abs(normalized_exposure - 0.2))

    cbg_gdf['Si_q'] = cbg_gdf['cbg'].map(experienced_income_segregation)

    return df, cbg_gdf

In [None]:
def compute_residential_income_segregation(cbg_gdf):
    """
    Compute S_res (residential income segregation) for each CBG from household
    counts per income bracket.

    Bracket columns are grouped into four income categories:
      low          = 'less_than_10k', '10k_15k', '15k_to_20k'
      lower_middle = '20k_to_25k', '25k_to_30k', '30k_to_35k',
                     '35k_to_40k', '40k_to_45k', '45k_to_50k'
      upper_middle = '50k_to_60k', '60k_to_75k', '75k_to_100k'
      high         = '100k_to_125k', '125k_to_150k', '150k_to_200k', '200k_or_more'

    With p_c the share of a CBG's households in category c:

      S_res = (2/3) * Σ_c |p_c − 0.25|

    A bracket column absent from the frame contributes 0. A CBG whose total
    across all brackets is 0 gets S_res = NaN.

    Rows with a missing S_res are dropped from the result. If the frame also
    has an 'Si' column, rows with a missing 'Si' are dropped as well. The index
    of the result is reset to 0..n-1.

    Parameters
    ----------
    cbg_gdf : DataFrame (or GeoDataFrame)
        One row per CBG with the bracket columns listed above. Not modified.

    Returns
    -------
    DataFrame (copy)
        The surviving rows with an added "S_res" column.
    """
    # Income category -> bracket columns that belong to it.
    bracket_map = {
        'low': ['less_than_10k', '10k_15k', '15k_to_20k'],
        'lower_middle': ['20k_to_25k', '25k_to_30k', '30k_to_35k',
                         '35k_to_40k', '40k_to_45k', '45k_to_50k'],
        'upper_middle': ['50k_to_60k', '60k_to_75k', '75k_to_100k'],
        'high': ['100k_to_125k', '125k_to_150k', '150k_to_200k', '200k_or_more'],
    }

    def compute_s_res_for_row(row):
        category_totals = [
            sum(float(row.get(bracket_col, 0.0)) for bracket_col in bracket_cols)
            for bracket_cols in bracket_map.values()
        ]
        total_pop = sum(category_totals)
        if total_pop == 0:
            return np.nan
        return (2.0 / 3.0) * sum(abs(pop / total_pop - 0.25) for pop in category_totals)

    new_gdf = cbg_gdf.copy()
    new_gdf["S_res"] = new_gdf.apply(compute_s_res_for_row, axis=1)

    # 'Si' only exists once experienced segregation has been computed; do not
    # require it just to obtain S_res.
    required = [col for col in ('Si', 'S_res') if col in new_gdf.columns]
    return new_gdf.dropna(subset=required).reset_index(drop=True)

In [None]:
def compute_place_entropy(df, cbg_gdf):
    """
    Compute, for each place (row of df), a normalized entropy of its visitors'
    income-quartile mix.

    Visitor counts in 'adjusted_cbg_visitors' are summed per income quartile of
    the visiting CBG, giving proportions τ_q. The entropy is

         Hα = − ( Σ_{q=1..4} τ_q · log(τ_q) ) / log(4)

    with 0·log(0) taken as 0. Hα is 1 when all four groups are equally
    represented and 0 when a single group accounts for every visitor. Places
    with no visitors from a CBG of known quartile get NaN.

    Parameters:
      df      : DataFrame with an 'adjusted_cbg_visitors' column of dicts
                {cbg (str or int): visitor count}. Keys that cannot be
                converted to int are skipped; cells that are not dicts yield NaN.
      cbg_gdf : DataFrame/GeoDataFrame with columns
                - 'cbg': CBG identifier (leading zeros are stripped, then int).
                - 'income_quantile': one of
                  {"low", "lower_middle", "upper_middle", "high"}; CBGs with a
                  missing or unrecognised label are ignored.
                Not modified.

    Returns:
      A copy of df with an added column 'Hα'.
    """
    income_label_to_quartile = {
        "low": 1,
        "lower_middle": 2,
        "upper_middle": 3,
        "high": 4,
    }
    n_groups = len(income_label_to_quartile)

    cbg_gdf = cbg_gdf.copy()
    cbg_gdf["cbg"] = cbg_gdf["cbg"].astype(str).str.lstrip("0").astype(int)

    # Labels that are missing/unknown map to NaN, which would turn every mapped
    # value into a float and break array indexing below; drop them and force ints.
    quartile_by_cbg = cbg_gdf.set_index("cbg")["income_quantile"].map(income_label_to_quartile)
    cbg_income_map = quartile_by_cbg.dropna().astype(int).to_dict()

    def entropy_from_dict(visitor_dict):
        """Return Hα for one visitor dict, or NaN when nothing can be attributed."""
        if not isinstance(visitor_dict, dict):
            return np.nan

        quartile_counts = np.zeros(n_groups, dtype=float)  # index q-1 holds quartile q
        for cbg, count in visitor_dict.items():
            try:
                cbg_int = int(cbg)
            except (TypeError, ValueError, OverflowError):
                continue
            quartile = cbg_income_map.get(cbg_int)
            if quartile is not None:
                quartile_counts[quartile - 1] += count

        total = quartile_counts.sum()
        if total == 0:
            return np.nan

        proportions = quartile_counts / total
        # Only positive shares contribute (0 * log 0 is defined as 0).
        entropy_sum = 0.0
        for p in proportions:
            if p > 0:
                entropy_sum += p * np.log(p)
        return -entropy_sum / np.log(n_groups)

    df = df.copy()
    df["Hα"] = df["adjusted_cbg_visitors"].apply(entropy_from_dict)
    return df

In [None]:
def compute_out_of_cbg_visitors(mp):
    """
    Add an 'out_of_cbg_visitors' column: visitors whose home CBG differs from the POI's CBG.

    For every row, the counts in 'adjusted_cbg_visitors' are summed over all
    keys that do not identify the row's 'POI_CBG'. Identifiers are normalized
    before comparison so that the same CBG written as 10730001001,
    10730001001.0, "10730001001" or "010730001001" is recognised as equal;
    identifiers that are not integer-like are compared as stripped strings.
    Rows whose visitor cell is not a dict contribute 0.

    Parameters:
      mp : DataFrame with 'adjusted_cbg_visitors' and 'POI_CBG' columns.
           Not modified.

    Returns:
      A copy of mp with the 'out_of_cbg_visitors' column added.
    """
    def normalize_cbg(value):
        # A CBG column containing NaN is read as float, giving ids like 1.0e10
        # whose str() ends in ".0" and would never match a dict key.
        if isinstance(value, float) and value.is_integer():
            return int(value)
        text = str(value).strip()
        try:
            return int(text)
        except ValueError:
            return text

    def calc_out_of_cbg_visitors(row):
        visitors = row.get('adjusted_cbg_visitors', {})
        if not isinstance(visitors, dict):
            return 0
        poi_cbg = normalize_cbg(row.get('POI_CBG', ''))
        return sum(
            count
            for cbg, count in visitors.items()
            if normalize_cbg(cbg) != poi_cbg
        )

    mp = mp.copy()
    mp['out_of_cbg_visitors'] = mp.apply(calc_out_of_cbg_visitors, axis=1)
    return mp

In [None]:
def calc_avg_dwell(stops_mp):
    """
    Estimate a weighted average dwell time per row from bucketed dwell counts.

    'BUCKETED_DWELL_TIMES' holds, per row, a dict (or its string form) mapping
    the buckets "<5", "5-20", "21-60", "61-240", ">240" to counts. String cells
    are parsed individually with ast.literal_eval, so a column mixing strings
    with already-parsed dicts or missing values is handled. Each bucket is
    represented by one dwell time (2.5, 12.5, 40.5, 150 and 300 respectively)
    and the result is the count-weighted mean of those values. Rows with no
    counts get NaN.

    Parameters:
      stops_mp : DataFrame with a 'BUCKETED_DWELL_TIMES' column. Modified in
                 place: the column is replaced by its parsed form, one column
                 per bucket is added (missing buckets as 0), and
                 'weighted_avg_dwell_time' is added.

    Returns:
      The same DataFrame object.
    """
    def parse(value):
        return ast.literal_eval(value) if isinstance(value, str) else value

    stops_mp['BUCKETED_DWELL_TIMES'] = stops_mp['BUCKETED_DWELL_TIMES'].apply(parse)

    buckets = ["<5", "5-20", "21-60", "61-240", ">240"]
    rep_values = np.array([2.5, 12.5, 40.5, 150, 300])  # representative dwell time per bucket
    for bucket in buckets:
        # .str.get also extracts by key from dict elements; absent keys and
        # missing cells become 0 counts.
        stops_mp[bucket] = stops_mp["BUCKETED_DWELL_TIMES"].str.get(bucket).fillna(0)

    counts = stops_mp[buckets]
    weighted_sum = (counts * rep_values).sum(axis=1)
    total_count = counts.sum(axis=1)
    stops_mp["weighted_avg_dwell_time"] = np.where(total_count > 0, weighted_sum / total_count, np.nan)
    return stops_mp

In [None]:
def save_mp(df=None, path='/content/drive/MyDrive/data/final_mp.csv'):
    """
    Write a frame to CSV without its 'geometry' column and without the index.

    Parameters:
      df   : frame to save. When None (the default), the module-level `mp`
             is used, which is the original behaviour.
      path : destination CSV path. Defaults to the original hard-coded
             location.

    The source frame is not modified. A frame that has no 'geometry' column is
    written as is.
    """
    frame = mp if df is None else df
    # drop() returns a new frame, so the source keeps its geometry.
    frame.drop(columns='geometry', errors='ignore').to_csv(path, index=False)

In [None]:
def kde_twocols(stops_mp, col1, col2):
    """
    Overlay the kernel density estimates of two columns of stops_mp in one figure.

    Parameters:
      stops_mp : DataFrame holding both columns.
      col1     : name of the first column; also used as its legend label.
      col2     : name of the second column; also used as its legend label.

    The x-axis is labelled "Time (minutes)". The figure is shown immediately;
    nothing is returned.
    """
    sns.set_style("whitegrid")
    plt.figure(figsize=(10, 6))

    # One filled, semi-transparent density curve per column.
    for column in (col1, col2):
        sns.kdeplot(stops_mp[column], label=column, fill=True, alpha=0.5)

    plt.title(f"KDE of {col1} and {col2}")
    plt.xlabel("Time (minutes)")
    plt.ylabel("Density")
    plt.legend()
    plt.show()

In [None]:
def temp(stops_mp):
    """
    Compare stop-based and popularity-based activity measures: print three
    Pearson correlations and show the matching scatterplots.

    Processing (stops_mp is modified in place):
      - 'adjusted_visits_by_day', 'stops_by_day', 'stops_by_day_of_week',
        'POPULARITY_BY_DAY', 'stops_by_hour' and 'POPULARITY_BY_HOUR' are parsed
        from their string form with ast.literal_eval (unparseable strings
        become NaN).
      - 'stops_by_hour', 'POPULARITY_BY_HOUR', 'adjusted_visits_by_day' and
        'stops_by_day' are replaced by the sum of their list (NaN when the
        value is not a list).
      - 'stops_by_day_of_week' and 'POPULARITY_BY_DAY' are replaced by a
        7-element list ordered Monday..Sunday (NaN when the value is not a
        dict). Day names are matched case-insensitively; absent days count as 0.

    Correlations use only rows where both values are present, so values from
    different rows are never paired with each other.

    Parameters:
      stops_mp : DataFrame containing the six columns listed above.

    Returns:
      None.
    """
    day_order = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
    parsed_cols = ['adjusted_visits_by_day', 'stops_by_day', 'stops_by_day_of_week',
                   'POPULARITY_BY_DAY', 'stops_by_hour', 'POPULARITY_BY_HOUR']
    summed_cols = ['stops_by_hour', 'POPULARITY_BY_HOUR', 'adjusted_visits_by_day', 'stops_by_day']

    def safe_convert(value):
        """Convert the string form of a list/dict into the object; NaN on failure."""
        if isinstance(value, str):
            try:
                return ast.literal_eval(value)
            except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
                return np.nan
        return value

    def list_total(value):
        return np.sum(value) if isinstance(value, list) else np.nan

    def day_values(day_dict):
        """Return the 7 daily values in Monday..Sunday order, or NaN for non-dicts."""
        if not isinstance(day_dict, dict):
            return np.nan
        # Explicit ordering instead of relying on the dict's insertion order.
        by_lower_name = {str(key).lower(): val for key, val in day_dict.items()}
        return [by_lower_name.get(day.lower(), 0) for day in day_order]

    def compute_correlation(x, y):
        """Pearson r over positions where both inputs are present; NaN if < 2 pairs."""
        x_vals = np.asarray(x, dtype=float)
        y_vals = np.asarray(y, dtype=float)
        if x_vals.shape != y_vals.shape:
            return np.nan
        both_present = ~(np.isnan(x_vals) | np.isnan(y_vals))
        if both_present.sum() < 2:
            return np.nan
        return np.corrcoef(x_vals[both_present], y_vals[both_present])[0, 1]

    for col in parsed_cols:
        stops_mp[col] = stops_mp[col].apply(safe_convert)
    for col in summed_cols:
        stops_mp[col] = stops_mp[col].apply(list_total)
    for col in ('stops_by_day_of_week', 'POPULARITY_BY_DAY'):
        stops_mp[col] = stops_mp[col].apply(day_values)

    # Flatten the per-day vectors, keeping only rows that have both of them so
    # that element i of one vector always faces element i of the other.
    popularity_by_day, stops_by_weekday = [], []
    for popularity, stops in zip(stops_mp['POPULARITY_BY_DAY'], stops_mp['stops_by_day_of_week']):
        if isinstance(popularity, list) and isinstance(stops, list):
            popularity_by_day.extend(popularity)
            stops_by_weekday.extend(stops)

    correlations = {
        "adjusted_visits_by_day vs. stops_by_day": compute_correlation(
            stops_mp['adjusted_visits_by_day'], stops_mp['stops_by_day']),
        "stops_by_hour vs. POPULARITY_BY_HOUR": compute_correlation(
            stops_mp['stops_by_hour'], stops_mp['POPULARITY_BY_HOUR']),
        "POPULARITY_BY_DAY vs. stops_by_day_of_week": compute_correlation(
            popularity_by_day, stops_by_weekday),
    }

    print("Correlations:")
    for key, value in correlations.items():
        print(f"{key}: {value:.4f}")

    plt.figure(figsize=(12, 4))

    plt.subplot(1, 3, 1)
    plt.scatter(stops_mp['adjusted_visits_by_day'], stops_mp['stops_by_day'], alpha=0.5)
    plt.xlabel("Adjusted Visits by Day")
    plt.ylabel("Stops by Day")
    plt.title("Scatterplot: Adjusted Visits vs. Stops by Day")

    plt.subplot(1, 3, 2)
    plt.scatter(stops_mp['stops_by_hour'], stops_mp['POPULARITY_BY_HOUR'], alpha=0.5)
    plt.xlabel("Stops by Hour")
    plt.ylabel("Popularity by Hour")
    plt.title("Scatterplot: Stops by Hour vs. Popularity")

    plt.subplot(1, 3, 3)
    plt.scatter(popularity_by_day, stops_by_weekday, alpha=0.5)
    plt.xlabel("Popularity by Day")
    plt.ylabel("Stops by Day of Week")
    plt.title("Scatterplot: Popularity by Day vs. Stops by Day")

    plt.tight_layout()
    plt.show()

In [None]:
import pandas as pd
pd.set_option('display.max_columns', None)
import random
import geopandas as gpd
from shapely.geometry import Polygon,Point
import numpy as np
import regex as re
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import pearsonr
from collections import Counter
import math
from scipy.spatial import cKDTree
from scipy.stats import spearmanr
import warnings
warnings.filterwarnings('ignore')
import ast
from ast import literal_eval
from cbgs_processor import compute_racial_weighted_mean,compute_income_weighted_mean,compute_racial_visitor_counts,compute_weighted_mean,compute_racial_visitor_counts,normalize_cbg_data,compute_exact_visitor_counts
from income_segregation import compute_residential_income_segregation,compute_income_segregation,get_income_data,calculate_income_quintiles_cbsa,compute_quintile_income_segregation
from helpers import load_data
from racial_segregation import compute_racial_segregation_with_exposure,get_racial_data,compute_racial_segregation_with_cbsa_baseline
#segments_gdf=gpd.read_file('/content/drive/MyDrive/data/segments_gdf.geojson')
#route_stats=pd.read_csv('/content/drive/MyDrive/data/route_stats.csv')
#spend=pd.read_csv('/content/drive/MyDrive/data/brh_sp_2023.csv')
# Load the National Walkability Index shapefile and the previously saved stops_mp table.
nwd=gpd.read_file('/content/drive/MyDrive/data/nwd/NationalWalkabilityIndex.shp')
stops_mp=pd.read_csv('/content/drive/MyDrive/data/stops_mp.csv')
# load_data is a project helper (helpers module); its three return values are bound here as
# mp, cbg_gdf and brh_np. Their schemas are not visible in this file -- see helpers.load_data.
mp, cbg_gdf,brh_np=load_data()
# Combined no-vehicle population = renter + owner no-vehicle counts.
cbg_gdf['no_veh_pop']=cbg_gdf['no_veh_renter']+cbg_gdf['no_veh_owner']
#mp=pd.read_csv('/content/drive/MyDrive/data/mp.csv')
# Project helpers (cbgs_processor); each returns the updated frame, which is rebound.
cbg_gdf=normalize_cbg_data(cbg_gdf)
mp=compute_racial_weighted_mean(mp,cbg_gdf)
mp=compute_income_weighted_mean(mp,cbg_gdf)

#mp = compute_racial_visitor_counts(mp, 'weighted_means', 'visitor_counts_cbg_scaled')
#mp = compute_exact_visitor_counts(mp, 'weighted_means', 'RAW_VISITOR_COUNTS', 'no_veh_renter_frac', 'no_veh_renters')
#mp = compute_exact_visitor_counts(mp, 'weighted_means', 'RAW_VISITOR_COUNTS', 'no_veh_owner_frac', 'no_veh_owners')
#mp['visitors_w_no_car'] = mp['no_veh_renters'] + mp['no_veh_owners']
#mp = compute_exact_visitor_counts(mp, 'weighted_means', 'RAW_VISITOR_COUNTS', 'with_disability_frac', 'visitors_w_disability')
#mp = compute_exact_visitor_counts(mp, 'weighted_means', 'RAW_VISITOR_COUNTS', 'below_poverty_frac', 'visitors_below_poverty')
#mp = compute_exact_visitor_counts(mp, 'weighted_means', 'RAW_VISITOR_COUNTS', 'commuting_pop_frac', 'commuting_visitors')
#mp = compute_exact_visitor_counts(mp, 'weighted_means', 'RAW_VISITOR_COUNTS', 'unemployment_p', 'unemployed_visitors')
# Normalise block-group identifiers to plain integers: cast to str, strip leading zeros,
# cast to int, so the different tables can be joined on the same key type.
# NOTE(review): a value consisting only of zeros (or a missing value) would not survive the
# final astype(int) -- confirm the ID columns never contain such entries.
nwd['GEOID10']=nwd['GEOID10'].astype(str).str.lstrip("0").astype(int)
nwd['GEOID20']=nwd['GEOID20'].astype(str).str.lstrip("0").astype(int)
mp['POI_CBG']=mp['POI_CBG'].astype(str).str.lstrip("0").astype(int)
# Segregation measures from the project modules; both helpers return (mp, cbg_gdf).
mp,cbg_gdf=compute_income_segregation(mp,cbg_gdf)
mp,cbg_gdf=compute_racial_segregation_with_exposure(mp,cbg_gdf)
# NOTE(review): extract_and_upweight_visits is not imported in this cell; presumably it is
# defined in another cell of the notebook -- confirm it has been run first.
mp=extract_and_upweight_visits(mp)
cbg_gdf = compute_residential_income_segregation(cbg_gdf)
# Same integer normalisation for the remaining join keys.
brh_np["AREA"] = brh_np["AREA"].astype(str).str.lstrip("0").astype(int)
cbg_gdf["cbg"] = cbg_gdf["cbg"].astype(str).str.lstrip("0").astype(int)
# NOTE(review): repeats the POI_CBG cast done above; redundant unless one of the
# intervening helpers changes the column's dtype -- confirm before removing.
mp['POI_CBG']=mp['POI_CBG'].astype(str).str.lstrip("0").astype(int)
# Drop every cbg_gdf column whose name starts with 'poi_count' or 'Median'.
cbg_gdf = cbg_gdf.loc[:, ~cbg_gdf.columns.str.startswith(('poi_count', 'Median'))]
# Ratio of total population to resident devices; the 1e-9 term avoids division by zero.
cbg_gdf['upweighting_factor']=cbg_gdf['tot_pop']/(1e-9+cbg_gdf['number_devices_residing'])
# Left merge keeps every mp row; rows whose POI_CBG has no match in cbg_gdf get NaN for
# income_quantile and upweighting_factor.
mp=mp.merge(cbg_gdf[['cbg','income_quantile','upweighting_factor']],left_on='POI_CBG',right_on='cbg',how='left')
# String form of adjusted_cbg_visitors, used below as a de-duplication key.
mp['adjusted_cbg_visitors_str']=mp['adjusted_cbg_visitors'].astype(str)
# Strip surrounding whitespace so that otherwise-equal keys and categories compare equal.
mp['place_category'] = mp['place_category'].str.strip()
mp['place_subcategory'] = mp['place_subcategory'].str.strip()
mp['adjusted_cbg_visitors_str'] = mp['adjusted_cbg_visitors_str'].str.strip()
# First pass: keep only the first row for each (visitor key, place_category) pair.
mp.drop_duplicates(subset=['adjusted_cbg_visitors_str','place_category'], keep='first', inplace=True)
# Categories that custom_deduplication prefers when several rows share one visitor key.
priority_categories = {
    'Arts and Culture', 'Retail for Basic Necessities', 'Healthcare',
    'Restaurants', 'Sports and Exercise', 'City/Outdoors',
    'Religious Organizations', 'Social Support',
    'Coffee Shops, Snacks & Bakeries', 'College', 'Entertainment',
    'Transportation', 'School'}

# Sort by visitor key, then category, so the "first row" chosen per group in the
# second de-duplication pass is deterministic (alphabetical by place_category).
mp = mp.sort_values(by=["adjusted_cbg_visitors_str", "place_category"], ascending=True)
def custom_deduplication(group):
    """
    Select the single row that should represent a group of duplicate rows.

    Parameters:
      group : pd.DataFrame
          The rows of one group; must contain a "place_category" column.

    Returns:
      pd.DataFrame
          A one-row frame: the first row whose place_category is in the
          module-level priority_categories set, or the first row of the
          group when no row belongs to a priority category.
    """
    is_priority = group["place_category"].isin(priority_categories)
    # Fall back to the whole group when nothing matches a priority category.
    candidates = group[is_priority] if is_priority.any() else group
    # Double brackets keep the result a DataFrame rather than a Series.
    return candidates.iloc[[0]]

# Second de-duplication pass: collapse each visitor-key group to one row (priority
# categories win), then discard the temporary key column and the old index.
mp = (
    mp.groupby("adjusted_cbg_visitors_str", group_keys=False)
    .apply(custom_deduplication)
    .reset_index(drop=True)
    .drop(columns=['adjusted_cbg_visitors_str'])
)
# Order rows from most to least raw visitors, renumbering from 0.
mp = mp.sort_values(by='RAW_VISITOR_COUNTS', ascending=False).reset_index(drop=True)
# For rows flagged as merged that carry a brand, show the brand as the location name.
mask = mp["BRANDS"].notna() & (mp["merged_flag"] == True)
mp.loc[mask, "LOCATION_NAME"] = mp.loc[mask, "BRANDS"]
# Discard rows with no Sα value and renumber the index.
mp = mp.dropna(subset='Sα', ignore_index=True)
mp = compute_out_of_cbg_visitors(mp)


In [None]:
import numpy as np
import ast

# Ensure POPULARITY_BY_HOUR is a list
def safe_convert_to_list(value):
    """
    Parse a string cell with ast.literal_eval; pass any other value through.

    Parameters:
      value : object
          A cell value. Only str instances are parsed.

    Returns:
      object
          The literal parsed from the string, or value itself when it is not a
          string. The parsed result is not checked to be a list, and a malformed
          string propagates the exception raised by ast.literal_eval.
    """
    return ast.literal_eval(value) if isinstance(value, str) else value

# Ensure POPULARITY_BY_DAY is a dictionary
def safe_convert_to_dict(value):
    """
    Parse a string cell with ast.literal_eval; pass any other value through.

    Parameters:
      value : object
          A cell value. Only str instances are parsed.

    Returns:
      object
          The literal parsed from the string, or value itself when it is not a
          string. The parsed result is not checked to be a dict, and a malformed
          string propagates the exception raised by ast.literal_eval.
    """
    if not isinstance(value, str):
        # Already a Python object (or a missing value): nothing to parse.
        return value
    return ast.literal_eval(value)

# Apply the conversion safely
# String cells are parsed with ast.literal_eval; non-string cells are left untouched.
mp['POPULARITY_BY_HOUR'] = mp['POPULARITY_BY_HOUR'].apply(safe_convert_to_list)
mp['POPULARITY_BY_DAY'] = mp['POPULARITY_BY_DAY'].apply(safe_convert_to_dict)

# Multiply each entry in POPULARITY_BY_HOUR by the row's upweighting_factor.
# This is a row-by-row DataFrame.apply (axis=1), not a vectorized operation; only the
# per-row list is multiplied as a NumPy array. Non-list cells are kept unchanged.
mp['POPULARITY_BY_HOUR'] = mp.apply(lambda row: (np.array(row['POPULARITY_BY_HOUR']) * row['upweighting_factor']).tolist()
                                    if isinstance(row['POPULARITY_BY_HOUR'], list) else row['POPULARITY_BY_HOUR'], axis=1)

# Multiply each value in POPULARITY_BY_DAY by the row's upweighting_factor (row-by-row apply).
# Non-dict cells are kept unchanged.
mp['POPULARITY_BY_DAY'] = mp.apply(lambda row: {k: v * row['upweighting_factor'] for k, v in row['POPULARITY_BY_DAY'].items()}
                                   if isinstance(row['POPULARITY_BY_DAY'], dict) else row['POPULARITY_BY_DAY'], axis=1)
# NOTE(review): bare expression in the middle of the cell; in a default Jupyter setup only
# the last expression of a cell is displayed, so this line shows nothing.
mp[['POPULARITY_BY_HOUR','POPULARITY_BY_DAY']]
# NOTE(review): numpy is already imported as np at the top of this cell; this re-import is redundant.
import numpy as np

# Round up each entry in POPULARITY_BY_HOUR to an integer (row-by-row apply).
# NOTE(review): if upweighting_factor is NaN for a row (possible after a left merge with no
# match), the scaled values are NaN and the integer casts below are not meaningful --
# int(np.ceil(NaN)) raises ValueError. Confirm no NaN factors reach this point.
mp['POPULARITY_BY_HOUR'] = mp.apply(lambda row: np.ceil(row['POPULARITY_BY_HOUR']).astype(int).tolist()
                                    if isinstance(row['POPULARITY_BY_HOUR'], list) else row['POPULARITY_BY_HOUR'], axis=1)

# Round up each value in POPULARITY_BY_DAY to an integer (row-by-row apply).
mp['POPULARITY_BY_DAY'] = mp.apply(lambda row: {k: int(np.ceil(v)) for k, v in row['POPULARITY_BY_DAY'].items()}
                                   if isinstance(row['POPULARITY_BY_DAY'], dict) else row['POPULARITY_BY_DAY'], axis=1)
# Same rounding for adjusted_cbg_visitors. Unlike the two columns above, it is not passed
# through literal_eval in this cell, so only cells that are already dicts are rounded.
mp['adjusted_cbg_visitors'] = mp.apply(lambda row: {k: int(np.ceil(v)) for k, v in row['adjusted_cbg_visitors'].items()}
                                   if isinstance(row['adjusted_cbg_visitors'], dict) else row['adjusted_cbg_visitors'], axis=1)
# save_mp is defined outside this chunk; presumably it persists mp -- TODO confirm.
save_mp()

In [None]:
# Write stops_mp back to Drive without the index column. This overwrites the same CSV
# path that stops_mp is loaded from with pd.read_csv earlier in this file.
stops_mp.to_csv('/content/drive/MyDrive/data/stops_mp.csv',index=False)

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import ast

# Function to safely convert string representations of lists/dicts
def safe_convert(value, default):
    """
    Coerce a cell value into a list or dict, falling back to a default.

    Parameters:
      value : object
          The cell value. Strings are parsed with ast.literal_eval; lists and
          dicts are returned unchanged.
      default : object
          Returned whenever value is not, and cannot be parsed into, a list or dict
          (malformed strings, NaN/None, scalars, strings that parse to other types).

    Returns:
      list or dict or type(default)
          The list/dict held by (or parsed from) value, otherwise default.
    """
    if isinstance(value, str):
        try:
            value = ast.literal_eval(value)
        # These are the exception types ast.literal_eval documents for malformed input.
        # Catching only them (instead of a bare except) lets KeyboardInterrupt and
        # SystemExit propagate.
        except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
            return default
    # Apply the same type check to parsed strings as to raw values, so a string such as
    # "5" or "None" cannot slip through as a non-list/dict result.
    return value if isinstance(value, (list, dict)) else default

# Convert necessary columns safely
# stops_mp is loaded with pd.read_csv, so list/dict cells are expected to arrive as their
# string representation; safe_convert parses them and substitutes the given empty
# list/dict for anything it cannot use.
stops_mp['adjusted_visits_by_day'] = stops_mp['adjusted_visits_by_day'].apply(lambda x: safe_convert(x, []))
stops_mp['stops_by_day'] = stops_mp['stops_by_day'].apply(lambda x: safe_convert(x, []))
stops_mp['stops_by_hour'] = stops_mp['stops_by_hour'].apply(lambda x: safe_convert(x, []))
stops_mp['POPULARITY_BY_HOUR'] = stops_mp['POPULARITY_BY_HOUR'].apply(lambda x: safe_convert(x, []))
stops_mp['stops_by_day_of_week'] = stops_mp['stops_by_day_of_week'].apply(lambda x: safe_convert(x, {}))
stops_mp['POPULARITY_BY_DAY'] = stops_mp['POPULARITY_BY_DAY'].apply(lambda x: safe_convert(x, {}))

# All three plots below show only the row labelled 0 of stops_mp.

# Time Series Plot for Adjusted Visits by Day vs Stops by Day
# The x axis has 31 dates, so both daily lists of row 0 must hold 31 values
# (plt.plot requires x and y of equal length).
dates = pd.date_range(start="2023-12-01", end="2023-12-31", freq="D")

plt.figure(figsize=(12, 6))
plt.plot(dates, stops_mp['adjusted_visits_by_day'][0], label="Adjusted Visits by Day", marker="o")
plt.plot(dates, stops_mp['stops_by_day'][0], label="Stops by Day", marker="s")
plt.xlabel("Date (December 2023)")
plt.ylabel("Counts")
plt.title("Adjusted Visits by Day vs Stops by Day (December 2023)")
plt.legend()
plt.xticks(rotation=45)
plt.grid(True)
plt.show()

# Time Series Plot for Stops by Hour vs Popularity by Hour
# One point per hour of the day (0-23); both hourly lists must hold 24 values.
hours = np.arange(24)

plt.figure(figsize=(12, 6))
plt.plot(hours, stops_mp['stops_by_hour'][0], label="Stops by Hour", marker="o")
plt.plot(hours, stops_mp['POPULARITY_BY_HOUR'][0], label="Popularity by Hour", marker="s")
plt.xlabel("Hour of the Day")
plt.ylabel("Counts")
plt.title("Stops by Hour vs Popularity by Hour")
plt.xticks(hours)
plt.legend()
plt.grid(True)
plt.show()

# Time Series Plot for Popularity by Day vs Stops by Day
days_of_week = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]

# The two dicts are not keyed alike: in the sample rows stops_by_day_of_week uses lowercase
# keys starting at 'friday', while POPULARITY_BY_DAY uses capitalised keys starting at
# 'Monday'. Taking dict.values() in insertion order would therefore plot the stop counts
# under the wrong weekday labels. Look every weekday up by case-insensitive name instead;
# a day missing from a dict becomes NaN (a gap in the line) rather than shifting the rest.
stops_lookup = {str(day).lower(): count for day, count in stops_mp['stops_by_day_of_week'][0].items()}
popularity_lookup = {str(day).lower(): count for day, count in stops_mp['POPULARITY_BY_DAY'][0].items()}

stops_by_day_weekly = [stops_lookup.get(day.lower(), np.nan) for day in days_of_week]
popularity_by_day_weekly = [popularity_lookup.get(day.lower(), np.nan) for day in days_of_week]

plt.figure(figsize=(12, 6))
plt.plot(days_of_week, stops_by_day_weekly, label="Stops by Day of Week", marker="o")
plt.plot(days_of_week, popularity_by_day_weekly, label="Popularity by Day", marker="s")
plt.xlabel("Day of the Week")
plt.ylabel("Counts")
plt.title("Stops by Day vs Popularity by Day")
plt.legend()
plt.grid(True)
plt.show()


Unnamed: 0,PLACEKEY,PARENT_PLACEKEY,LOCATION_NAME,address,place_category,place_subcategory,CATEGORY_TAGS,MEDIAN_DWELL,weighted_median_distance_from_home,num_nearby_stops,nearby_stop_ids,POI_CBG,visitor_counts_cbg_scaled,adjusted_visits_by_day,stops_by_day,stops_by_day_of_week,transit_service_period,stops_by_hour,avg_time_between_stops,stop_frequency,median_headway,upweighting_factor,<5,5-20,21-60,61-240,weighted_avg_dwell_time,>240,adjusted_cbg_visitors,POPULARITY_BY_DAY,POPULARITY_BY_HOUR,adjusted_visits_by_day_sum
0,224-222@8gk-ttq-sdv,,The Summit Birmingham,"214 Summit Blvd, Vestavia, AL 35243.0, US",Discretionary Retail,Mall,,56.0,9046.647083,2.0,"['2899', '2900']",10730128032,752520.499386,"[52059, 48388, 27174, 40331, 40104, 39828, 276...","[58.0, 30.0, 0.0, 58.0, 58.0, 58.0, 58.0, 58.0...","{'friday': 290.0, 'saturday': 150.0, 'monday':...","{'friday': ['04:36:28', '21:57:00'], 'saturday...","[0, 0, 0, 0, 6, 2, 4, 6, 6, 6, 4, 2, 6, 6, 6, ...",12.099225,50.0,"[13.566666666666666, 14.5]",14.172131,21110,10907,18313,25066,85.866526,8328,"{10730128032: 20168, 10730027001: 1034, 107301...","{'Monday': 127734, 'Tuesday': 131759, 'Wednesd...","[4847, 2764, 4847, 2140, 3345, 6973, 10573, 31...",0
1,zzw-22m@8gk-twj-q75,,Riverchase Galleria,"2000 Riverchase Galleria, Hoover, AL, 35244, US",Discretionary Retail,Mall,,61.0,12857.542473,1.0,['2098'],10730144081,533346.909139,"[33590, 33757, 29733, 17008, 18693, 20500, 203...","[14.0, 10.0, 0.0, 14.0, 14.0, 14.0, 14.0, 14.0...","{'friday': 70.0, 'saturday': 50.0, 'monday': 5...","{'friday': ['06:48:00', '18:58:00'], 'saturday...","[0, 0, 0, 0, 0, 0, 4, 2, 4, 0, 0, 2, 0, 0, 4, ...",32.565217,300.0,[5.0],14.618182,20002,4844,9771,15644,81.012422,5566,"{10730144081: 17848, 10730143021: 8676, 107301...","{'Monday': 58108, 'Tuesday': 78471, 'Wednesday...","[8450, 5892, 4722, 6169, 4634, 6082, 9926, 122...",0
2,zzy-222@8gk-tpx-gtv,,River Ridge,"4606 Highway 280, Birmingham, AL, 35242, US",Discretionary Retail,Mall,,24.0,9826.700561,2.0,"['2909', '2917']",11170303034,377023.813828,"[16601, 19169, 16714, 20347, 11950, 19960, 195...","[29.0, 15.0, 0.0, 29.0, 29.0, 29.0, 29.0, 29.0...","{'friday': 145.0, 'saturday': 75.0, 'monday': ...","{'friday': ['04:31:35', '21:42:19'], 'saturday...","[0, 0, 0, 0, 2, 2, 3, 2, 2, 3, 2, 4, 2, 2, 3, ...",23.970543,1578.0,"[59.55833333333333, 54.35]",23.153846,15637,8890,8252,5268,50.376019,2572,"{11170303034: 14520, 11170303441: 8240, 107301...","{'Monday': 104563, 'Tuesday': 122994, 'Wednesd...","[7271, 3682, 3497, 2779, 3867, 3335, 13129, 33...",0
3,225-222@8gk-tv9-cnq,,University Of Alabama At Birmingham,"1720 University Blvd, Birmingham, AL 35294",College,University,University,141.0,7823.942284,3.0,"['1479', '1480', '1541']",10730045001,319398.478932,"[29368, 27407, 25824, 27252, 27028, 26736, 263...","[52.0, 33.0, 0.0, 52.0, 52.0, 52.0, 52.0, 52.0...","{'friday': 260.0, 'saturday': 165.0, 'monday':...","{'friday': ['05:14:27', '22:05:17'], 'saturday...","[0, 0, 0, 0, 0, 2, 3, 6, 4, 6, 6, 5, 6, 6, 6, ...",12.033730,480.5,"[38.15, 38.43333333333333, 33.858333333333334]",4.711191,22749,2255,2153,7789,114.652473,14384,"{10730045001: 7382, 10730049022: 7772, 1073014...","{'Monday': 30411, 'Tuesday': 29747, 'Wednesday...","[11972, 5343, 7458, 6869, 7015, 8245, 18619, 1...",0
4,zzy-222@8gk-twj-kmk,zzy-223@8gk-twj-kmk,Riverchase Promenade,"1705 Montgomery Hwy Apt 1735, Hoover, AL, 3524...",Discretionary Retail,Shopping Center,,23.0,11646.996678,1.0,['2099'],10730129122,205399.851134,"[9301, 8357, 8190, 8175, 8022, 8951, 8418, 110...","[7.0, 5.0, 0.0, 7.0, 7.0, 7.0, 7.0, 7.0, 5.0, ...","{'friday': 35.0, 'saturday': 25.0, 'monday': 2...","{'friday': ['06:55:55', '19:00:42'], 'saturday...","[0, 0, 0, 0, 0, 0, 2, 1, 2, 0, 0, 1, 0, 0, 2, ...",67.839394,2626.0,[43.766666666666666],16.168675,10014,3500,3597,1937,42.147874,1155,"{10730143021: 3794, 10730129122: 3622, 1073014...","{'Monday': 29702, 'Tuesday': 39129, 'Wednesday...","[1423, 1375, 1197, 1537, 1245, 1763, 4301, 897...",0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2894,223-222@8gk-tsw-tvz,,Church At Southside,"500 20th Ave S, Birmingham, AL, 35205, US",Religious Organizations,Religious Organization,"Churches,Hindu Temple,Mosque,Sikh Temple,Synag...",27.0,488.611927,4.0,"['1492', '1493', '1528', '1529']",10730050002,405.888953,"[54, 54, 54, 81, 81, 81, 81, 81, 81, 81, 81, 8...","[68.0, 44.0, 0.0, 68.0, 68.0, 68.0, 68.0, 68.0...","{'friday': 340.0, 'saturday': 220.0, 'monday':...","{'friday': ['05:28:14', '22:18:58'], 'saturday...","[0, 0, 0, 0, 0, 2, 4, 6, 8, 8, 8, 6, 8, 8, 6, ...",9.105706,44.0,"[40.0, 40.0, 34.141666666666666, 34.1083333333...",28.785714,61,6,10,0,47.556180,12,"{10730050002: 275, 10730050001: 132}","{'Monday': 346, 'Tuesday': 346, 'Wednesday': 2...","[519, 317, 317, 317, 375, 317, 317, 432, 317, ...",0
2895,226-222@8gk-tt9-4gk,,Huffstutler Paint & Body,"712 Graymont Ave N, Birmingham, AL, 35203, US",Personal Services,Auto Body Shop,Body Shops,153.0,19325.559535,10.0,"['1134', '1135', '1276', '1277', '1363', '1364...",10730029003,124.493982,"[27, 36, 27, 44, 44, 27, 44, 44, 0, 27, 44, 44...","[266.0, 209.0, 0.0, 266.0, 266.0, 266.0, 266.0...","{'friday': 1330.0, 'saturday': 1045.0, 'monday...","{'friday': ['04:56:52', '22:06:13'], 'saturday...","[0, 0, 0, 0, 2, 12, 21, 27, 30, 41, 33, 29, 38...",2.277323,41.5,"[10.366666666666667, 10.416666666666666, 19.5,...",5.882353,28,13,9,18,138.650943,38,"{10730111113: 24, 10730027001: 4, 10730037003:...","{'Monday': 100, 'Tuesday': 112, 'Wednesday': 1...","[30, 30, 30, 30, 30, 42, 206, 183, 206, 159, 3...",0
2896,226-223@8gk-tv8-mp9,,Offices of Real Estate Agents and Brokers - 29...,"2911 Crescent Ave, Birmingham, AL 35209","Financial, Legal, Real Estate and Insurance Se...",Real Estate Agents,,197.0,47090.303353,8.0,"['2251', '2435', '2436', '2437', '2438', '2446...",10730107022,125.760000,"[31, 52, 42, 52, 21, 21, 21, 52, 0, 52, 52, 21...","[355.0, 219.0, 0.0, 355.0, 355.0, 355.0, 355.0...","{'friday': 1775.0, 'saturday': 1095.0, 'monday...","{'friday': ['06:18:21', '21:09:53'], 'saturday...","[0, 0, 0, 0, 0, 0, 14, 21, 39, 25, 45, 36, 40,...",1.858732,36.0,"[10.2, 10.216666666666667, 10.133333333333333,...",22.955556,13,7,4,42,153.133333,24,{11210113001: 126},"{'Monday': 414, 'Tuesday': 253, 'Wednesday': 2...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 804, 1148, 1217, 1...",0
2897,227-223@8gm-9n5-6c5,,LaCole's & Ann's Florist,"1604 4th Ave N, Bessemer, AL 35020",Personal Services,Florists,,79.0,6660.742492,13.0,"['1060', '1061', '1087', '1088', '1089', '1090...",10730102002,182.390476,"[33, 33, 33, 33, 33, 50, 33, 33, 33, 50, 33, 3...","[306.0, 247.0, 0.0, 306.0, 306.0, 306.0, 306.0...","{'friday': 1530.0, 'saturday': 1235.0, 'monday...","{'friday': ['04:17:00', '22:28:00'], 'saturday...","[0, 0, 0, 0, 7, 20, 34, 28, 29, 42, 37, 34, 26...",2.217480,41.0,"[11.0, 11.0, 13.2, 13.15, 30.0, 30.0, 38.0, 38...",35.078947,12,6,19,16,136.032468,24,{10730100012: 183},"{'Monday': 351, 'Tuesday': 351, 'Wednesday': 3...","[141, 141, 141, 141, 141, 141, 667, 1088, 842,...",0
