In [568]:
import altair as alt
import pandas as pd
import numpy as np

## Merging Native Economic Trends and NIGC data

LLM usage for finding sources of artifacts from cleaning messy pdf and merging with NET data, visual artifacts in charts from jagged data entries.

In [569]:
net = pd.read_excel('../data/native-economic-trends-data_current.xlsx', sheet_name=1)
nigc = pd.read_csv('../data/clean/clean_gaming_report.csv')

In [570]:
crosswalk = pd.read_csv('../data/clean/verified_crosswalk.csv')

In [571]:
nigc.drop_duplicates().shape

(774, 7)

In [572]:
nigc.shape

(831, 7)

In [573]:
nigc.drop_duplicates(inplace=True)

In [574]:
nigc_facility_counts = nigc.loc[:, ['owner_name', 'is_facility']].groupby(
    by=['owner_name']
    ).sum(

    ).sort_values(
        'is_facility', ascending=False
    )
nigc_facility_counts = nigc_facility_counts.loc[nigc_facility_counts['is_facility']>0, :]
# nigc_facility_counts
# These counts are wrong, at least for top 2 nations

In [575]:
nigc_classes_counts_long = nigc.loc[nigc['is_facility'], ['owner_name', 'classes']].groupby(['owner_name', 'classes']).size()
nigc_classes_counts_long = pd.DataFrame(nigc_classes_counts_long).reset_index().rename(columns={0:'count'})
nigc_classes_counts_wide = nigc_classes_counts_long.pivot(index='owner_name', columns='classes', values='count')#.reset_index(drop=True)

nigc_classes_counts_wide.columns.name = None
nigc_classes_counts_wide.reset_index(inplace=True)

nigc_classes_counts_wide.replace(np.nan, 0, inplace=True)

In [576]:
nigc_classes_counts_wide.columns

Index(['owner_name', 'Class II', 'Class II & III', 'Class III', 'Other'], dtype='object')

In [577]:
nigc_classes_counts_wide.loc[:, ['Class II', 'Class II & III', 'Class III', 'Other']] = nigc_classes_counts_wide.loc[:, ['Class II', 'Class II & III', 'Class III', 'Other']].astype(int)
nigc_all_counts = nigc_facility_counts.merge(nigc_classes_counts_wide, on='owner_name')
nigc_all_counts.fillna(0, inplace=True)

cols_to_int = ['is_facility', 'Class II', 'Class II & III', 'Class III', 'Other']
nigc_all_counts[cols_to_int] = nigc_all_counts[cols_to_int].astype(int)

### Applying crosswalk

In [578]:
# crosswalk = crosswalk.loc[:, ['owner_name', 'geoname_mostrecent']]
# crosswalk.drop(columns=['no_reservation_or_not_listed'])

In [579]:
nigc_all_counts = nigc_all_counts.merge(crosswalk, on='owner_name')

In [580]:
# Outer join bc we care about non-gaming nations as well, of course
gaming_net_crosswalked = nigc_all_counts.merge(net, on='geoname_mostrecent', how='outer')

In [581]:
# gaming_net_crosswalked.to_csv('../data/clean/gaming_net_crosswalked.csv', index=False)

`perpersinc_total`$ = \frac{i_t}{p_t} = \frac{i_a + i_b}{p_a + p_b}$


`perpersinc_aiana`$ = \frac{i_a}{p_a}$, where we know $i_a, i_t, p_a, p_t$ and the named vars, but want to find $i_b, p_b$.


## Visualizations

In [582]:
gaming_net_crosswalked = pd.read_csv('../data/clean/gaming_net_crosswalked.csv')

In [583]:
data_for_bar = gaming_net_crosswalked.copy()
cols_to_fillna = ['is_facility', 'Class II', 'Class II & III', 'Class III', 'Other']
data_for_bar.loc[:, cols_to_fillna] = data_for_bar.loc[:, cols_to_fillna].fillna(0)

In [584]:
data_for_bar = data_for_bar.loc[(data_for_bar['is_facility']>0) & (data_for_bar['datayear']==2020), :]

In [585]:
# alt.Chart(data_for_bar).mark_bar().encode(
#     x = alt.X('owner_name:N'),
#     y = alt.Y('is_facility')
# )

In [586]:
gaming_net_crosswalked.loc[
        gaming_net_crosswalked['is_facility'].isna(), 
        ['is_facility','Class II', 'Class II & III', 'Class III', 'Other']
    ] = 0

In [587]:
gaming_net_crosswalked['geoname_abbrev'] = gaming_net_crosswalked['geoname_mostrecent'].str.removesuffix(
        ' Indian Reservation and Off-Reservation Trust Land'
    ).str.removesuffix(
        ' Reservation and Off-Reservation Trust Land'
    ).str.removesuffix(
        ' and Off-Reservation Trust Land'
    ).str.removesuffix(
        ' Reservation'
    )

In [588]:
# gaming_net_crosswalked.iloc[3084, [6]].str.removesuffix(' Indian Reservation and Off-Reservation Trust Land')

### 1a. Are gaming facilities associated with different incomes in different populations? 

This could honestly be a bar chart, but that might be deceptive comparing mean incomes with very different sample sizes. Much less data for large facility counts.

In [589]:
data_income_pop = gaming_net_crosswalked.copy()
data_income_pop = data_income_pop.loc[
    data_income_pop['datayear']==2018, 
    ['owner_name', 'geoname_mostrecent', 'geoname_abbrev', 'hincmed_total', 'hincmed_aiana','is_facility']
    ]

American Indian or Alaska Native alone (AIANa)

Native Hawaiian and Other Pacific Islander alone (NHOPIa)

In [590]:
data_income_pop_long = data_income_pop.melt(
    id_vars=['owner_name', 'geoname_mostrecent', 'geoname_abbrev', 'is_facility'],
    value_vars=['hincmed_total', 'hincmed_aiana'],
    var_name='hinc_type',
    value_name='hinc_value'
)

In [591]:
data_income_pop_long = data_income_pop_long.replace('hincmed_total', 'Total').replace('hincmed_aiana', 'AIANa')


In [592]:
# data_income_pop_long.loc[:, ['geoname_mostrecent', 'is_facility']]['geoname_mostrecent']

In [593]:
# For each geoname (reservation), sum their facilities.
# Then, join that back with data_income_pop_long and bar chart :)
geoname_facility_sums = data_income_pop_long.loc[:, ['geoname_mostrecent', 'geoname_abbrev', 'is_facility']].drop_duplicates(
    
).groupby(by=['geoname_mostrecent', 'geoname_abbrev']).sum()

geoname_facility_sums = geoname_facility_sums.reset_index().rename(columns={'is_facility':'num_res_facilities'})
geoname_facility_owns = geoname_facility_sums.rename(columns={'num_res_facilities':'owns_facility'})
geoname_facility_owns['owns_facility'] = geoname_facility_owns['owns_facility'] > 0
geoname_facility_owns

Unnamed: 0,geoname_mostrecent,geoname_abbrev,owns_facility
0,Acoma Pueblo and Off-Reservation Trust Land,Acoma Pueblo,True
1,Adai Caddo SDTSA,Adai Caddo SDTSA,False
2,Agua Caliente Indian Reservation and Off-Reser...,Agua Caliente,True
3,Akhiok ANVSA,Akhiok ANVSA,False
4,Akiachak ANVSA,Akiachak ANVSA,False
...,...,...,...
743,Yomba Reservation,Yomba,False
744,Ysleta del Sur Pueblo and Off-Reservation Trus...,Ysleta del Sur Pueblo,True
745,Yurok Reservation,Yurok,True
746,Zia Pueblo and Off-Reservation Trust Land,Zia Pueblo,False


In [594]:
# bar_income_pop = (
#     alt.Chart(data_income_pop_long.merge(geoname_facility_owns))
#     .mark_bar(filled=True, size=50, opacity=0.8)
#     .encode(
#         x=alt.X('owns_facility').title('Owns a gaming facility'),
#         xOffset='hinc_type',
#         y=alt.Y('hinc_value').title('Median household income'),
#         color=alt.Color('hinc_type').scale(
#             scheme='set2'
#             ).title(
#                 'Population group'
#             ).legend(
#                 alt.Legend(fillColor='rgba(255, 255, 255, 0.5)', 
#                            cornerRadius=5,
#                            padding=7)
#             )
#     )
# )

# # Federal median household income in 2018
# datum_income_pop = alt.Chart().mark_rule(color='black', strokeDash=[3,3], strokeWidth=3).encode(
#     y=alt.datum(61937)
# )

# bar_chart_income_pop = bar_income_pop + datum_income_pop

# bar_chart_income_pop.resolve_scale(color='shared').properties(
#     height=300,
#     width=500,
#     title=alt.Title(['Do reservations with any nations that own gaming investments', 'experience greater wealth?'],
#                     anchor='start',
#                     subtitle='Compare household incomes on reservations against the 2018 federal average of $61,937 (dotted line)',
#                     # align='center',
#                     fontSize=22,
#                     dx=25)
# ).configure(
#     font='Agency FB',
# ).configure_axis(
#     labelFontSize=14,
#     titleFontSize=16,
# ).configure_legend(
#     titleFontSize=15,
#     labelFontSize=15,
#     titleAnchor='middle',
#     titleAlign='center',
#     orient='top-right'
# )#.save('./graphics/bar_income_vs_ownsfacility.svg')


### 1b. Is the number of gaming facilities associated with different incomes in different populations? 

NOTE: This has been double-plotting reservations.

In [595]:
data_res_income_pop_long = data_income_pop_long.merge(
    geoname_facility_sums
    ).loc[:, ['geoname_mostrecent', 'geoname_abbrev', 'hinc_type', 'hinc_value', 'num_res_facilities']].drop_duplicates(

    )
# data_res_income_pop_long.loc[data_res_income_pop_long['geoname_mostrecent']=='Acoma Pueblo and Off-Reservation Trust Land', :]
jitter_radius = 0.15
data_res_income_pop_long['jitter'] = np.where(data_res_income_pop_long['hinc_type']=='Total', jitter_radius, -jitter_radius)

res_exclude_list = ['Redding Rancheria',
                    'Sulphur Bank Rancheria',
                    # 'Shakopee Mdewakanton Sioux Community and Off-Reservation Trust Land'
                    ]
data_res_income_pop_long = data_res_income_pop_long.loc[
        ~data_res_income_pop_long['geoname_mostrecent'].isin(res_exclude_list),
        :
    ]

In [596]:
data_res_income_pop_long.sort_values(by='hinc_value', ascending=True)

Unnamed: 0,geoname_mostrecent,geoname_abbrev,hinc_type,hinc_value,num_res_facilities,jitter
1068,Klamath Reservation,Klamath,AIANa,11187.070116,1.0,-0.15
1353,Shageluk ANVSA,Shageluk ANVSA,AIANa,11628.776756,0.0,-0.15
314,Klamath Reservation,Klamath,Total,11628.776756,1.0,0.15
1378,Southeastern Mvskoke Nation SDTSA,Southeastern Mvskoke Nation SDTSA,AIANa,15926.922146,0.0,-0.15
356,Lovelock Indian Colony,Lovelock Indian Colony,Total,16685.347011,0.0,0.15
...,...,...,...,...,...,...
1472,Waimea Hawaiian Home Land,Waimea Hawaiian Home Land,AIANa,,0.0,-0.15
1474,Waiohinu Hawaiian Home Land,Waiohinu Hawaiian Home Land,AIANa,,0.0,-0.15
1475,Waiohuli (Residential) Hawaiian Home Land,Waiohuli (Residential) Hawaiian Home Land,AIANa,,0.0,-0.15
1481,Washoe Ranches Trust Land,Washoe Ranches Trust Land,AIANa,,1.0,-0.15


In [597]:
pop_colors = ['darkturquoise', 'chocolate']
pop_color_scale = alt.Scale(domain=['AIANa', 'Total'], range=pop_colors)
# lightseagreen
# aquamarine
# darkturquoise
# mediumturquoise
# skyblue
# turquoise

# coral
# chocolate
# peru
# sandybrown
# sienna

# pop_color_scheme = dict(zip(['AIANa', 'Total'], ['mediumaquamarine', 'chocolate']))

In [598]:
# AI: ChatGPT for finding likely sources of graphical artifacts in messy data
# "why is loess zig-zag after altair .transform_loess()"
manual_xticks = list(range(0,1+data_res_income_pop_long['num_res_facilities'].max().astype(int)))
manual_xticks_minor = manual_xticks[1::2]
# shared_color = alt.Color('hinc_type:N', title='Population group').scale(pop_color_scale)
shared_color = alt.Color('hinc_type:N', title='Population group').scale(range=pop_colors)
# Oh! Specifically entering the color/pop domain makes Altair think it is a
## different domain from the one shape uses! They are the same

grid_layer = (
    alt.Chart(pd.DataFrame({'x': manual_xticks_minor}))
    .mark_rule(color='lightgray', size=0.5)
    .encode(x='x:Q')
)

scatter_income_pop = (
    alt.Chart(data_res_income_pop_long)
    .transform_calculate(
        num_res_facilities_jitter='datum.num_res_facilities + datum.jitter'
    )
    .mark_point(filled=False, size=20, strokeWidth=2, opacity=0.48)
    .encode(
        x=alt.X('num_res_facilities_jitter:Q').title(
            'Number of gaming facilities'
            ).axis(
                values=manual_xticks,
            ).scale(
                padding=0
        ),
        # xOffset=alt.XOffset('jitter:Q'),
        y=alt.Y('hinc_value').title('Median household income'),
        color=shared_color,
        shape=alt.Shape('hinc_type:N', title='Population group').scale(
            # domain=['AIANa', 'Total'],
            range=['triangle-left', 'triangle-right']
            ),
        order=alt.Order('hinc_type:N', sort='descending')
    )
)

loess_opacity = 0.85
loess_income_pop = (
    alt.Chart(data_res_income_pop_long)
    .transform_loess(
        'num_res_facilities', 'hinc_value', groupby=['hinc_type']
    )
    .mark_line(filled=False, size=4, opacity=loess_opacity)
    .encode(
        x=alt.X('num_res_facilities:Q'),
        y=alt.Y('hinc_value'),
        color=shared_color.legend(None),
    )
)

# Federal median household income in 2018
datum_income_pop = alt.Chart().mark_rule(color='black', strokeDash=[3,3], strokeWidth=3).encode(
    y=alt.datum(61937)
)

# Annotate the wealthiest nation
annotations = alt.Chart(data_res_income_pop_long).transform_calculate(
        num_res_facilities_jitter='datum.num_res_facilities + datum.jitter'
    ).mark_text(
    align='left',
    baseline='middle',
    dx=5,
    dy=2,
    fontSize=13,
    # fontWeight='bold',
).encode(
    x='num_res_facilities_jitter:Q',
    y='hinc_value',
    text='geoname_abbrev',
    color=shared_color.legend(None),
).transform_filter(
    alt.FieldOneOfPredicate(field='geoname_abbrev', oneOf=['Shakopee Mdewakanton Sioux Community'])
)


chart_income_pop = grid_layer + datum_income_pop + scatter_income_pop + loess_income_pop + annotations


chart_income_pop.properties(
    height=300,
    width=500,
    title=alt.Title(['Do reservations with many gaming investments', 'experience greater wealth?'],
                    anchor='start',
                    subtitle='Household incomes on reservations (colored lines) consistently trended below the 2018 federal average of $61,937 (dotted line)',
                    subtitleFontSize=13,
                    # align='center',
                    fontSize=22,
                    dx=25
                    )
).configure(
    font='Agency FB',
).configure_axis(
    labelFontSize=14,
    titleFontSize=16,
).configure_legend(
    titleFontSize=15,
    labelFontSize=15,
    titleAnchor='middle',
    titleAlign='center',
    orient='top-right',
    fillColor='rgba(255, 255, 255, 0.50)', 
    cornerRadius=25,
    padding=28
).resolve_scale(
    xOffset='independent',
    color='independent',
    shape='independent'
)#.save('./graphics/scatter_income_vs_facilitycount.svg')


In [599]:
# TODO:
# Redo as bar chart with error bars, one bar per Number of gaming facilities

### 2. State poverty level compared to Indian nations' poverty levels

In [600]:
poverty_levels = gaming_net_crosswalked.dropna(subset=['owner_name', 'geoname_mostrecent'])
poverty_levels = poverty_levels.merge(nigc.loc[:,['owner_name', 'owner_state']], on='owner_name', how='left')
poverty_levels.dropna(subset=['datayear'], inplace=True)
poverty_levels = poverty_levels.loc[:, ['owner_name', 'owner_state', 'datayear', 'povshare_total']].drop_duplicates()
poverty_levels['datayear_dt'] = pd.to_datetime(poverty_levels['datayear'].astype(int), format='%Y')

In [601]:
# Many issues with states. Drop the nonsense ones
abbreviations = [
    # https://en.wikipedia.org/wiki/List_of_states_and_territories_of_the_United_States#States.
    "AK", "AL", "AR", "AZ", "CA", "CO", "CT", "DE", "FL", "GA", "HI", "IA",
    "ID", "IL", "IN", "KS", "KY", "LA", "MA", "MD", "ME", "MI", "MN", "MO",
    "MS", "MT", "NC", "ND", "NE", "NH", "NJ", "NM", "NV", "NY", "OH", "OK",
    "OR", "PA", "RI", "SC", "SD", "TN", "TX", "UT", "VA", "VT", "WA", "WI",
    "WV", "WY",
]

In [602]:
poverty_levels = poverty_levels.loc[poverty_levels['owner_state'].isin(abbreviations), :]

There are far too many states to put on here. Just choose the 3 states with the most facilities.

In [603]:
states_5_most_facilities = nigc['owner_state'].value_counts()[:4].index.tolist()

In [604]:
states_5_most_facilities

['OK', 'CA', 'PO', 'AZ']

In [605]:
poverty_levels_top = poverty_levels.loc[poverty_levels['owner_state'].isin(states_5_most_facilities), :]

In [606]:
# spaghetti_poverty = alt.Chart(poverty_levels_top).mark_bar().encode(
#     x=alt.X(
#         'datayear_dt:O',
#         title='Year',
#         timeUnit='year',
#         axis=alt.Axis(format='%Y')
#     ),
#     y=alt.Y('max(povshare_total):Q'),
#     color=alt.Color('owner_state:N'),
#     xOffset='owner_state:N'
# )
# spaghetti_poverty

In [656]:
strip_poverty = alt.Chart(poverty_levels).mark_tick(
        color='black',
    ).encode(
        x=alt.X('povshare_total').title('Percentage of total population in poverty'),
        y=alt.Y('datayear:O').title('Year'),
        # color=alt.Color('owner_state')
)
median_datum = alt.Chart(poverty_levels).mark_tick(
        color='mediumseagreen', 
        strokeDash=[5,5],
        thickness=6,
        # size=20,
        cornerRadius=3,
        opacity=0.95
    ).encode(
        x=alt.X('median(povshare_total):Q'),
        y=alt.Y('datayear:O'),
        # color='datayear'
    )

chart_poverty = strip_poverty + median_datum

chart_poverty.properties(
    height=300,
    width=500,
    title=alt.Title(['Has reservation poverty gotten less pervasive', 'over the years?'],
                    anchor='start',
                    subtitle='Each strip is a reservation statistical area. Median reservation poverty rates are indicated by blue lines.',
                    # align='center',
                    fontSize=22,
                    subtitleFontSize=15,
                    dx=25)
).configure(
    font='Agency FB',
).configure_axis(
    labelFontSize=14,
    titleFontSize=16,
).configure_legend(
    titleFontSize=15,
    labelFontSize=15,
    titleAnchor='middle',
    titleAlign='center',
    orient='top-right'
)#.save('./graphics/strip_poverty_vs_year.svg')


### 3. States vs. the Nations they share borders with

In [608]:
gaming_net_crosswalked_state = gaming_net_crosswalked.merge(nigc.loc[:, ['owner_name','owner_state']], 
                                                            on='owner_name',
                                                            how='left')

In [609]:
gaming_net_crosswalked_state['geocat_mostrecent'].value_counts()

geocat_mostrecent
Federal Reservations/Off-Reservation Trust Lands      4965
Alaska Native Village Statistical Areas (ANVSAs)      1540
Oklahoma Tribal Statistical Areas (OTSAs)             1108
Hawaiian Home Lands (HHLs)                             435
All states and commonwealths                           362
State-designated Tribal Statistical Areas (SDTSAs)     182
Tribally Designated Statistical Areas (TDSAs)           73
State Reservations                                      71
Oklahoma Joint-Use Areas                                25
Federal Joint-Use Areas                                 20
United States (national)                                 7
Name: count, dtype: int64

In [610]:
# Get all states, and get all nations with gaming facilities
gaming_net_crosswalked_state = gaming_net_crosswalked_state.loc[
    (~gaming_net_crosswalked_state['geocat_mostrecent'].isin(['United States (national)'])) ,
    :
]

gaming_net_crosswalked_state = gaming_net_crosswalked_state.loc[
    (gaming_net_crosswalked_state['geocat_mostrecent']=='All states and commonwealths') | (gaming_net_crosswalked_state['is_facility']>0),
    :
].drop_duplicates()

In [611]:
is_us_state = np.where(gaming_net_crosswalked_state['geocat_mostrecent']=='All states and commonwealths', True, False)
gaming_net_crosswalked_state['is_us_state'] = is_us_state

In [612]:
abbreviation_to_name = {
    # https://en.wikipedia.org/wiki/List_of_states_and_territories_of_the_United_States#States.
    "AK": "Alaska",
    "AL": "Alabama",
    "AR": "Arkansas",
    "AZ": "Arizona",
    "CA": "California",
    "CO": "Colorado",
    "CT": "Connecticut",
    "DE": "Delaware",
    "FL": "Florida",
    "GA": "Georgia",
    "HI": "Hawaii",
    "IA": "Iowa",
    "ID": "Idaho",
    "IL": "Illinois",
    "IN": "Indiana",
    "KS": "Kansas",
    "KY": "Kentucky",
    "LA": "Louisiana",
    "MA": "Massachusetts",
    "MD": "Maryland",
    "ME": "Maine",
    "MI": "Michigan",
    "MN": "Minnesota",
    "MO": "Missouri",
    "MS": "Mississippi",
    "MT": "Montana",
    "NC": "North Carolina",
    "ND": "North Dakota",
    "NE": "Nebraska",
    "NH": "New Hampshire",
    "NJ": "New Jersey",
    "NM": "New Mexico",
    "NV": "Nevada",
    "NY": "New York",
    "OH": "Ohio",
    "OK": "Oklahoma",
    "OR": "Oregon",
    "PA": "Pennsylvania",
    "RI": "Rhode Island",
    "SC": "South Carolina",
    "SD": "South Dakota",
    "TN": "Tennessee",
    "TX": "Texas",
    "UT": "Utah",
    "VA": "Virginia",
    "VT": "Vermont",
    "WA": "Washington",
    "WI": "Wisconsin",
    "WV": "West Virginia",
    "WY": "Wyoming",
}

In [613]:
name_to_abbreviation = {value: key for key, value in abbreviation_to_name.items()}

In [614]:
gaming_net_crosswalked_state.loc[
        gaming_net_crosswalked_state['is_us_state'],
        'owner_state'
    ] = gaming_net_crosswalked_state.loc[
        gaming_net_crosswalked_state['is_us_state'],
        'geoname_mostrecent'
    ].str.replace(' (state)', '').map(name_to_abbreviation)

In [615]:
gaming_net_crosswalked_state.loc[
    gaming_net_crosswalked_state['is_us_state'],
    'owner_name'
] = gaming_net_crosswalked_state.loc[
        gaming_net_crosswalked_state['is_us_state'],
        'geoname_mostrecent'
    ]

In [651]:
gaming_net_crosswalked_state['owner_state_full'] = gaming_net_crosswalked_state['owner_state'].map(abbreviation_to_name)

In [616]:
# gaming_net_crosswalked_state['owner_state'].value_counts()

gaming_net_crosswalked_state['owner_state'][gaming_net_crosswalked_state['owner_state'].isin(abbreviation_to_name.keys())]

0       NM
3       NM
6       NM
9       NM
12      NM
        ..
8774    CA
8776    CA
8792    OK
8795    OK
8797    CA
Name: owner_state, Length: 1736, dtype: object

In [None]:
bar_unemployment = alt.Chart(gaming_net_crosswalked_state.loc[
    (gaming_net_crosswalked_state['owner_state'].isin(['MT', 'OR', 'WA'])) & (gaming_net_crosswalked_state['datayear'].isin([2023])),
    :
]).mark_bar().encode(
    x=alt.X('geoname_abbrev', sort='-y').title(''),
    y=alt.Y('unemp_total').title('Unemployment Rate'),
    facet=alt.Facet('owner_state_full').title(''),
    fill=alt.Fill('is_us_state').scale(range=pop_colors).legend(None)
).resolve_scale(x='independent')

bar_unemployment.properties(
    height=300,
    width=250,
    title=alt.Title(['Northwestern reservations exceed nearby state unemployment rates (2023)'],
                    anchor='start',
                    subtitle='State-wide unemployment rates highlighted',
                    # align='center',
                    fontSize=22,
                    subtitleFontSize=15,
                    dx=25)
).configure(
    font='Agency FB',
).configure_axis(
    labelFontSize=14,
    titleFontSize=16,
).configure_headerFacet(
    labelFontSize=18
)#.save('./graphics/bar_state_facet_unemployment_vs_reservation.svg')
# Texture for state vs. nation, top X unemployment rates, facet by year?

### 4 and 5. Zooming in on Montana Nations: Bump Chart?

In [618]:
mt = gaming_net_crosswalked_state.loc[
        (gaming_net_crosswalked_state['owner_state']=='MT'), 
        :
    ]

#  & (gaming_net_crosswalked_state['datayear']==2023)

In [619]:
mt['datayear'] = pd.to_datetime(mt['datayear'], format='%Y')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  mt['datayear'] = pd.to_datetime(mt['datayear'], format='%Y')


In [620]:
homeownership_rank = alt.Chart(mt).mark_line(point=True, filled=False).encode(
    x=alt.X('datayear:T').title('Year'),
    y=alt.Y('rank_val:O').title('Rank'),
    color=alt.Color('geoname_mostrecent:N').title('Reservation/State'),
    detail='geoname_mostrecent:N',
).transform_window(
    rank_val='rank()',
    sort=[
        alt.SortField('ownoccshare_aiana', order='descending')
    ],
    groupby=['datayear'],
).properties(
    width=500,
    height=200,
    title=alt.Title(['MT and reservations ranked by AIANa house ownership'],
                    anchor='start',
                    subtitle='Off-reservation AIANa people living in houses own the house less frequently than do AIANa people on reservations',
                    # align='center',
                    fontSize=22,
                    dx=25
                    )
)

homeownership_rank.configure(
    font='Agency FB',
).configure_axis(
    labelFontSize=14,
    titleFontSize=16,
).configure_legend(
    titleFontSize=15,
    labelFontSize=15,
    titleAnchor='middle',
    titleAlign='right',
    # orient='top-right'
)#.save('./graphics/rank_mt_houseownership_vs_year.svg')

In [621]:
mt.loc[mt['geoname_mostrecent']=='Montana (state)', ['datayear', 'hincmed_total']]

Unnamed: 0,datayear,hincmed_total
4756,1990-01-01,53609.587729
4757,2000-01-01,58438.080046
4758,2010-01-01,
4759,2013-01-01,60469.490463
4760,2018-01-01,63779.283893
4761,2020-01-01,
4762,2023-01-01,69922.0


In [622]:
mt_long = mt.melt(
    id_vars = ['geoname_mostrecent', 'datayear'],
    value_vars = ['povshare_aiana', 'povshare_total', 
                  'hincmed_aiana', 'hincmed_total',
                  'ownoccshare_aiana', 'ownoccshare_total'],
    var_name = 'pop_group',
    value_name = 'value'
)

In [623]:
mt_long['population_group'] = mt_long['pop_group'].str.extract(r'_(aiana|total)$')

In [624]:
mt_long['outcome'] = mt_long['pop_group'].str.extract(r'^(povshare|hincmed|ownoccshare)')

In [625]:
mt_Long = mt_long.pivot_table(
    index=['datayear', 'geoname_mostrecent', 'population_group'],
    columns='outcome',
    values='value'
).reset_index()

In [626]:
# mt_long.loc[mt_long['pop_group']=='ownoccshare_total', :]

In [627]:
mt_long_ = mt_long.copy()
mt_long_['outcome'] = mt_long_['outcome'].str.replace('ownoccshare', '% of homes occupied by owner')
mt_long_['outcome'] = mt_long_['outcome'].str.replace('povshare', '% in poverty')
mt_long_['outcome'] = mt_long_['outcome'].str.replace('hincmed', 'Median household income')

In [628]:
# ownoccshare_aianac
color_total = 'rgb(201, 134, 64)'
color_aiana = 'rgb(64, 187, 201)'

mt_facet_povshare_aiana = alt.Chart(mt_long_).mark_line(color=color_aiana, opacity=0.5).encode(
    x=alt.X('datayear:T', ).title('Year'),
    y=alt.Y('value').title(''),
    color=alt.Color('population_group').title('Population group'),
    detail=alt.Detail('geoname_mostrecent:N'),
    row=alt.Row('outcome').title('Socioeconomic outcome')
).resolve_scale(
    y='independent'
).properties(
    height=100,
    width=500,
    title=alt.Title(['AIANa populations consistently behind', 'in socioeconomic measures on MT reservations'],
                    anchor='start',
                    subtitle='Across Montana reservations, native-identifying peoples\' outcomes lag. Each line represents a reservation.',
                    # align='center',
                    fontSize=22,
                    subtitleFontSize=15,
                    dx=25)
)

mt_facet_povshare_aiana.configure(
    font='Agency FB',
).configure_axis(
    labelFontSize=14,
    titleFontSize=16,
).configure_legend(
    titleFontSize=15,
    labelFontSize=15,
    titleAnchor='middle',
    titleAlign='center',
    orient='right'
).configure_headerRow(
    labelFontSize=20,
    titleFontSize=20,
    labelAngle=0,
    labelAlign='left',
    labelAnchor='middle',
    labelOrient='left'
)#.save('./graphics/line_mt_outcome_facet_outcome_vs_year.svg')


* Replace with bars, explain why missing in annotation

In [629]:
# nigc_mt = nigc.loc[(nigc['owner_state']=='MT') & (nigc['is_facility']), :]
# nigc_mt = nigc_mt.merge(crosswalk, on='owner_name')
# mt_nations = nigc_mt.merge(net, on='geoname_mostrecent').drop_duplicates()
# mt_state = net.loc[net['geoname_mostrecent']=='Montana (state)', :]

### 6 and 7. Scatter plots: Gaming vs. No Gaming

In [630]:
gaming_compare = gaming_net_crosswalked.copy()

In [631]:
gaming_compare['geocat_mostrecent'].value_counts()

geocat_mostrecent
Federal Reservations/Off-Reservation Trust Lands      2250
Alaska Native Village Statistical Areas (ANVSAs)      1526
Hawaiian Home Lands (HHLs)                             435
All states and commonwealths                           362
Oklahoma Tribal Statistical Areas (OTSAs)              195
State-designated Tribal Statistical Areas (SDTSAs)     182
State Reservations                                      71
Tribally Designated Statistical Areas (TDSAs)           43
Oklahoma Joint-Use Areas                                25
Federal Joint-Use Areas                                 20
United States (national)                                 7
Name: count, dtype: int64

In [632]:
gaming_compare = gaming_compare.loc[
    ~gaming_compare['geocat_mostrecent'].isin(['All states and commonwealths', 'United States (national)']),
    :
]

In [633]:
gaming_compare['has_facility'] = np.where(gaming_compare['is_facility']>0, 'Has facility', 'Doesn\'t have facility')

In [634]:
gaming_compare23 = gaming_compare.loc[gaming_compare['datayear']==2023, :]

In [635]:
scatter_incomes = alt.Chart(gaming_compare23).mark_point(filled=True, opacity=0.8).encode(
    x=alt.X('hincmed_aiana').title('Median income'),
    y=alt.Y('educshare_aiana_bachplus').title('Percentage of adults 25+ with bachelor\'s or higher'),
    color=alt.Color('has_facility:N').legend(
                alt.Legend(fillColor='rgba(255, 255, 255, 0.5)', 
                           cornerRadius=5,
                           padding=7,
                           title='Nation ownership')
            )#.scale(scheme='blueorange'),
    # size=alt.Size('is_facility')
)

scatter_incomes.properties(
    height=300,
    width=500,
    title=alt.Title(['Do wealth and gaming facility ownership affect reservations\'', 'educational outcomes?'],
                    anchor='start',
                    subtitle='American Indians and Alaska Natives in 2023 only.',
                    # align='center',
                    fontSize=22,
                    subtitleFontSize=15,
                    dx=25)
).configure(
    font='Agency FB',
).configure_axis(
    labelFontSize=14,
    titleFontSize=16,
).configure_legend(
    titleFontSize=15,
    labelFontSize=15,
    titleAnchor='middle',
    titleAlign='center',
    orient='top-right'
)#.save('./graphics/scatter_bachelors_vs_income_byfacility.svg')

In [636]:
scatter_child_poverty = alt.Chart(gaming_compare23).mark_point(filled=True, opacity=0.8).encode(
    x=alt.X('hincmed_aiana').title('Median income'),
    y=alt.Y('youthpovshare_aiana').title('Percentage of youths (under 18) experiencing poverty'),
    color=alt.Color('has_facility:N').legend(
                alt.Legend(fillColor='rgba(255, 255, 255, 0.5)', 
                           cornerRadius=5,
                           padding=7,
                           title='Nation ownership')
            )#.scale(scheme='blueorange'),
    # size=alt.Size('is_facility')
)

scatter_child_poverty.properties(
    height=300,
    width=500,
    title=alt.Title(['Do wealth and gaming facility ownership correspond', 'with child poverty?'],
                    anchor='start',
                    subtitle='American Indians and Alaska Natives in 2023 only.',
                    # align='center',
                    fontSize=22,
                    subtitleFontSize=15,
                    dx=25)
).configure(
    font='Agency FB',
).configure_axis(
    labelFontSize=14,
    titleFontSize=16,
).configure_legend(
    titleFontSize=15,
    labelFontSize=15,
    titleAnchor='middle',
    titleAlign='center',
    orient='top-right'
)#.save('./graphics/scatter_youthpoverty_vs_income_byfacility.svg')

#### PENDING: 5 wealthiest vs 5 poorest nations and their gaming facilities / population / 

In [637]:
# gaming_net_crosswalked.loc[
#     gaming_net_crosswalked['datayear']==2023,
#     :
# ]

### 8. Presence of gaming facilities and aggregate income in all reservations

In [638]:
agginc = gaming_net_crosswalked.copy()
agginc.head(2)

Unnamed: 0,owner_name,is_facility,Class II,Class II & III,Class III,Other,geoname_mostrecent,no_reservation_or_not_listed,geoid_mostrecent,geocat_mostrecent,...,perpersinc_nhopia,povshare_total,povshare_aiana,povshare_nhopia,youthpovshare_total,youthpovshare_aiana,youthpovshare_nhopia,incagg_total,source,geoname_abbrev
0,"Pueblo of Acoma, New Mexico",2.0,0.0,1.0,1.0,0.0,Acoma Pueblo and Off-Reservation Trust Land,,10,Federal Reservations/Off-Reservation Trust Lands,...,,49.650892,60.264901,,55.823293,55.823293,,24945650.0,1990 decennial census and TIGER/Line shapefiles,Acoma Pueblo
1,"Pueblo of Acoma, New Mexico",2.0,0.0,1.0,1.0,0.0,Acoma Pueblo and Off-Reservation Trust Land,,10,Federal Reservations/Off-Reservation Trust Lands,...,0.0,29.769618,30.132939,,32.494759,32.908705,,43566310.0,2000 decennial census and TIGER/Line shapefiles,Acoma Pueblo


In [639]:
agginc = agginc.loc[
    ~agginc['geocat_mostrecent'].isin(['All states and commonwealths', 'United States (national)']),
    :
]
agginc['has_facility'] = np.where(agginc['is_facility']>0, 'Has facility', 'Doesn\'t have facility')
agginc23 = agginc.loc[
    agginc['datayear'].isin([1990, 2000, 2018, 2023]),
    ['owner_name', 'geoname_mostrecent', 'datayear', 'has_facility', 'incagg_total', 'pop_total']
].drop_duplicates()

In [640]:
agginc23['has_facility'].value_counts()

has_facility
Doesn't have facility    1718
Has facility              920
Name: count, dtype: int64

In [641]:
agginc23_grouped = agginc23.drop(columns=['owner_name', 'geoname_mostrecent']).groupby(['datayear', 'has_facility']).sum()
agginc23_grouped['incpercap_total'] = agginc23_grouped['incagg_total'] / agginc23_grouped['pop_total']
agginc23_grouped.reset_index(inplace=True)

In [642]:
bar_facilities = alt.Chart(agginc23_grouped).mark_bar().encode(
    x=alt.X('has_facility').title('').axis(alt.Axis(labelAngle=-45)),
    y=alt.Y('incpercap_total').title('Income per capita (all populations)'),
    facet=alt.Facet('datayear').title(''),
    color=alt.X('has_facility', legend=None).scale(scheme='set2')
)

bar_facilities.properties(
    height=300,
    width=150,
    title=alt.Title(['Has the presence of gaming facilities accompanied overall', 'reservation wealth?'],
                    anchor='start',
                    subtitle='In 2023 dollars',
                    # align='center',
                    fontSize=22,
                    subtitleFontSize=15,
                    dx=25)
).configure(
    font='Agency FB',
).configure_axis(
    labelFontSize=14,
    titleFontSize=16,
).configure_legend(
    titleFontSize=15,
    labelFontSize=15,
    titleAnchor='middle',
    titleAlign='center',
    orient='top-right'
).configure_header(
    labelFontSize=15,
)#.save('./graphics/bar_year_facet_incomepercap_vs_facility.svg')

In [643]:
# line_facilities = alt.Chart(agginc23_grouped).mark_line().encode(
#     x=alt.X('datayear').title(''),
#     y=alt.Y('incpercap_total').title('Income per capita (all populations)'),
#     # facet=alt.Facet('has_facility').title(''),
#     color=alt.X('has_facility', legend=None).scale(scheme='set2')
# )

# line_facilities.properties(
#     height=300,
#     width=400,
#     title=alt.Title(['Has the presence of gaming facilities accompanied overall', 'reservation wealth?'],
#                     anchor='start',
#                     subtitle='In 2023 dollars',
#                     # align='center',
#                     fontSize=22,
#                     dx=25)
# ).configure(
#     font='Agency FB',
# ).configure_axis(
#     labelFontSize=14,
#     titleFontSize=16,
# ).configure_legend(
#     titleFontSize=15,
#     labelFontSize=15,
#     titleAnchor='middle',
#     titleAlign='center',
#     orient='top-right'
# )

### End