In [2]:
import altair as alt
import pandas as pd
import numpy as np

## Merging Native Economic Trends and NIGC data

In [171]:
net = pd.read_excel('../data/native-economic-trends-data_current.xlsx', sheet_name=1)
nigc = pd.read_csv('../data/clean/clean_gaming_report.csv')

In [290]:
crosswalk = pd.read_csv('../data/clean/verified_crosswalk.csv')

In [42]:
# Let's grab the number of facilities listed in each owner.
# This is not really a reliable count because many facilities appear to be multi-counted.
# But that is mostly a hunch.
# nigc.loc[nigc['owner_name']=='Casino Morongo',:]

In [43]:
nigc.drop_duplicates().shape

(774, 7)

In [44]:
nigc.shape

(831, 7)

In [45]:
nigc.drop_duplicates(inplace=True)

In [46]:
nigc_facility_counts = nigc.loc[:, ['owner_name', 'is_facility']].groupby(
    by=['owner_name']
    ).sum(

    ).sort_values(
        'is_facility', ascending=False
    )
nigc_facility_counts = nigc_facility_counts.loc[nigc_facility_counts['is_facility']>0, :]
nigc_facility_counts
# These counts are wrong, at least for top 2 nations

Unnamed: 0_level_0,is_facility
owner_name,Unnamed: 1_level_1
"Chickasaw Nation, Oklahoma",25
Choctaw Nation of Oklahoma,23
White Earth Nation,21
"Muscogee (Creek) Nation, Oklahoma",12
"Cherokee Nation, Oklahoma",10
...,...
Washoe Tribe of Nevada and California,1
"Yavapai Apache Nation of the Camp Verde Indian Reservation, AZ",1
Yocha DeHe Wintun Nation,1
Ysleta del Sur Pueblo,1


In [51]:
nigc_classes_counts_long = nigc.loc[nigc['is_facility'], ['owner_name', 'classes']].groupby(['owner_name', 'classes']).size()

In [64]:
nigc_classes_counts_long = pd.DataFrame(nigc_classes_counts_long).reset_index().rename(columns={0:'count'})

In [99]:
nigc_classes_counts_wide = nigc_classes_counts_long.pivot(index='owner_name', columns='classes', values='count')#.reset_index(drop=True)


In [101]:
nigc_classes_counts_wide.columns.name = None

In [104]:
nigc_classes_counts_wide.reset_index(inplace=True)

In [108]:

nigc_classes_counts_wide.replace(np.nan, 0, inplace=True)

In [109]:
nigc_classes_counts_wide.columns

Index(['owner_name', 'Class II', 'Class II & III', 'Class III', 'Other'], dtype='object')

In [111]:
nigc_classes_counts_wide.loc[:, ['Class II', 'Class II & III', 'Class III', 'Other']] = nigc_classes_counts_wide.loc[:, ['Class II', 'Class II & III', 'Class III', 'Other']].astype(int)

In [113]:
nigc_all_counts = nigc_facility_counts.merge(nigc_classes_counts_wide, on='owner_name')

In [130]:
nigc_all_counts.fillna(0, inplace=True)

In [None]:

cols_to_int = ['is_facility', 'Class II', 'Class II & III', 'Class III', 'Other']
nigc_all_counts[cols_to_int] = nigc_all_counts[cols_to_int].astype(int)

In [146]:
nigc_all_counts

Unnamed: 0,owner_name,is_facility,Class II,Class II & III,Class III,Other
0,"Chickasaw Nation, Oklahoma",25,2,23,0,0
1,Choctaw Nation of Oklahoma,23,0,23,0,0
2,White Earth Nation,21,19,2,0,0
3,"Muscogee (Creek) Nation, Oklahoma",12,0,12,0,0
4,"Cherokee Nation, Oklahoma",10,0,10,0,0
...,...,...,...,...,...,...
235,Washoe Tribe of Nevada and California,1,0,0,1,0
236,Yavapai Apache Nation of the Camp Verde Indian...,1,0,1,0,0
237,Yocha DeHe Wintun Nation,1,0,1,0,0
238,Ysleta del Sur Pueblo,1,0,1,0,0


### Applying crosswalk

In [148]:
# crosswalk = crosswalk.loc[:, ['owner_name', 'geoname_mostrecent']]
# crosswalk.drop(columns=['no_reservation_or_not_listed'])

In [149]:
nigc_all_counts = nigc_all_counts.merge(crosswalk, on='owner_name')

In [153]:
# Outer join bc we care about non-gaming nations as well, of course
gaming_net_crosswalked = nigc_all_counts.merge(net, on='geoname_mostrecent', how='outer')

In [None]:
# gaming_net_crosswalked.to_csv('../data/clean/gaming_net_crosswalked.csv', index=False)

## Visualizations

In [3]:
gaming_net_crosswalked = pd.read_csv('../data/clean/gaming_net_crosswalked.csv')

In [4]:
gaming_net_crosswalked.head(2)

Unnamed: 0,owner_name,is_facility,Class II,Class II & III,Class III,Other,geoname_mostrecent,no_reservation_or_not_listed,geoid_mostrecent,geocat_mostrecent,...,perpersinc_aiana,perpersinc_nhopia,povshare_total,povshare_aiana,povshare_nhopia,youthpovshare_total,youthpovshare_aiana,youthpovshare_nhopia,incagg_total,source
0,"Pueblo of Acoma, New Mexico",2.0,0.0,1.0,1.0,0.0,Acoma Pueblo and Off-Reservation Trust Land,,10,Federal Reservations/Off-Reservation Trust Lands,...,9442.544837,,49.650892,60.264901,,55.823293,55.823293,,24945650.0,1990 decennial census and TIGER/Line shapefiles
1,"Pueblo of Acoma, New Mexico",2.0,0.0,1.0,1.0,0.0,Acoma Pueblo and Off-Reservation Trust Land,,10,Federal Reservations/Off-Reservation Trust Lands,...,15359.815128,0.0,29.769618,30.132939,,32.494759,32.908705,,43566310.0,2000 decennial census and TIGER/Line shapefiles


In [5]:
data_for_bar = gaming_net_crosswalked.copy()
cols_to_fillna = ['is_facility', 'Class II', 'Class II & III', 'Class III', 'Other']
data_for_bar.loc[:, cols_to_fillna] = data_for_bar.loc[:, cols_to_fillna].fillna(0)

In [6]:
data_for_bar = data_for_bar.loc[(data_for_bar['is_facility']>0) & (data_for_bar['datayear']==2020), :]

In [7]:
data_for_bar.shape

(235, 82)

In [8]:
alt.Chart(data_for_bar).mark_bar().encode(
    x = alt.X('owner_name:N'),
    y = alt.Y('is_facility')
)

In [None]:
# # alt.data_transformers.enable('vegafusion')

# alt.data_transformers.disable_max_rows()
# alt.Chart(gaming_net_crosswalked).mark_bar().encode(
#     x = alt.X('is_facility', bin=alt.Bin(maxbins=30)),
#     y = alt.Y('count()').title('Count')
# )

### Ideas

* Count of gaming facilities over time (Don't have historical gaming data, though)
* Count of gaming facilities vs population (highlight by type?)
* Scatterplot relationship between pop_aianac and something else
* Are gaming facilities associated with different incomes in different populations? Scatterplot with 3 colors of hincmed_total, hincmed_aiana, and hincmed_nhopia against facility count


### 1. Are gaming facilities associated with different incomes in different populations? 

This could honestly be a bar chart, but that might be deceptive comparing mean incomes a with very different sample sizes. Much less data for large facility counts.

In [417]:
data_income_pop = gaming_net_crosswalked.copy()
data_income_pop = data_income_pop.loc[
    data_income_pop['datayear']==2018, 
    ['owner_name', 'geoname_mostrecent', 'hincmed_total', 'hincmed_aiana','is_facility']
    ]

In [418]:
data_income_pop

Unnamed: 0,owner_name,geoname_mostrecent,hincmed_total,hincmed_aiana,is_facility
4,"Pueblo of Acoma, New Mexico",Acoma Pueblo and Off-Reservation Trust Land,51952.709932,53026.639539,2.0
10,,Adai Caddo SDTSA,34698.240903,30336.994565,
17,Agua Caliente Band of Cahuilla Indians,Agua Caliente Indian Reservation and Off-Reser...,65361.661529,,4.0
24,,Akhiok ANVSA,,65224.538314,
31,,Akiachak ANVSA,46643.735883,46643.735883,
...,...,...,...,...,...
5087,,Yomba Reservation,66235.366973,66235.366973,
5094,Ysleta del Sur Pueblo,Ysleta del Sur Pueblo and Off-Reservation Trus...,42821.274568,36309.742054,1.0
5101,Yurok Tribe of the Yurok Reservation,Yurok Reservation,38258.590585,24838.717670,1.0
5108,,Zia Pueblo and Off-Reservation Trust Land,50489.253314,50814.465896,


In [419]:
# data_income_pop = data_income_pop.loc[~data_income_pop['is_facility'].isna(), :]
data_income_pop.loc[data_income_pop['is_facility'].isna(), ['is_facility']] = 0

American Indian or Alaska Native alone (AIANa)

Native Hawaiian and Other Pacific Islander alone (NHOPIa)

In [420]:
data_income_pop_long = data_income_pop.melt(
    id_vars=['owner_name', 'geoname_mostrecent', 'is_facility'],
    value_vars=['hincmed_total', 'hincmed_aiana'],
    var_name='hinc_type',
    value_name='hinc_value'
)

In [421]:
data_income_pop_long = data_income_pop_long.replace('hincmed_total', 'Total').replace('hincmed_aiana', 'AIANa')


In [422]:
scatter_income_pop = (
    alt.Chart(data_income_pop_long)
    .mark_point(filled=True, size=50, opacity=0.8)
    .encode(
        x=alt.X('is_facility').title('Number of gaming facilities'),
        y=alt.Y('hinc_value').title('Median household income'),
        color=alt.Color('hinc_type').scale(
            scheme='set2'
            ).title(
                'Population group'
            ).legend(
                alt.Legend(fillColor='rgba(255, 255, 255, 0.5)', 
                           cornerRadius=5,
                           padding=7)
            )
    )
)

loess_opacity = 0.7
loess_outline_income_pop = (
    alt.Chart(data_income_pop_long)
    .transform_loess('is_facility', 'hinc_value', groupby=['hinc_type'])
    .mark_line(
        color='black',
        strokeWidth=8,
        strokeDash=[15,15],
        opacity=loess_opacity
    )
    .encode(
        x='is_facility',
        y='hinc_value',
        detail='hinc_type'
    )
)
loess_income_pop = scatter_income_pop.transform_loess('is_facility', 'hinc_value', groupby=['hinc_type']).mark_line(
        strokeWidth=4,
        strokeDash=[15,15],
        opacity=loess_opacity
)

# Federal median household income in 2018
datum_income_pop = alt.Chart().mark_rule(color='black', strokeDash=[2,2]).encode(
    y=alt.datum(61937)
)

# chart_income_pop = scatter_income_pop + loess_outline + loess_colored
chart_income_pop = loess_outline_income_pop + loess_income_pop + scatter_income_pop + datum_income_pop

chart_income_pop.resolve_scale(color='shared').properties(
    height=300,
    width=500,
    title=alt.Title(['Do reservations with many gaming investments', 'experience greater wealth?'],
                    anchor='start',
                    subtitle='Compare household incomes on reservations against the 2018 federal average',
                    # align='center',
                    fontSize=22,
                    dx=25)
).configure(
    font='Agency FB',
).configure_axis(
    labelFontSize=14,
    titleFontSize=16,
).configure_legend(
    titleFontSize=15,
    labelFontSize=15,
    titleAnchor='middle',
    titleAlign='center',
    orient='top-right'
)

### 2. 5 wealthiest vs 5 poorest nations and their gaming facilities / population / 

In [167]:
gaming_net_crosswalked_2018 = gaming_net_crosswalked.loc[
        gaming_net_crosswalked['datayear']==2018, 
        ['owner_name', 'is_facility', 'geoname_mostrecent', 'hincmed_total', 'hincmed_aiana', 'incagg_total']
    ].dropna(subset=['incagg_total'])

In [169]:
gaming_net_crosswalked['geocat_mostrecent'].value_counts()

geocat_mostrecent
Federal Reservations/Off-Reservation Trust Lands      2250
Alaska Native Village Statistical Areas (ANVSAs)      1526
Hawaiian Home Lands (HHLs)                             435
All states and commonwealths                           362
Oklahoma Tribal Statistical Areas (OTSAs)              195
State-designated Tribal Statistical Areas (SDTSAs)     182
State Reservations                                      71
Tribally Designated Statistical Areas (TDSAs)           43
Oklahoma Joint-Use Areas                                25
Federal Joint-Use Areas                                 20
United States (national)                                 7
Name: count, dtype: int64

In [165]:
gaming_net_crosswalked_2018.sort_values(by='incagg_total', ascending=True)

Unnamed: 0,owner_name,is_facility,geoname_mostrecent,hincmed_total,hincmed_aiana,incagg_total
2375,,,Lime Village ANVSA,,,9.295255e+04
4327,,,Stevens Village ANVSA,,,9.295255e+04
4417,,,Takotna ANVSA,,,1.628490e+05
4669,,,Ugashik ANVSA,,,1.641838e+05
3743,,,Resighini Rancheria,,,2.508263e+05
...,...,...,...,...,...,...
1182,,,Florida (state),64638.427579,54979.128509,7.547922e+11
2965,,,New York (state),79268.139838,50076.670188,8.920396e+11
4508,,,Texas (state),72286.990648,63215.015794,1.019981e+12
469,,,California (state),86433.737954,64337.484593,1.663709e+12


### 3. State poverty level compared to Indian nations' poverty levels

In [253]:
poverty_levels = gaming_net_crosswalked.dropna(subset=['owner_name', 'geoname_mostrecent'])

In [254]:
poverty_levels = poverty_levels.merge(nigc.loc[:,['owner_name', 'owner_state']], on='owner_name', how='left')

In [255]:
poverty_levels.dropna(subset=['datayear'], inplace=True)

In [256]:
poverty_levels = poverty_levels.loc[:, ['owner_name', 'owner_state', 'datayear', 'povshare_total']].drop_duplicates()

In [257]:
# Now do the same but for states

In [258]:
poverty_levels['datayear_dt'] = pd.to_datetime(poverty_levels['datayear'].astype(int), format='%Y')

In [259]:
# Many issues with states. Drop the nonsense ones
abbreviations = [
    # https://en.wikipedia.org/wiki/List_of_states_and_territories_of_the_United_States#States.
    "AK", "AL", "AR", "AZ", "CA", "CO", "CT", "DE", "FL", "GA", "HI", "IA",
    "ID", "IL", "IN", "KS", "KY", "LA", "MA", "MD", "ME", "MI", "MN", "MO",
    "MS", "MT", "NC", "ND", "NE", "NH", "NJ", "NM", "NV", "NY", "OH", "OK",
    "OR", "PA", "RI", "SC", "SD", "TN", "TX", "UT", "VA", "VT", "WA", "WI",
    "WV", "WY",
]

In [260]:
poverty_levels = poverty_levels.loc[poverty_levels['owner_state'].isin(abbreviations), :]

There are far too many states to put on here. Just choose the 3 states with the most facilities.

In [261]:
states_5_most_facilities = nigc['owner_state'].value_counts()[:4].index.tolist()

In [262]:
states_5_most_facilities

['OK', 'CA', 'PO', 'AZ']

In [263]:
poverty_levels_top = poverty_levels.loc[poverty_levels['owner_state'].isin(states_5_most_facilities), :]

In [264]:
spaghetti_poverty = alt.Chart(poverty_levels_top).mark_bar().encode(
    x=alt.X(
        'datayear_dt:O',
        title='Year',
        timeUnit='year',
        axis=alt.Axis(format='%Y')
    ),
    y=alt.Y('max(povshare_total):Q'),
    color=alt.Color('owner_state:N'),
    xOffset='owner_state:N'
)
spaghetti_poverty

In [282]:
strip_poverty = alt.Chart(poverty_levels).mark_tick(color='black').encode(
    x=alt.X('povshare_total').title('Percentage of total population in poverty'),
    y=alt.Y('datayear:O').title('Year'),
    # color=alt.Color('owner_state')
)
median_datum = alt.Chart(poverty_levels).mark_rule(
        color='deepskyblue', 
        # strokeDash=[1,1], 
        size=6,
        cornerRadius=10
    ).encode(
        x=alt.X('median(povshare_total):Q'),
        y=alt.Y('datayear:O'),
        # color='datayear'
    )

chart_poverty = strip_poverty + median_datum

chart_poverty.properties(
    height=300,
    width=500,
    title=alt.Title(['Has reservation poverty gotten less pervasive', 'over the years?'],
                    anchor='start',
                    subtitle='Median reservation poverty rate indicated by line',
                    # align='center',
                    fontSize=22,
                    dx=25)
).configure(
    font='Agency FB',
).configure_axis(
    labelFontSize=14,
    titleFontSize=16,
).configure_legend(
    titleFontSize=15,
    labelFontSize=15,
    titleAnchor='middle',
    titleAlign='center',
    orient='top-right'
)

### 4. States vs. Nations they share borders with

In [313]:
gaming_net_crosswalked_state = gaming_net_crosswalked.merge(nigc.loc[:, ['owner_name','owner_state']], 
                                                            on='owner_name',
                                                            how='left')

In [314]:
gaming_net_crosswalked_state['geocat_mostrecent'].value_counts()

geocat_mostrecent
Federal Reservations/Off-Reservation Trust Lands      5267
Alaska Native Village Statistical Areas (ANVSAs)      1540
Oklahoma Tribal Statistical Areas (OTSAs)             1205
Hawaiian Home Lands (HHLs)                             435
All states and commonwealths                           362
State-designated Tribal Statistical Areas (SDTSAs)     182
Tribally Designated Statistical Areas (TDSAs)           73
State Reservations                                      71
Oklahoma Joint-Use Areas                                25
Federal Joint-Use Areas                                 20
United States (national)                                 7
Name: count, dtype: int64

In [318]:
# Get all states, and get all nations with gaming facilities
gaming_net_crosswalked_state = gaming_net_crosswalked_state.loc[
    (~gaming_net_crosswalked_state['geocat_mostrecent'].isin(['United States (national)'])) ,
    :
]

gaming_net_crosswalked_state = gaming_net_crosswalked_state.loc[
    (gaming_net_crosswalked_state['geocat_mostrecent']=='All states and commonwealths') | (gaming_net_crosswalked_state['is_facility']>0),
    :
].drop_duplicates()

In [320]:
is_us_state = np.where(gaming_net_crosswalked_state['geocat_mostrecent']=='All states and commonwealths', True, False)
gaming_net_crosswalked_state['is_us_state'] = is_us_state

In [323]:
abbreviation_to_name = {
    # https://en.wikipedia.org/wiki/List_of_states_and_territories_of_the_United_States#States.
    "AK": "Alaska",
    "AL": "Alabama",
    "AR": "Arkansas",
    "AZ": "Arizona",
    "CA": "California",
    "CO": "Colorado",
    "CT": "Connecticut",
    "DE": "Delaware",
    "FL": "Florida",
    "GA": "Georgia",
    "HI": "Hawaii",
    "IA": "Iowa",
    "ID": "Idaho",
    "IL": "Illinois",
    "IN": "Indiana",
    "KS": "Kansas",
    "KY": "Kentucky",
    "LA": "Louisiana",
    "MA": "Massachusetts",
    "MD": "Maryland",
    "ME": "Maine",
    "MI": "Michigan",
    "MN": "Minnesota",
    "MO": "Missouri",
    "MS": "Mississippi",
    "MT": "Montana",
    "NC": "North Carolina",
    "ND": "North Dakota",
    "NE": "Nebraska",
    "NH": "New Hampshire",
    "NJ": "New Jersey",
    "NM": "New Mexico",
    "NV": "Nevada",
    "NY": "New York",
    "OH": "Ohio",
    "OK": "Oklahoma",
    "OR": "Oregon",
    "PA": "Pennsylvania",
    "RI": "Rhode Island",
    "SC": "South Carolina",
    "SD": "South Dakota",
    "TN": "Tennessee",
    "TX": "Texas",
    "UT": "Utah",
    "VA": "Virginia",
    "VT": "Vermont",
    "WA": "Washington",
    "WI": "Wisconsin",
    "WV": "West Virginia",
    "WY": "Wyoming",
}

In [324]:
name_to_abbreviation = {value: key for key, value in abbreviation_to_name.items()}

In [333]:
gaming_net_crosswalked_state.loc[
        gaming_net_crosswalked_state['is_us_state'],
        'owner_state'
    ] = gaming_net_crosswalked_state.loc[
        gaming_net_crosswalked_state['is_us_state'],
        'geoname_mostrecent'
    ].str.replace(' (state)', '').map(name_to_abbreviation)

In [335]:
# gaming_net_crosswalked_state.loc[
#         :, 
#         []
#     ].groupby(
#         by=['owner_state', 'is_us_state', 'datayear', 'unemp_total']
#     ).

In [363]:
gaming_net_crosswalked_state.loc[
    gaming_net_crosswalked_state['is_us_state'],
    'owner_name'
] = gaming_net_crosswalked_state.loc[
        gaming_net_crosswalked_state['is_us_state'],
        'geoname_mostrecent'
    ]

In [None]:
# gaming_net_crosswalked_state['owner_state'].value_counts()

gaming_net_crosswalked_state['owner_state'][gaming_net_crosswalked_state['owner_state'].isin(abbreviation_to_name.keys())]

owner_state
CA    390
OK    199
PO    147
AZ    117
NM     91
     ... 
UT      7
WV      7
VA      7
SW      7
US      4
Name: count, Length: 61, dtype: int64

In [384]:
bar_unemployment = alt.Chart(gaming_net_crosswalked_state.loc[
    (gaming_net_crosswalked_state['owner_state'].isin(['MT', 'OR', 'WA'])) & (gaming_net_crosswalked_state['datayear'].isin([2023])),
    :
]).mark_bar().encode(
    x=alt.X('geoname_mostrecent', sort='-y').title(''),
    y=alt.Y('unemp_total').title('Unemployment Rate'),
    facet=alt.Facet('owner_state').title(''),
    color=alt.Color('owner_state').scale(scheme='blues').legend(None),
    fill=alt.Fill('is_us_state').scale(scheme='set2').legend(None)
).resolve_scale(x='independent')

bar_unemployment.properties(
    height=300,
    width=200,
    title=alt.Title(['Employment: Reservations (in the northwest) are below state averages (2023)'],
                    anchor='start',
                    subtitle='State-wide unemployment rates highlighted',
                    # align='center',
                    fontSize=22,
                    dx=25)
).configure(
    font='Agency FB',
).configure_axis(
    labelFontSize=14,
    titleFontSize=16,
)
# Texture for state vs. nation, top X unemployment rates, facet by year

### 4a. Nations' share of total income in states?

### 5. Zooming in on Montana Nations: Bump Chart?

In [388]:
mt = gaming_net_crosswalked_state.loc[
        (gaming_net_crosswalked_state['owner_state']=='MT'), 
        :
    ]

#  & (gaming_net_crosswalked_state['datayear']==2023)

In [406]:
# # This looks horrendous, and is definitely bugged.
# alt.Chart(mt).mark_line().encode(
#     x=alt.X('datayear:O'),
#     y=alt.Y('rank_val:O'),
#     color=alt.Color('geoname_mostrecent:N'),
#     detail='geoname_mostrecent:N'
# ).transform_window(
#     rank_val="rank()",
#     sort=[
#         alt.SortField("incagg_total", order="descending")
#     ],
#     groupby=["datayear"],
# )

In [405]:
alt.Chart(mt).mark_line().encode(
    x=alt.X('datayear'),
    y=alt.Y('povshare_aiana'),
    color=alt.Color('geoname_mostrecent:N')
)

In [None]:
nigc_mt = nigc.loc[(nigc['owner_state']=='MT') & (nigc['is_facility']), :]

In [None]:
nigc_mt = nigc_mt.merge(crosswalk, on='owner_name')

In [None]:
mt_nations = nigc_mt.merge(net, on='geoname_mostrecent').drop_duplicates()

In [None]:
mt_state = net.loc[net['geoname_mostrecent']=='Montana (state)', :]

In [None]:
mt = pd.concat([mt_nations, mt_state])

In [None]:
alt.Chart(mt).mark_point()

### 6. Montana class distribution