In [106]:
import altair as alt
import pandas as pd
import numpy as np

## Merging Native Economic Trends and NIGC data

In [40]:
# Native Economic Trends workbook; sheet_name=1 selects the second sheet
# (integer sheet positions are zero-based).
net = pd.read_excel(
    '../data/native-economic-trends-data_current.xlsx',
    sheet_name=1,
)
# Cleaned NIGC gaming report.
nigc = pd.read_csv(
    '../data/clean/clean_gaming_report.csv',
)

In [41]:
# Verified crosswalk table; it is joined to the NIGC counts on `owner_name` further down.
crosswalk = pd.read_csv(
    '../data/clean/verified_crosswalk.csv',
)

In [42]:
# Let's grab the number of facilities listed for each owner.
# This is not a reliable count: many facilities appear to be counted more than once,
# although that is mostly a hunch so far.
# nigc.loc[nigc['owner_name']=='Casino Morongo',:]

In [43]:
# Shape the table would have with fully duplicated rows removed (nothing is modified here).
nigc.drop_duplicates().shape

(774, 7)

In [44]:
# Current shape, for comparison with the de-duplicated shape.
nigc.shape

(831, 7)

In [45]:
# Remove rows that are exact duplicates across every column. The name is
# rebound (instead of using inplace=True) so the step stays chainable and the
# frame object loaded from disk is not mutated.
nigc = nigc.drop_duplicates()

In [46]:
# Sum the `is_facility` flag per owner, order the totals from largest to
# smallest, and keep only owners whose total is greater than zero.
nigc_facility_counts = (
    nigc[['owner_name', 'is_facility']]
    .groupby(by=['owner_name'])
    .sum()
    .sort_values('is_facility', ascending=False)
    .loc[lambda totals: totals['is_facility'] > 0, :]
)
nigc_facility_counts
# Caveat: these counts are wrong, at least for the top 2 nations.

Unnamed: 0_level_0,is_facility
owner_name,Unnamed: 1_level_1
"Chickasaw Nation, Oklahoma",25
Choctaw Nation of Oklahoma,23
White Earth Nation,21
"Muscogee (Creek) Nation, Oklahoma",12
"Cherokee Nation, Oklahoma",10
...,...
Washoe Tribe of Nevada and California,1
"Yavapai Apache Nation of the Camp Verde Indian Reservation, AZ",1
Yocha DeHe Wintun Nation,1
Ysleta del Sur Pueblo,1


In [51]:
# Number of facility rows for every (owner_name, classes) pair, as a Series
# indexed by those two keys.
# Assumes `is_facility` is a boolean column, since it is used directly as the
# row mask -- TODO confirm.
nigc_classes_counts_long = (
    nigc.loc[nigc['is_facility'], ['owner_name', 'classes']]
    .groupby(['owner_name', 'classes'])
    .size()
)

In [64]:
# Turn the (owner_name, classes) -> size Series into a flat DataFrame with a
# `count` column. Naming the value column directly avoids depending on the
# implicit integer column label 0, and a stale re-run of this cell now fails
# loudly instead of silently adding an extra `index` column.
nigc_classes_counts_long = nigc_classes_counts_long.reset_index(name='count')

In [99]:
# Reshape to one row per owner and one column per gaming class; owner/class
# pairs that never occur come out as NaN.
nigc_classes_counts_wide = nigc_classes_counts_long.pivot(
    index='owner_name',
    columns='classes',
    values='count',
)


In [101]:
# Remove the name that the pivot left on the columns axis.
nigc_classes_counts_wide = nigc_classes_counts_wide.rename_axis(None, axis='columns')

In [104]:
# Move `owner_name` out of the index into a regular column. The name is
# rebound rather than mutated with inplace=True.
nigc_classes_counts_wide = nigc_classes_counts_wide.reset_index()

In [108]:

# Owner/class combinations that were missing from the pivot count as zero.
# fillna is the direct call for this, and the name is rebound rather than
# mutated with inplace=True.
nigc_classes_counts_wide = nigc_classes_counts_wide.fillna(0)

In [109]:
# Inspect the column labels of the wide table.
nigc_classes_counts_wide.columns

Index(['owner_name', 'Class II', 'Class II & III', 'Class III', 'Other'], dtype='object')

In [111]:
# Cast the per-class count columns from float to int.
# Assigning through `.loc[:, cols] = ...` writes the values into the existing
# float columns on current pandas, so the cast would be discarded; `astype`
# with a per-column mapping returns a frame whose dtypes really are int.
class_count_cols = ['Class II', 'Class II & III', 'Class III', 'Other']
nigc_classes_counts_wide = nigc_classes_counts_wide.astype(
    dict.fromkeys(class_count_cols, int)
)

In [113]:
# Combine the per-owner facility totals with the per-class counts, matching
# rows on `owner_name` (default inner join).
nigc_all_counts = pd.merge(
    nigc_facility_counts,
    nigc_classes_counts_wide,
    on='owner_name',
)

In [130]:
# Replace any remaining missing values with zero. The name is rebound rather
# than mutated with inplace=True.
nigc_all_counts = nigc_all_counts.fillna(0)

In [None]:

# Make sure every count column is stored as an integer.
cols_to_int = ['is_facility', 'Class II', 'Class II & III', 'Class III', 'Other']
nigc_all_counts = nigc_all_counts.astype(dict.fromkeys(cols_to_int, int))

In [146]:
# Display the combined per-owner counts.
nigc_all_counts

Unnamed: 0,owner_name,is_facility,Class II,Class II & III,Class III,Other
0,"Chickasaw Nation, Oklahoma",25,2,23,0,0
1,Choctaw Nation of Oklahoma,23,0,23,0,0
2,White Earth Nation,21,19,2,0,0
3,"Muscogee (Creek) Nation, Oklahoma",12,0,12,0,0
4,"Cherokee Nation, Oklahoma",10,0,10,0,0
...,...,...,...,...,...,...
235,Washoe Tribe of Nevada and California,1,0,0,1,0
236,Yavapai Apache Nation of the Camp Verde Indian...,1,0,1,0,0
237,Yocha DeHe Wintun Nation,1,0,1,0,0
238,Ysleta del Sur Pueblo,1,0,1,0,0


### Applying crosswalk

In [148]:
# crosswalk = crosswalk.loc[:, ['owner_name', 'geoname_mostrecent']]
# crosswalk.drop(columns=['no_reservation_or_not_listed'])

In [149]:
# Attach the crosswalk columns to each owner. This is an inner join (the
# default), so owners without a crosswalk row are dropped.
# Crosswalk columns left over from an earlier run of this cell are removed
# first; otherwise a re-run would produce `_x`/`_y` suffixed duplicates and
# the later merge on `geoname_mostrecent` would fail.
crosswalk_value_cols = [col for col in crosswalk.columns if col != 'owner_name']
nigc_all_counts = (
    nigc_all_counts
    .drop(columns=crosswalk_value_cols, errors='ignore')
    .merge(crosswalk, on='owner_name')
)

In [None]:
# Outer join, because nations without gaming matter here too: rows found on
# only one side are kept.
# NOTE(review): pandas matches missing (NaN) join keys to each other; if
# `geoname_mostrecent` can be missing on both sides, confirm that pairing is intended.
trends = pd.merge(
    nigc_all_counts,
    net,
    how='outer',
    on='geoname_mostrecent',
)

## Visualizations