#In [8]:
import os
import pandas as pd
from pandas import DataFrame
from pathlib2 import Path

#Locating the sibling "gis" directory (next to the current working directory), which holds
#the esri module and the config.json used to log in.
home = os.getcwd()
#os.path picks the platform's separator, so the path no longer depends on Windows-style '\\'.
gis_dir = os.path.join(os.path.dirname(home), 'gis')
#Importing esri while inside gis_dir. The finally clause restores the working directory even
#when the import fails, so later relative file reads still resolve against home.
#NOTE(review): importing this way relies on the current directory being on sys.path
#(true for notebooks / interactive sessions) - confirm if this is ever run as a script.
os.chdir(gis_dir)
try:
    import esri
finally:
    os.chdir(home)

#Logging in to agol.
l = esri.Arcgis(Path(gis_dir) / 'config.json')
#Reading in prepped data.
df = pd.read_excel('10_11_20_prepped.xlsx')

#Obtaining distinct addresses so do not have to geocode same location multiple times.
da = df[['City', 'State_Province', 'Country']].drop_duplicates()
#Calling State_Province a "region" to align with API calls. Also making columns lowercase.
da.columns = ['city', 'region', 'country']
#List of address dicts: one {'city': ..., 'region': ..., 'country': ...} dict per distinct row.
#to_dict builds these directly instead of re-reading each row once per column.
final_res = da.to_dict(orient = 'records')

#List of address objects.
addresses = [esri.Address(add) for add in final_res]
#Geocoding all addresses: each address object stores the result of long_lat() under 'coords'.
geoc_add = []
for address in addresses:
    address.multi_field_add['coords'] = address.long_lat()
    geoc_add.append(address)

#Geocoded dataframe, one row per address object.
geoc_df = DataFrame([address.multi_field_add for address in geoc_add])

#Expanding each 'coords' entry into its own columns and placing them beside the first three columns.
address_part = geoc_df.iloc[:, :3]
coord_part = geoc_df['coords'].apply(pd.Series)
geoc_df = pd.concat([address_part, coord_part], axis = 1)

#Renaming cols: the first three keep their names, the expanded coordinate columns become x and y.
#NOTE(review): assumes long_lat() yields exactly two values, in (x, y) order - confirm against esri.Address.
geoc_df.columns = [*geoc_df.columns[:3], 'x', 'y']

#Geocoded point-level detail for AGOL. The left join keeps every row of df and attaches the
#coordinates found for its city / state-province / country combination.
res = df.merge(
    geoc_df,
    how = 'left',
    left_on = ['City', 'State_Province', 'Country'],
    right_on = ['city', 'region', 'country'],
)

#The lowercase address columns were only needed as join keys for geocoding, so they are removed.
res = res.drop(columns = ['city', 'region', 'country'])

#Commenting out write method to prevent overwrite mishap.
# res.to_csv('10_11_20_geocoded.csv', index = False)

#Creating aggregate mean and count version with geocoded points too.
cols = ['Location', 'Total Compensation', 'Base', 'Stock', 'Bonus', 'Years of Experience', 'Years at Company']
#Naming the aggregation ('mean') instead of passing np.mean: numpy is not imported in this
#section, so the np reference raised a NameError. The result is the same per-location average.
agg_mean = res[cols].pivot_table(index = 'Location', aggfunc = 'mean').reset_index()
#Attaching each location's distinct coordinates to its averages.
agg_mean = pd.merge(res[['Location', 'x', 'y']].drop_duplicates(), agg_mean, on = 'Location')
#Number of distinct key_0 values per location, stored in a column called "Count".
#NOTE(review): key_0 is not created anywhere in this section; presumably it is a row
#identifier already present in the prepped data - confirm.
count = res.groupby('Location')['key_0'].nunique().rename('Count').reset_index()
#Writing aggregate file. Commenting out write method to prevent overwrite mishap.
res2 = pd.merge(agg_mean, count, on = 'Location')
# res2.to_csv('10_11_20_agg_mean.csv', index = False)