## Formatting the Elected Officials Data Set for Circle Map Visual

This notebook takes the raw elected officials (ELOF) data and reshapes it into per-city counts of officials, ready to be merged with a second data set that contains latitude and longitude coordinates for U.S. cities. (Run this notebook **before** the "Merge_for_Circle_Map" Jupyter Notebook.)

**NOTE: This notebook was written for pandas 1.1.x and needs at least version 1.1.0, which introduced `DataFrame.value_counts` (used below).**

In [1]:
# Import Statements 

import re

import pandas as pd
pd.__version__

'1.1.1'

In [2]:
# Reads in the ELOF (elected officials) raw dataset
df_elof = pd.read_csv('RDC-Americas-Cities-2020-Raw-Data - Raw Data- Top 100 Cities.csv')

# Normalizes the column names: every run of whitespace becomes an underscore,
# then everything is lowercased. regex=True is passed explicitly because
# pandas 2.0 changed the str.replace default to literal matching, which would
# silently leave the names untouched; the raw string avoids the invalid '\s'
# escape-sequence warning.
df_elof.columns = df_elof.columns.str.replace(r'\s+', '_', regex=True).str.lower()

df_elof.head()

Unnamed: 0,state,office_uuid,office_name,seat,office_level,office_role,office_category,body_name,jurisdiction,jurisdiction_ocdid,...,person_uuid,official_name,official_party,party,wnw,race,sex,level,include,state.1
0,AL,c13e6096-e0aa-4c64-b9d6-1e1ebe328f0f,Birmingham City Council Member,District 1,locality,,,,Birmingham city,,...,07802561-7ea9-4b7b-b49c-6ec401a0d683,Clinton Woods,Nonpartisan,Independent,Non-White,Black or African American,Male,City,1,AL
1,AL,6b865770-e2cd-423e-bfa0-b0a96b463ab8,Birmingham City Council Member,District 4,locality,,,,Birmingham city,,...,8a058100-7fd8-41c6-b8c6-8d46f555c382,William Parker,Nonpartisan,Independent,Non-White,Black or African American,Male,City,1,AL
2,AL,c28f6b10-c58a-418a-8281-40384263eac5,Birmingham City Council Member,District 6,locality,,,,Birmingham city,,...,2d875891-b877-43b4-b678-362c2fbbe2d7,Crystal Smitherman,Nonpartisan,Independent,Non-White,Black or African American,Female,City,1,AL
3,AL,0dda83cc-014e-41ee-98f2-28b9ef5c35c2,Birmingham City Council Member,District 7,locality,,,,Birmingham city,,...,fba7f72c-6df1-4725-a973-0c337ae6e57d,Wardine Alexander,Nonpartisan,Independent,Non-White,Black or African American,Female,City,1,AL
4,AL,cc8204ba-8ba8-4240-8f29-34a80ade9fd3,Birmingham City Council Member,District 8,locality,,,,Birmingham city,,...,bbde59bd-12be-4f70-a298-f10200926ab8,Steven W. Hoyt,Nonpartisan,Independent,Non-White,Black or African American,Male,City,1,AL


In [3]:
def city_counts_df(by_state, state):

    '''Builds a table of row counts per jurisdiction for one state.

       by_state is a groupby object (grouped by state) restricted to
       the columns 'state' and 'jurisdiction'; state is the key of the
       group to summarise.

       Returns a DataFrame indexed by 'state' with the columns
       'city' (the jurisdiction name) and 'count' (the number of
       rows for that jurisdiction).

    '''
    state_rows = by_state.get_group(state)

    # value_counts on the two-column frame counts each
    # (state, jurisdiction) pair and returns them as a MultiIndex
    counts = pd.DataFrame(state_rows.value_counts())

    # Moving only 'jurisdiction' out of the MultiIndex into a column
    # leaves 'state' behind as the sole index level
    counts = counts.reset_index('jurisdiction')

    # 0 is the default name of the counts column
    tidy = counts.rename(columns={0: 'count', 'jurisdiction': 'city'})
    return tidy.rename_axis('state')

In [4]:
# Grabs all unique states in the ELOF data set, in order of first appearance.
# Missing values are dropped because groupby has no group for them.
states_list = list(df_elof.state.dropna().unique())

# Groups the ELOF rows by state, keeping only the columns needed for counting.
# A scalar key (rather than a one-element list) lets get_group() accept a
# plain state string on every pandas version.
by_state = df_elof.groupby('state')[['state', 'jurisdiction']]

# Builds one city-count DataFrame per state and concatenates them in a single
# pass. DataFrame.append was removed in pandas 2.0, and calling it inside a
# loop re-copies the accumulated frame on every iteration.
state_frames = [city_counts_df(by_state, state) for state in states_list]
all_cities = pd.concat(state_frames) if state_frames else pd.DataFrame()

all_cities.head()

Unnamed: 0_level_0,city,count
state,Unnamed: 1_level_1,Unnamed: 2_level_1
AL,Birmingham city,10
AZ,Phoenix city,9
AZ,Tucson city,7
AZ,Scottsdale city,7
AZ,Mesa city,7


In [5]:
# Now we would like to remove the word 'city'
# or 'county' from each item in the column, 'city'

def remove_text(text):

    '''Strips the trailing 'city' / 'county' designation from a
       jurisdiction name, e.g. 'Birmingham city' -> 'Birmingham'.

       The name is cut just before the LAST standalone word 'city'
       (case-insensitive); if there is none, just before the last
       standalone word 'county'. Everything from that word onward is
       dropped, so 'Indianapolis city (balance)' -> 'Indianapolis'.
       Cutting at the last occurrence keeps names that themselves
       contain the word: 'Oklahoma City city' -> 'Oklahoma City'.

       The word must be preceded by whitespace, so substrings
       ('Felicity') and a name that starts with the word are left
       untouched. Non-string values (e.g. NaN) and names with neither
       word are returned unchanged.

    '''

    # Missing values cannot be cleaned; hand them back as they are
    if not isinstance(text, str):
        return text

    # 'city' takes priority over 'county'
    for word in ('city', 'county'):
        # Standalone word only: whitespace before it, word boundary after it
        pattern = r'(?<=\s)' + word + r'\b'
        hits = list(re.finditer(pattern, text, flags=re.IGNORECASE))
        if hits:
            # Keep what precedes the last hit, minus the separating whitespace
            return text[:hits[-1].start()].rstrip()

    return text

In [6]:
# Clean every entry of the 'city' column with remove_text
cleaned_names = all_cities['city'].apply(remove_text)
all_cities['city'] = cleaned_names
all_cities

Unnamed: 0_level_0,city,count
state,Unnamed: 1_level_1,Unnamed: 2_level_1
AL,Birmingham,10
AZ,Phoenix,9
AZ,Tucson,7
AZ,Scottsdale,7
AZ,Mesa,7
...,...,...
VA,Chesapeake,13
WA,Seattle,11
WA,Spokane,8
WI,Madison,21


In [7]:
# Summary statistics for the 'count' column (data points per city)
count_summary = all_cities['count'].describe()
count_summary

count    102.000000
mean      11.676471
std        7.989054
min        2.000000
25%        7.250000
50%        9.500000
75%       13.000000
max       54.000000
Name: count, dtype: float64

In [8]:
# Write the per-city counts to a csv file (the index is written as well)
output_path = 'elof_cities.csv'
all_cities.to_csv(output_path)