# Human Rights Campaign - Municipal Equality Index Cleaning

* [Imports](#Imports)
* [Cleaning](#Cleaning)


## Imports

In [11]:
import numpy as np
import pandas as pd


In [12]:
# Load the raw MEI scores. `header=None` stops pandas from treating the first
# row as column names, so the columns get integer labels instead.
df = pd.read_csv(filepath_or_buffer='../data/mei.csv', header=None)

In [13]:
# Preview the first five rows of the raw frame to check how the file was parsed.
df.head()

Unnamed: 0,0,1,2,3,4
0,ALABAMA,Auburn,21,2,23
1,ALABAMA,Birmingham,94,8,100
2,ALABAMA,Florence,0,0,0
3,ALABAMA,Hoover,12,0,12
4,ALABAMA,Huntsville,20,0,20


## Cleaning

In [14]:
# Give the integer-labelled columns descriptive names. The result is assigned
# back to `df` rather than mutating the frame in place.
column_names = {
    0: 'state',
    1: 'city',
    2: 'standard_score',
    3: 'flex_score',
    4: 'total_mei',
}
df = df.rename(columns=column_names)

In [15]:
# Tally missing values per column (`isna` performs the same check as `isnull`).
df.isna().sum()

state             0
city              0
standard_score    0
flex_score        0
total_mei         0
dtype: int64

In [16]:
# List the dtype pandas inferred for each column.
df.dtypes

state             object
city              object
standard_score     int64
flex_score         int64
total_mei          int64
dtype: object

In [19]:
# Normalise text casing: city names become fully lower-case, and state names
# keep only their first letter upper-cased (e.g. "ALABAMA" -> "Alabama").
df = df.assign(
    city=df['city'].str.lower(),
    state=df['state'].str.capitalize(),
)
df.head()

Unnamed: 0,state,city,standard_score,flex_score,total_mei
0,Alabama,auburn,21,2,23
1,Alabama,birmingham,94,8,100
2,Alabama,florence,0,0,0
3,Alabama,hoover,12,0,12
4,Alabama,huntsville,20,0,20


In [23]:
# Lookup table: state name -> two-letter abbreviation (50 states plus DC).
# Keys deliberately use the casing produced by `str.capitalize()` -- only the
# first letter upper-cased, e.g. "New york" -- because that is how the `state`
# column is formatted when this table is applied to it.
us_state_to_abbrev = dict([
    ("Alabama", "AL"), ("Alaska", "AK"), ("Arizona", "AZ"),
    ("Arkansas", "AR"), ("California", "CA"), ("Colorado", "CO"),
    ("Connecticut", "CT"), ("Delaware", "DE"), ("Florida", "FL"),
    ("Georgia", "GA"), ("Hawaii", "HI"), ("Idaho", "ID"),
    ("Illinois", "IL"), ("Indiana", "IN"), ("Iowa", "IA"),
    ("Kansas", "KS"), ("Kentucky", "KY"), ("Louisiana", "LA"),
    ("Maine", "ME"), ("Maryland", "MD"), ("Massachusetts", "MA"),
    ("Michigan", "MI"), ("Minnesota", "MN"), ("Mississippi", "MS"),
    ("Missouri", "MO"), ("Montana", "MT"), ("Nebraska", "NE"),
    ("Nevada", "NV"), ("New hampshire", "NH"), ("New jersey", "NJ"),
    ("New mexico", "NM"), ("New york", "NY"), ("North carolina", "NC"),
    ("North dakota", "ND"), ("Ohio", "OH"), ("Oklahoma", "OK"),
    ("Oregon", "OR"), ("Pennsylvania", "PA"), ("Rhode island", "RI"),
    ("South carolina", "SC"), ("South dakota", "SD"), ("Tennessee", "TN"),
    ("Texas", "TX"), ("Utah", "UT"), ("Vermont", "VT"),
    ("Virginia", "VA"), ("Washington", "WA"), ("West virginia", "WV"),
    ("Wisconsin", "WI"), ("Wyoming", "WY"), ("District of columbia", "DC"),
])

In [24]:
# Replace each state name with its two-letter abbreviation. Passing the dict
# directly to `Series.map` yields NaN for any value that has no entry, so all
# unknown names can be collected and reported together instead of stopping at
# the first one with a bare KeyError.
state_abbrev = df['state'].map(us_state_to_abbrev)
unmapped_states = df.loc[state_abbrev.isna(), 'state'].unique().tolist()
if unmapped_states:
    # Same exception type as the plain dict lookup raised, with a usable message.
    raise KeyError(f"No abbreviation defined for state value(s): {unmapped_states}")
df['state'] = state_abbrev

In [25]:
# Preview the first rows again to confirm `state` now holds the abbreviations.
df.head()

Unnamed: 0,state,city,standard_score,flex_score,total_mei
0,AL,auburn,21,2,23
1,AL,birmingham,94,8,100
2,AL,florence,0,0,0
3,AL,hoover,12,0,12
4,AL,huntsville,20,0,20


All of my other cleaning was performed in Excel prior to reading in the CSV in this notebook.

In [26]:
# Write the cleaned table to disk; `index=False` leaves the row index out of
# the file.
df.to_csv(path_or_buf='../data/mei_clean.csv', index=False)