# Pull states mentioned in text and user names
> Author: Sharnique Beck

Packages used:
- Python 3.6.6
- pandas 0.23.4

This notebook begins the process of pulling state abbreviations from the tweet text and usernames. These abbreviations can then be used to verify pulled cities whose names exist in more than one state, by checking each candidate city against the states mentioned.

In [1]:
import pandas as pd
import regex as re


In [2]:
# Load the combined tweets/outages file (it already carries the city_pull column)
tweets_path = '../data/combined_tweets_outages_cities.csv'
df = pd.read_csv(tweets_path)

# The engagement counts are not used in this notebook, so leave them out
unused_columns = ['likes', 'replies', 'retweets']
df = df.drop(columns=unused_columns)

In [3]:
# Preview the first five rows of the loaded frame
df.head(n=5)

Unnamed: 0,timestamp,id,query,text,user,outage,outage_state,city_pull
0,2012-11-01 23:50:22,264152432282578945,EversourceMA OR EversourceNH OR VelcoVT OR nat...,"Tom May, CEO of Northeast Utilities, the paren...",EversourceMA,1,WV OH PA NJ CT MA NY DE MD IN KY MI,"['Sudbury, MA', 'Sudbury, VT']"
1,2012-11-01 23:45:13,264151136792109056,EversourceMA OR EversourceNH OR VelcoVT OR nat...,@NYGovCuomo @lipanews @nationalgridus @nyseand...,readyforthenet,1,WV OH PA NJ CT MA NY DE MD IN KY MI,
2,2012-11-01 23:34:44,264148498352590849,EversourceMA OR EversourceNH OR VelcoVT OR nat...,Some amazing video from the Wareham microburst...,EversourceMA,1,WV OH PA NJ CT MA NY DE MD IN KY MI,"['Wareham, MA', 'Dartmouth, MA']"
3,2012-11-01 23:34:20,264148399190851584,EversourceMA OR EversourceNH OR VelcoVT OR nat...,@nationalgridus Call me if you need some help ...,sparky1000,1,WV OH PA NJ CT MA NY DE MD IN KY MI,
4,2012-11-01 23:31:56,264147793147490304,EversourceMA OR EversourceNH OR VelcoVT OR nat...,Current PSNH statewide w/o power: 885. We're d...,EversourceNH,1,WV OH PA NJ CT MA NY DE MD IN KY MI,


In [4]:
# Replace every non-string entry (e.g. NaN where no city was pulled) with ''
df['city_pull'] = df['city_pull'].map(lambda value: value if type(value) is str else '')

### Pull state abbreviation from text

In [5]:
# Two-letter abbreviations of the states to look for
state = ['MA', 'ME', 'NY', 'RI', 'VT', 'CT', 'NH']

# Split each tweet into consecutive pairs of capital letters and keep the first
# pair that is one of the abbreviations above ('' when there is none).
# Non-string cells (e.g. NaN for a missing tweet) yield '' instead of making
# re.findall raise a TypeError.
# NOTE(review): the pattern has no word boundaries, so pairs inside all-caps
# words also count (e.g. 'HOME' -> 'ME') -- confirm this is acceptable.
text_pairs = df['text'].map(
    lambda tweet: re.findall(r'([A-Z]{2})', tweet) if isinstance(tweet, str) else []
)
df['text_state_pull'] = text_pairs.map(
    lambda pairs: next((pair for pair in pairs if pair in state), '')
)


### Pull state abbreviation from username

In [6]:
# Look for state abbreviations located in user names: split each name into
# consecutive pairs of capital letters and keep the first pair that appears in
# the `state` list ('' when there is none).
# Non-string cells (e.g. NaN for a missing user name) yield '' instead of
# making re.findall raise a TypeError.
user_pairs = df['user'].map(
    lambda name: re.findall(r'([A-Z]{2})', name) if isinstance(name, str) else []
)
df['user_state_pull'] = user_pairs.map(
    lambda pairs: next((pair for pair in pairs if pair in state), '')
)


In [7]:
# Spot-check the two new columns on the first ten rows rather than rendering the whole frame
df.head(10)

Unnamed: 0,timestamp,id,query,text,user,outage,outage_state,city_pull,text_state_pull,user_state_pull
0,2012-11-01 23:50:22,264152432282578945,EversourceMA OR EversourceNH OR VelcoVT OR nat...,"Tom May, CEO of Northeast Utilities, the paren...",EversourceMA,1,WV OH PA NJ CT MA NY DE MD IN KY MI,"['Sudbury, MA', 'Sudbury, VT']",,MA
1,2012-11-01 23:45:13,264151136792109056,EversourceMA OR EversourceNH OR VelcoVT OR nat...,@NYGovCuomo @lipanews @nationalgridus @nyseand...,readyforthenet,1,WV OH PA NJ CT MA NY DE MD IN KY MI,,NY,
2,2012-11-01 23:34:44,264148498352590849,EversourceMA OR EversourceNH OR VelcoVT OR nat...,Some amazing video from the Wareham microburst...,EversourceMA,1,WV OH PA NJ CT MA NY DE MD IN KY MI,"['Wareham, MA', 'Dartmouth, MA']",,MA
3,2012-11-01 23:34:20,264148399190851584,EversourceMA OR EversourceNH OR VelcoVT OR nat...,@nationalgridus Call me if you need some help ...,sparky1000,1,WV OH PA NJ CT MA NY DE MD IN KY MI,,,
4,2012-11-01 23:31:56,264147793147490304,EversourceMA OR EversourceNH OR VelcoVT OR nat...,Current PSNH statewide w/o power: 885. We're d...,EversourceNH,1,WV OH PA NJ CT MA NY DE MD IN KY MI,,NH,NH
5,2012-11-01 23:31:30,264147687325179905,EversourceMA OR EversourceNH OR VelcoVT OR nat...,Stop following nationalgridus,Zevimiller,1,WV OH PA NJ CT MA NY DE MD IN KY MI,,,
6,2012-11-01 23:30:21,264147396206923777,EversourceMA OR EversourceNH OR VelcoVT OR nat...,Our #MA team is being supported by crews from ...,nationalgridus,1,WV OH PA NJ CT MA NY DE MD IN KY MI,,MA,
7,2012-11-01 23:29:46,264147250270326786,EversourceMA OR EversourceNH OR VelcoVT OR nat...,"@EvanMansolillo Hi Evan, can you provide us wi...",nationalgridus,1,WV OH PA NJ CT MA NY DE MD IN KY MI,,,
8,2012-11-01 23:05:01,264141019094859776,EversourceMA OR EversourceNH OR VelcoVT OR nat...,"NSTAR crews have restored power to nearly 400,...",EversourceMA,1,WV OH PA NJ CT MA NY DE MD IN KY MI,,,MA
9,2012-11-01 22:41:50,264135186604769280,EversourceMA OR EversourceNH OR VelcoVT OR nat...,@nationalgridus Thanks 2 the crew who restored...,drcavanaugh,1,WV OH PA NJ CT MA NY DE MD IN KY MI,,,


In [8]:
# Write the frame, including the two state columns, without the row index
output_path = '../data/combined_tweets_outages_cities_states.csv'
df.to_csv(output_path, index=False)