# Strings...
- [pandas.DataFrame.replace](http://pandas.pydata.org/pandas-docs/stable/generated/pandas.DataFrame.replace.html)
- [Pandas API - String Handling](http://pandas.pydata.org/pandas-docs/stable/api.html)
- .str.strip([to_strip])
- .str.replace(pat, repl[, n, case, flags])
- .str.lower()
- etc...

In [2]:
import pandas as pd

In [18]:
# Load the Chipotle orders dataset from the web into a DataFrame.
# The file is parsed as tab-separated text (the delimiter `read_table` uses by default).
orders = pd.read_csv('http://bit.ly/chiporders', sep='\t')
# preview the first five rows
orders.head()

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
0,1,1,Chips and Fresh Tomato Salsa,,$2.39
1,1,1,Izze,[Clementine],$3.39
2,1,1,Nantucket Nectar,[Apple],$3.39
3,1,1,Chips and Tomatillo-Green Chili Salsa,,$2.39
4,2,2,Chicken Bowl,"[Tomatillo-Red Chili Salsa (Hot), [Black Beans...",$16.98


In [19]:
# Clean up the column names with the vectorised `.str.replace` accessor:
# every space in a column label is swapped for an underscore (plain text match, no regex).
orders.columns = orders.columns.str.replace(' ', '_', regex=False)
# show the resulting column index
orders.columns

Index(['order_id', 'quantity', 'item_name', 'choice_description',
       'item_price'],
      dtype='object')

In [20]:
# The plain-Python way to use a string method: call it directly on a str object.
# (pandas offers string methods element-wise on a Series/Index through the `.str` accessor.)
'hello'.upper()

'HELLO'

In [23]:
# `.str.contains` tests every item_name for the substring 'Chicken'
# and yields a boolean Series (True where the substring is found); show the first five.
orders.item_name.str.contains('Chicken').head()

0    False
1    False
2    False
3    False
4     True
Name: item_name, dtype: bool

In [24]:
# Row selection with a boolean mask: keep only the orders whose item_name
# contains 'Chicken', then preview the first five matches.
orders.loc[orders.item_name.str.contains('Chicken')].head()

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
4,2,2,Chicken Bowl,"[Tomatillo-Red Chili Salsa (Hot), [Black Beans...",$16.98
5,3,1,Chicken Bowl,"[Fresh Tomato Salsa (Mild), [Rice, Cheese, Sou...",$10.98
11,6,1,Chicken Crispy Tacos,"[Roasted Chili Corn Salsa, [Fajita Vegetables,...",$8.75
12,6,1,Chicken Soft Tacos,"[Roasted Chili Corn Salsa, [Rice, Black Beans,...",$8.75
13,7,1,Chicken Bowl,"[Fresh Tomato Salsa, [Fajita Vegetables, Rice,...",$11.25


In [25]:
# String methods can be chained together: strip '[' first, then ']'.
# regex=False states explicitly that each bracket is a literal character, so the
# result does not depend on the pandas version's default for `regex`
# (and no FutureWarning is raised for the single-character pattern on pandas 1.x).
(
    orders.choice_description
    .str.replace('[', '', regex=False)
    .str.replace(']', '', regex=False)
    .head()
)

0                                                  NaN
1                                           Clementine
2                                                Apple
3                                                  NaN
4    Tomatillo-Red Chili Salsa (Hot), Black Beans, ...
Name: choice_description, dtype: object

In [26]:
# Many pandas string methods support regular expressions (regex).
# The character class [\[\]] matches either bracket, so a single call removes both.
#  - the raw string (r'...') keeps the backslashes intact and avoids Python's
#    invalid-escape-sequence warning for '\[' and '\]'
#  - regex=True is required explicitly: pandas 2.0+ treats the pattern as literal
#    text by default, in which case nothing would be replaced
orders.choice_description.str.replace(r'[\[\]]', '', regex=True).head()

0                                                  NaN
1                                           Clementine
2                                                Apple
3                                                  NaN
4    Tomatillo-Red Chili Salsa (Hot), Black Beans, ...
Name: choice_description, dtype: object

In [27]:
# Mapping of existing cell value -> replacement value for DataFrame.replace.
# Only cells whose entire value equals a key are replaced (no substring matching).
# NOTE(review): the second key ends with a tab character, written here as \t so it is visible.
# NOTE(review): 'City' and 'Colors Reported\t' do not appear in the order rows shown in this
# notebook; they look like leftovers from another dataset -- confirm before removing.
dicta = {
    'City': 'PAD TAI',
    'Colors Reported\t': 'popo',
    'Chips and Fresh Tomato Salsa': 'Troxell',
}
# apply the mapping across all columns and rebind `orders` to the result
orders = orders.replace(dicta)
orders.head()

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
0,1,1,Troxell,,$2.39
1,1,1,Izze,[Clementine],$3.39
2,1,1,Nantucket Nectar,[Apple],$3.39
3,1,1,Chips and Tomatillo-Green Chili Salsa,,$2.39
4,2,2,Chicken Bowl,"[Tomatillo-Red Chili Salsa (Hot), [Black Beans...",$16.98
