In [1]:
import pandas as pd
import numpy as np

In [2]:
"hello".upper()

'HELLO'

### [String handling methods in pandas](https://pandas.pydata.org/pandas-docs/stable/reference/series.html#string-handling)

## How to use string methods in pandas?

In [4]:
# Load the orders file with read_table (its default delimiter is a tab)
# and preview the first five rows.
orders = pd.read_table(filepath_or_buffer='http://bit.ly/chiporders')
orders.head(n=5)

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
0,1,1,Chips and Fresh Tomato Salsa,,$2.39
1,1,1,Izze,[Clementine],$3.39
2,1,1,Nantucket Nectar,[Apple],$3.39
3,1,1,Chips and Tomatillo-Green Chili Salsa,,$2.39
4,2,2,Chicken Bowl,"[Tomatillo-Red Chili Salsa (Hot), [Black Beans...",$16.98


### To use a string method on a pandas Series, call it through the `.str` accessor: `series.str.method()`

In [6]:
# Upper-case every value of the item_name column through the .str accessor,
# then write the result back into the same column.
item_names_upper = orders['item_name'].str.upper()
orders['item_name'] = item_names_upper

In [7]:
# Preview the first five rows to confirm item_name is now upper-case.
orders.head(n=5)

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
0,1,1,CHIPS AND FRESH TOMATO SALSA,,$2.39
1,1,1,IZZE,[Clementine],$3.39
2,1,1,NANTUCKET NECTAR,[Apple],$3.39
3,1,1,CHIPS AND TOMATILLO-GREEN CHILI SALSA,,$2.39
4,2,2,CHICKEN BOWL,"[Tomatillo-Red Chili Salsa (Hot), [Black Beans...",$16.98


In [9]:
# Capitalize item_name: first character upper-case, the remainder lower-case.
# The result overwrites the (currently upper-cased) column.
item_names_capitalized = orders['item_name'].str.capitalize()
orders['item_name'] = item_names_capitalized
orders.head(n=5)

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
0,1,1,Chips and fresh tomato salsa,,$2.39
1,1,1,Izze,[Clementine],$3.39
2,1,1,Nantucket nectar,[Apple],$3.39
3,1,1,Chips and tomatillo-green chili salsa,,$2.39
4,2,2,Chicken bowl,"[Tomatillo-Red Chili Salsa (Hot), [Black Beans...",$16.98


Using [pandas.Series.str.contains](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.str.contains.html) to test whether each value contains a substring

In [12]:
# Boolean Series: True where the item name contains 'chips', ignoring case.
# Show the first 11 flags.
has_chips = orders['item_name'].str.contains('chips', case=False)
has_chips.head(11)

0      True
1     False
2     False
3      True
4     False
5     False
6      True
7     False
8     False
9     False
10     True
Name: item_name, dtype: bool

In [14]:
# Filter rows with a boolean mask: keep only the orders whose item name
# contains 'chips' (case-insensitive), then preview the first five matches.
chips_mask = orders['item_name'].str.contains('chips', case=False)
orders[chips_mask].head()

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
0,1,1,Chips and fresh tomato salsa,,$2.39
3,1,1,Chips and tomatillo-green chili salsa,,$2.39
6,3,1,Side of chips,,$1.69
10,5,1,Chips and guacamole,,$4.45
14,7,1,Chips and guacamole,,$4.45


In [16]:
# The same filter written with .loc: the boolean mask selects the rows and
# ':' selects every column.
is_chips_row = orders['item_name'].str.contains('chips', case=False)
orders.loc[is_chips_row, :].head()

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
0,1,1,Chips and fresh tomato salsa,,$2.39
3,1,1,Chips and tomatillo-green chili salsa,,$2.39
6,3,1,Side of chips,,$1.69
10,5,1,Chips and guacamole,,$4.45
14,7,1,Chips and guacamole,,$4.45


In [18]:
# Substitute/replace a value: strip every '[' from choice_description.
# regex=False makes the literal-string intent explicit -- a lone '[' is not a
# valid regular expression, so this call should not depend on the pandas
# version's default for `regex`.
# The result is a new Series; `orders` itself is not modified.
orders['choice_description'].str.replace('[', '', regex=False).head(7)

0                                                  NaN
1                                          Clementine]
2                                               Apple]
3                                                  NaN
4    Tomatillo-Red Chili Salsa (Hot), Black Beans, ...
5    Fresh Tomato Salsa (Mild), Rice, Cheese, Sour ...
6                                                  NaN
Name: choice_description, dtype: object

In [21]:
# Preview the first five rows of the frame again.
orders.head(n=5)

Unnamed: 0,order_id,quantity,item_name,choice_description,item_price
0,1,1,Chips and fresh tomato salsa,,$2.39
1,1,1,Izze,[Clementine],$3.39
2,1,1,Nantucket nectar,[Apple],$3.39
3,1,1,Chips and tomatillo-green chili salsa,,$2.39
4,2,2,Chicken bowl,"[Tomatillo-Red Chili Salsa (Hot), [Black Beans...",$16.98


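`orders['choice_description'].str.replace('[','')` returns a new Series and leaves `orders` unchanged (see the `orders.head()` output above), so another `.str` method can be chained onto the result.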

Using chaining to replace

In [31]:
# Chain two literal replacements: each .str.replace returns a Series, so the
# second call strips ']' from the output of the first.
# regex=False on both calls: '[' and ']' are plain characters here, not
# patterns, and the result should not depend on the pandas default for `regex`.
(
    orders['choice_description']
    .str.replace('[', '', regex=False)
    .str.replace(']', '', regex=False)
    .head(7)
)

0                                                  NaN
1                                           Clementine
2                                                Apple
3                                                  NaN
4    Tomatillo-Red Chili Salsa (Hot), Black Beans, ...
5    Fresh Tomato Salsa (Mild), Rice, Cheese, Sour ...
6                                                  NaN
Name: choice_description, dtype: object

### Using a regular expression to remove both brackets in one call

In [33]:
# Remove both '[' and ']' in a single pass with a regex character class.
# - regex=True is required: without it, pandas versions whose default is
#   regex=False look for the literal text "[\[\]]" and remove nothing.
# - The pattern is a raw string so that \[ and \] reach the regex engine
#   unchanged instead of being invalid Python string escapes.
orders['choice_description'].str.replace(r'[\[\]]', '', regex=True).head(7)

0                                                  NaN
1                                           Clementine
2                                                Apple
3                                                  NaN
4    Tomatillo-Red Chili Salsa (Hot), Black Beans, ...
5    Fresh Tomato Salsa (Mild), Rice, Cheese, Sour ...
6                                                  NaN
Name: choice_description, dtype: object