# Text Methods
* Text data often needs to be cleaned or manipulated before it can be processed.
* While we can always use a custom apply() function for these tasks, pandas comes with many built-in string methods, available on a Series through the `.str` accessor.


In [1]:
import numpy as np
import pandas as pd

In [3]:
email = 'jose@email.com'

In [9]:
email.isdigit()

False

In [5]:
names = pd.Series(['andrew','bobo','claire','david','5'])

In [6]:
names

0    andrew
1      bobo
2    claire
3     david
4         5
dtype: object

In [7]:
names.str.upper()

0    ANDREW
1      BOBO
2    CLAIRE
3     DAVID
4         5
dtype: object

In [13]:
names.str.isdigit()

0    False
1    False
2    False
3    False
4     True
dtype: bool

In [15]:
# Each element is a single string holding several comma-separated tickers.
# NOTE(review): 'APPL' is probably meant to be 'AAPL' (Apple's ticker) - confirm before changing the data.
tech_finance = [
    'GOOG,APPL,AMZN',
    'JPM,BAC,GS',
]

In [16]:
len(tech_finance)

2

In [17]:
tickers = pd.Series(tech_finance)

In [21]:
# Split each string on ','; expand=True spreads the pieces across columns
# (one column per piece) rather than keeping them together in one value per row.
tickers.str.split(',', expand=True)

Unnamed: 0,0,1,2
0,GOOG,APPL,AMZN
1,JPM,BAC,GS


In [22]:
messy_names = pd.Series(['andrew ','bo;bo','  claire  '])

In [23]:
messy_names

0       andrew 
1         bo;bo
2      claire  
dtype: object

In [28]:
# Clean every name with chained .str methods:
#   str.replace(';','') deletes each ';' (it is removed, not replaced by a space)
#   str.strip() trims whitespace at the start and end only, not inside the string
#   str.capitalize() upper-cases the first character and lower-cases the rest
(
    messy_names
    .str.replace(';','')
    .str.strip()
    .str.capitalize()
)

0    Andrew
1      Bobo
2    Claire
dtype: object

In [29]:
def cleanup(name):
    '''
    Clean one raw name: remove every ';', trim surrounding whitespace and
    capitalize the result (first character upper-case, the rest lower-case).

    When the cleanup is very specific to the data, or very messy, a custom
    function passed to Series.apply() can be easier to read than a long chain
    of .str calls. It can also be faster, and wrapping it with np.vectorize is
    another option - time the alternatives on your own data before choosing.

    Parameters
    ----------
    name : the raw value to clean, normally a str.

    Returns
    -------
    The cleaned string. Non-string values (for example NaN or None marking
    missing data) are returned unchanged, so missing entries pass through
    instead of raising AttributeError.
    '''
    if not isinstance(name, str):
        return name
    # Strip before capitalizing so the first real character, not a leading
    # space, is the one that gets upper-cased.
    return name.replace(';', '').strip().capitalize()




In [33]:
messy_names.apply(cleanup)

0    Andrew
1      Bobo
2    Claire
dtype: object