In [2]:
my_string = 'hello'
my_string.capitalize()

'Hello'

In [3]:
my_string.isdigit()

False

In [5]:
help(str)

Help on class str in module builtins:

class str(object)
 |  str(object='') -> str
 |  str(bytes_or_buffer[, encoding[, errors]]) -> str
 |  
 |  Create a new string object from the given object. If encoding or
 |  errors is specified, then the object must expose a data buffer
 |  that will be decoded using the given encoding and error handler.
 |  Otherwise, returns the result of object.__str__() (if defined)
 |  or repr(object).
 |  encoding defaults to sys.getdefaultencoding().
 |  errors defaults to 'strict'.
 |  
 |  Methods defined here:
 |  
 |  __add__(self, value, /)
 |      Return self+value.
 |  
 |  __contains__(self, key, /)
 |      Return key in self.
 |  
 |  __eq__(self, value, /)
 |      Return self==value.
 |  
 |  __format__(self, format_spec, /)
 |      Return a formatted version of the string as described by format_spec.
 |  
 |  __ge__(self, value, /)
 |      Return self>=value.
 |  
 |  __getattribute__(self, name, /)
 |      Return getattr(self, name).
 |  
 |  

In [2]:
import pandas as pd

In [16]:
names = pd.Series(['abhi','arnav','aryan','aditya','5'])

In [17]:
names

0      abhi
1     arnav
2     aryan
3    aditya
4         5
dtype: object

In [18]:
names.str.capitalize()

0      Abhi
1     Arnav
2     Aryan
3    Aditya
4         5
dtype: object

In [19]:
names.str.isdigit()

0    False
1    False
2    False
3    False
4     True
dtype: bool

In [23]:
tech_finance = ['GOOGLE,AMAZON,MICROSOFT','TATA,BMW,TOYOTA']

In [24]:
tickers = pd.Series(tech_finance)

In [25]:
tickers

0    GOOGLE,AMAZON,MICROSOFT
1            TATA,BMW,TOYOTA
dtype: object

In [28]:
tickers.str.split(',')[0]

['GOOGLE', 'AMAZON', 'MICROSOFT']

In [30]:
tickers.str.split(',').str[0]  # .str[0] takes the first element of each row's split list

0    GOOGLE
1      TATA
dtype: object

In [33]:
tickers.str.split(',', expand = True)  # expand=True spreads the split pieces into separate columns

Unnamed: 0,0,1,2
0,GOOGLE,AMAZON,MICROSOFT
1,TATA,BMW,TOYOTA


In [3]:
messy_names = pd.Series(["andrew  ","bo;bo","  claire  "])

In [4]:
messy_names

0      andrew  
1         bo;bo
2      claire  
dtype: object

In [6]:
messy_names.str.replace(";","")

0      andrew  
1          bobo
2      claire  
dtype: object

In [7]:
messy_names.str.strip()

0    andrew
1     bo;bo
2    claire
dtype: object

In [8]:
messy_names.str.replace(";","").str.strip()

0    andrew
1      bobo
2    claire
dtype: object

In [9]:
messy_names.str.replace(";","").str.strip().str.capitalize()

0    Andrew
1      Bobo
2    Claire
dtype: object

In [10]:
def cleanup(name):
    """Return a tidied copy of ``name``.

    The steps are applied in this order: every ";" character is removed,
    leading and trailing whitespace is stripped, and the result is passed
    through ``str.capitalize``.
    """
    return name.replace(";", "").strip().capitalize()

In [11]:
messy_names

0      andrew  
1         bo;bo
2      claire  
dtype: object

In [12]:
messy_names.apply(cleanup)

0    Andrew
1      Bobo
2    Claire
dtype: object

In [13]:
import timeit 
  
# Setup snippet: executed only once, before the timed statement is run.
# It imports pandas/numpy, builds the sample Series and defines cleanup().
# NOTE(review): messy_names holds only three elements, so the timings below
# presumably reflect per-call overhead more than per-element cost — confirm
# on a larger Series before drawing conclusions.
setup = '''
import pandas as pd
import numpy as np
messy_names = pd.Series(["andrew  ","bo;bo","  claire  "])
def cleanup(name):
    name = name.replace(";","")
    name = name.strip()
    name = name.capitalize()
    return name
'''

# Snippets whose execution time is to be measured.
# Variant 1: chained .str accessor methods on the Series.
stmt_pandas_str = ''' 
messy_names.str.replace(";","").str.strip().str.capitalize()
'''

# Variant 2: the cleanup() function passed to Series.apply.
stmt_pandas_apply = '''
messy_names.apply(cleanup)
'''

# Variant 3: cleanup() wrapped with np.vectorize and called on the Series.
stmt_pandas_vectorize='''
np.vectorize(cleanup)(messy_names)
'''

In [14]:
# Total seconds for 10,000 executions of the chained .str statement.
timeit.timeit(stmt=stmt_pandas_str, setup=setup, number=10000)

2.495393300196156

In [15]:
# Total seconds for 10,000 executions of the Series.apply(cleanup) statement.
timeit.timeit(stmt=stmt_pandas_apply, setup=setup, number=10000)

0.45844929991289973

In [16]:
# Total seconds for 10,000 executions of the np.vectorize(cleanup) statement.
timeit.timeit(stmt=stmt_pandas_vectorize, setup=setup, number=10000)

0.3307306000497192