In [None]:
import pandas as pd
import numpy as np

In [11]:
# What are vectorized operations
#  Vectorized operations in pandas refer to the ability of pandas to perform element-wise operations on entire arrays (columns or rows) of
#  data without the need for explicit loops.
a = np.array([1,2,3,4])
a * 4

array([ 4,  8, 12, 16])

In [12]:
# problem in vectorized opertions in vanilla python
s = ['cat','mat',None,'rat']            #############  None #######

[i.startswith('c') for i in s]

AttributeError: 'NoneType' object has no attribute 'startswith'

In [13]:
# How pandas solves this issue?

s = pd.Series(['cat','mat',None,'rat'])
# string accessor
s.str.startswith('c')

# fast and optimized

0     True
1    False
2     None
3    False
dtype: object

In [14]:
import seaborn as sns
df=sns.load_dataset('titanic')
df.columns

Index(['survived', 'pclass', 'sex', 'age', 'sibsp', 'parch', 'fare',
       'embarked', 'class', 'who', 'adult_male', 'deck', 'embark_town',
       'alive', 'alone'],
      dtype='object')

In [15]:
# import titanic
df=sns.load_dataset('titanic')
df['Name']

KeyError: 'Name'

In [None]:
df['Name']

0                                Braund, Mr. Owen Harris
1      Cumings, Mrs. John Bradley (Florence Briggs Th...
2                                 Heikkinen, Miss. Laina
3           Futrelle, Mrs. Jacques Heath (Lily May Peel)
4                               Allen, Mr. William Henry
                             ...                        
886                                Montvila, Rev. Juozas
887                         Graham, Miss. Margaret Edith
888             Johnston, Miss. Catherine Helen "Carrie"
889                                Behr, Mr. Karl Howell
890                                  Dooley, Mr. Patrick
Name: Name, Length: 891, dtype: object

In [None]:
# Common Functions
# lower/upper/capitalize/title
df['Name'].str.upper()
df['Name'].str.capitalize()
df['Name'].str.title()
# len
df['Name'][df['Name'].str.len() == 82].values[0]      ################################### Extracts the row values from the columns verticlaly ############
# strip
"                   nitish                              ".strip()
df['Name'].str.strip()

0                                Braund, Mr. Owen Harris
1      Cumings, Mrs. John Bradley (Florence Briggs Th...
2                                 Heikkinen, Miss. Laina
3           Futrelle, Mrs. Jacques Heath (Lily May Peel)
4                               Allen, Mr. William Henry
                             ...                        
886                                Montvila, Rev. Juozas
887                         Graham, Miss. Margaret Edith
888             Johnston, Miss. Catherine Helen "Carrie"
889                                Behr, Mr. Karl Howell
890                                  Dooley, Mr. Patrick
Name: Name, Length: 891, dtype: object

In [None]:
df['Name'].values[0]

'Braund, Mr. Owen Harris'

In [None]:
df['Name'][df['Name'].str.len()>60].values[0]

'Penasco y Castellana, Mrs. Victor de Satode (Maria Josefa Perez de Soto y Vallejo)'

In [None]:
df['last_name']=df['Name'].str.split(',').str.get(0)  # get to fetch any value from the str,list,tuple and all within series
df

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,last_name
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S,Braund
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C,Cumings
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S,Heikkinen
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S,Futrelle
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S,Allen
...,...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S,Montvila
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S,Graham
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S,Johnston
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C,Behr


In [None]:
df['Name'][df['Name'].str.len() == 82]

307    Penasco y Castellana, Mrs. Victor de Satode (M...
Name: Name, dtype: object

In [None]:
# split -> get
df['lastname'] = df['Name'].str.split(',').str.get(0)
df.head()

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked,last_name,lastname
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S,Braund,Braund
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C,Cumings,Cumings
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S,Heikkinen,Heikkinen
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S,Futrelle,Futrelle
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S,Allen,Allen


In [None]:
df['Name'].str.strip()

0                                Braund, Mr. Owen Harris
1      Cumings, Mrs. John Bradley (Florence Briggs Th...
2                                 Heikkinen, Miss. Laina
3           Futrelle, Mrs. Jacques Heath (Lily May Peel)
4                               Allen, Mr. William Henry
                             ...                        
886                                Montvila, Rev. Juozas
887                         Graham, Miss. Margaret Edith
888             Johnston, Miss. Catherine Helen "Carrie"
889                                Behr, Mr. Karl Howell
890                                  Dooley, Mr. Patrick
Name: Name, Length: 891, dtype: object

In [None]:
df[['tittle','firstt_name']]=df['Name'].str.split(',').str.get(1).str.strip().str.split(' ',n=1,expand=True)     ################ Expand return the DataFrame ############# and n for upto which ' '

In [None]:
df

In [None]:
df[['title','firstname']] = df['Name'].str.split(',').str.get(1).str.strip().str.split(' ', n=1, expand=True)
df.head()

df['title'].value_counts()

In [None]:
df['title']=df['title'].str.replace('Mr.','Miss')

In [None]:
# replace
df['title'] = df['title'].str.replace('Ms.','Miss.')
df['title'] = df['title'].str.replace('Mlle.','Miss.')

In [None]:
df['title'].value_counts()

In [None]:
#df['title'][df['title'].str.startswith('C')]
df['Ticket'][df['Ticket'].str.isdigit()]

In [None]:
# filtering
# startswith/endswith
df[df['firstname'].str.endswith('A')]
# isdigit/isalpha...
df[df['firstname'].str.isdigit()]

In [None]:
df['Name'][df['Name'].str.contains('Mr',case=False)]
df['last_name'][df['last_name'].str.contains('^[^aeiouAEIOU].+[^aeiouAEIOU]$')]  #^ for first char ^ for not . any char + for one or more char $ for last

In [None]:
# applying regex                  #####################################
# contains
# search john -> both case
df[df['firstname'].str.contains('john',case=False)]       #########################################3
# find lastnames with start and end char vowel
df[df['lastname'].str.contains('^[^aeiouAEIOU].+[^aeiouAEIOU]$')]

In [None]:
df['Name'].str[::-1]

In [None]:
# slicing
df['Name'].str[::-1]

In [None]:
df.head(5)

NameError: name 'df' is not defined

In [None]:
df.head()

NameError: name 'df' is not defined