# Pandas String Functions
This notebook demonstrates various pandas string functions with examples.

In [1]:
import pandas as pd

# Build the small demo frame used throughout this notebook: one row per
# person, with three plain string columns.
# NOTE(review): 'Financea' looks like a typo for 'Finance', but the recorded
# endswith('a') and findall('a') outputs further down depend on this exact
# value, so it is kept as-is.
names = ['Alice', 'Bob', 'Charlie', 'David', 'Eva']
cities = ['New York', 'Los Angeles', 'Chicago', 'Houston', 'Phoenix']
departments = ['HR', 'Financea', 'Marketing', 'IT', 'Operations']

data = {'Name': names, 'City': cities, 'Department': departments}
df = pd.DataFrame(data)
df

Unnamed: 0,Name,City,Department
0,Alice,New York,HR
1,Bob,Los Angeles,Financea
2,Charlie,Chicago,Marketing
3,David,Houston,IT
4,Eva,Phoenix,Operations


## 1. str.lower() and str.upper()
Converts strings to lowercase and uppercase respectively.

In [None]:
# Overwrite 'Name' with its lowercase form. This replaces the original
# capitalised values, so every later cell sees lowercase names.
name_lowercase = df['Name'].str.lower()
df['Name'] = name_lowercase

In [None]:
# Display the frame; 'Name' now holds the lowercased values.
df

Unnamed: 0,Name,City,Department
0,alice,New York,HR
1,bob,Los Angeles,Financea
2,charlie,Chicago,Marketing
3,david,Houston,IT
4,eva,Phoenix,Operations


In [None]:
# Add a lowercase copy of 'Name' and an uppercase copy of 'City' as new
# columns. 'Name' was already lowercased in an earlier cell, so 'Name_lower'
# comes out identical to it.
lowercase_names = df['Name'].str.lower()
uppercase_cities = df['City'].str.upper()
df['Name_lower'] = lowercase_names
df['City_upper'] = uppercase_cities
df

Unnamed: 0,Name,City,Department,Name_lower,City_upper
0,alice,New York,HR,alice,NEW YORK
1,bob,Los Angeles,Financea,bob,LOS ANGELES
2,charlie,Chicago,Marketing,charlie,CHICAGO
3,david,Houston,IT,david,HOUSTON
4,eva,Phoenix,Operations,eva,PHOENIX


## 2. str.contains()
Checks if strings contain a substring.

In [None]:
# Show the 'Name' column that the next cell searches with str.contains().
df['Name']

Unnamed: 0,Name
0,alice
1,bob
2,charlie
3,david
4,eva


In [None]:
# Keep only the rows whose 'Name' contains the letter 'e'.
name_has_e = df['Name'].str.contains('e')
df[name_has_e]

Unnamed: 0,Name,City,Department,Name_lower,City_upper
0,alice,New York,HR,alice,NEW YORK
2,charlie,Chicago,Marketing,charlie,CHICAGO
4,eva,Phoenix,Operations,eva,PHOENIX


In [None]:
# Flag the cities that contain a lowercase 'a'. The match is case-sensitive,
# which is why 'Los Angeles' (capital 'A' only) comes out False.
# NOTE(review): the column is called 'City_contains_New', but the pattern
# actually tested is 'a', not 'New'.
city_has_a = df['City'].str.contains('a')
df['City_contains_New'] = city_has_a
df[['City', 'City_contains_New']]

Unnamed: 0,City,City_contains_New
0,New York,False
1,Los Angeles,False
2,Chicago,True
3,Houston,False
4,Phoenix,False


In [None]:
# Keep only the rows whose 'City' contains a lowercase 't'.
city_has_t = df['City'].str.contains('t')
df[city_has_t]

Unnamed: 0,Name,City,Department,Name_lower,City_upper,City_contains_New
3,david,Houston,IT,david,HOUSTON,False


## 3. str.replace()
Replaces occurrences of a string with another string.

In [None]:
# Swap every 'a' in 'Name' for 'X'. The result is only displayed, not
# assigned, so df itself is left unchanged.
old_char, new_char = "a", "X"
df['Name'].str.replace(old_char, new_char)

Unnamed: 0,Name
0,Xlice
1,bob
2,chXrlie
3,dXvid
4,evX


In [None]:
# Show df again: 'Name' is unchanged because the str.replace() result in the
# previous cell was not assigned back.
df

Unnamed: 0,Name,City,Department,Name_lower,City_upper,City_contains_New
0,alice,New York,HR,alice,NEW YORK,False
1,bob,Los Angeles,Financea,bob,LOS ANGELES,False
2,charlie,Chicago,Marketing,charlie,CHICAGO,True
3,david,Houston,IT,david,HOUSTON,False
4,eva,Phoenix,Operations,eva,PHOENIX,False


In [None]:
# Replace every lowercase 'o' in 'City' with '@', store the result in a new
# 'City_replaced' column, and show it next to the original.
city_with_at = df['City'].str.replace('o', '@')
df['City_replaced'] = city_with_at
df[['City', 'City_replaced']]

Unnamed: 0,City,City_replaced
0,New York,New Y@rk
1,Los Angeles,L@s Angeles
2,Chicago,Chicag@
3,Houston,H@ust@n
4,Phoenix,Ph@enix


In [None]:
# Same replacement again, this time swapping each lowercase 'o' for the
# two-character string '@@'. The replacement may be longer than the text it
# replaces. This overwrites the existing 'City_replaced' column.
city_with_double_at = df['City'].str.replace('o', '@@')
df['City_replaced'] = city_with_double_at
df

Unnamed: 0,Name,City,Department,Name_lower,City_upper,City_contains_New,City_replaced
0,alice,New York,HR,alice,NEW YORK,False,New Y@@rk
1,bob,Los Angeles,Financea,bob,LOS ANGELES,False,L@@s Angeles
2,charlie,Chicago,Marketing,charlie,CHICAGO,True,Chicag@@
3,david,Houston,IT,david,HOUSTON,False,H@@ust@@n
4,eva,Phoenix,Operations,eva,PHOENIX,False,Ph@@enix


In [None]:
 df.replace('Alice',"asdf", inplace=True)
df

Unnamed: 0,Name,City,Department,Name_lower,City_upper,City_contains_New,City_replaced
0,alice,New York,HR,alice,NEW YORK,False,New Y@@rk
1,bob,Los Angeles,Financea,bob,LOS ANGELES,False,L@@s Angeles
2,charlie,Chicago,Marketing,charlie,CHICAGO,True,Chicag@@
3,david,Houston,IT,david,HOUSTON,False,H@@ust@@n
4,eva,Phoenix,Operations,eva,PHOENIX,False,Ph@@enix


## 4. str.len()
Returns the length of each string in the Series.

In [None]:
# Store the character count of each name in a new 'Name_length' column.
name_lengths = df['Name'].str.len()
df['Name_length'] = name_lengths
df

Unnamed: 0,Name,City,Department,Name_lower,City_upper,City_contains_New,City_replaced,Name_length
0,alice,New York,HR,alice,NEW YORK,False,New Y@@rk,5
1,bob,Los Angeles,Financea,bob,LOS ANGELES,False,L@@s Angeles,3
2,charlie,Chicago,Marketing,charlie,CHICAGO,True,Chicag@@,7
3,david,Houston,IT,david,HOUSTON,False,H@@ust@@n,5
4,eva,Phoenix,Operations,eva,PHOENIX,False,Ph@@enix,3


## 5. str.startswith() and str.endswith()
Checks if strings start or end with a specified substring.

In [None]:
# Show the 'Name' column that the next cell filters with str.endswith().
df['Name']

Unnamed: 0,Name
0,alice
1,bob
2,charlie
3,david
4,eva


In [None]:
# Select the rows whose 'Name' ends with the letter 'e'.
name_ends_with_e = df['Name'].str.endswith("e")
df[name_ends_with_e]

Unnamed: 0,Name,City,Department,Name_lower,City_upper,City_contains_New,City_replaced,Name_length
0,alice,New York,HR,alice,NEW YORK,False,New Y@@rk,5
2,charlie,Chicago,Marketing,charlie,CHICAGO,True,Chicag@@,7


In [None]:
# Flag departments that start with 'M' and departments that end with 'a'.
# NOTE(review): the second column is named 'Dept_ends_with_e', but the suffix
# actually tested is 'a' (only 'Financea' matches it).
department = df['Department']
df['Dept_starts_with_M'] = department.str.startswith('M')
df['Dept_ends_with_e'] = department.str.endswith('a')
df[['Department', 'Dept_starts_with_M', 'Dept_ends_with_e']]

Unnamed: 0,Department,Dept_starts_with_M,Dept_ends_with_e
0,HR,False,False
1,Financea,False,True
2,Marketing,True,False
3,IT,False,False
4,Operations,False,False


In [None]:
# Select the rows whose 'Department' starts with 'M'.
dept_starts_with_m = df['Department'].str.startswith('M')
df[dept_starts_with_m]

Unnamed: 0,Name,City,Department,Name_lower,City_upper,City_contains_New,City_replaced,Name_length,Dept_starts_with_M,Dept_ends_with_e
2,charlie,Chicago,Marketing,charlie,CHICAGO,True,Chicag@@,7,True,False


## 6. str.split()
Splits strings around a given separator/delimiter.

In [None]:
# Plain-Python split: the separator below never occurs in string1, so the
# result is a one-element list holding the whole, unsplit string.
string1 = "This/is/ a/ Pandas/ Class"
separator = '@#$@#$@$#@#'
string1.split(separator)

['This/is/ a/ Pandas/ Class']

In [None]:
# Show the 'City' column that is split in the next cell.
df['City']

Unnamed: 0,City
0,New York
1,Los Angeles
2,Chicago
3,Houston
4,Phoenix


In [None]:
# With no separator given, str.split() breaks each city on whitespace and
# yields a list of words per row (single-word cities give one-element lists).
city_words = df['City'].str.split()
df['City_split'] = city_words
df[['City', 'City_split']]

Unnamed: 0,City,City_split
0,New York,"[New, York]"
1,Los Angeles,"[Los, Angeles]"
2,Chicago,[Chicago]
3,Houston,[Houston]
4,Phoenix,[Phoenix]


## 7. str.strip(), str.lstrip(), str.rstrip()
Removes leading and/or trailing whitespace.

In [None]:
# Plain-Python demo of the three strip variants. The slashes printed around
# each value mark where the string starts and ends, so any remaining
# whitespace is visible in the output.
String2 = "    this is a pandas class.     "
print("Original: /", String2, "/", sep="")

str1 = String2.strip()   # removes whitespace from both ends
print("Strip Method: /", str1, "/", sep="")

str2 = String2.lstrip()  # removes leading whitespace only
print("lStrip Method: /", str2, "/", sep="")

str3 = String2.rstrip()  # removes trailing whitespace only
print("rStrip Method: /", str3, "/", sep="")


Original: /    this is a pandas class.     /
Strip Method: /this is a pandas class./
lStrip Method: /this is a pandas class.     /
rStrip Method: /    this is a pandas class./


In [None]:
# Add a column of names padded with spaces so the strip variants have
# something to remove. The rendered table does not make the padding visible,
# so the four columns can look identical in the output.
df['Name_whitespace'] = ['  Alice  ', '  Bob', 'Charlie  ', '  David', 'Eva  ']
padded_names = df['Name_whitespace']

df['Name_strip'] = padded_names.str.strip()    # both ends
df['Name_lstrip'] = padded_names.str.lstrip()  # leading whitespace only
df['Name_rstrip'] = padded_names.str.rstrip()  # trailing whitespace only

strip_columns = ['Name_whitespace', 'Name_strip', 'Name_lstrip', 'Name_rstrip']
df[strip_columns]

Unnamed: 0,Name_whitespace,Name_strip,Name_lstrip,Name_rstrip
0,Alice,Alice,Alice,Alice
1,Bob,Bob,Bob,Bob
2,Charlie,Charlie,Charlie,Charlie
3,David,David,David,David
4,Eva,Eva,Eva,Eva


## 8. str.findall()
Finds all occurrences of a pattern in each string of the Series.

In [None]:
# Collect every lowercase 'a' found in each department name. Each row gets a
# list with one entry per match, or an empty list when there is none.
matches_of_a = df['Department'].str.findall('a')
df['Dept_findall_a'] = matches_of_a
df[['Department', 'Dept_findall_a']]

Unnamed: 0,Department,Dept_findall_a
0,HR,[]
1,Financea,"[a, a]"
2,Marketing,[a]
3,IT,[]
4,Operations,[a]


## 9. str.cat()
Concatenates strings in the Series/Index with the given separator.

In [None]:
# Plain-Python counterpart of str.cat(): '+' joins two strings with nothing
# in between.
str1, str2 = "test", "test1"

str1 + str2

'testtest1'

In [None]:
# Join 'Name' and 'City' row by row with " - " between them and store the
# result in 'Name_City', then show the two inputs next to the combined column.
# The separator is passed by keyword so the call reads unambiguously.
df['Name_City'] = df['Name'].str.cat(df['City'], sep=" - ")

df[['Name', 'City', 'Name_City']]

Unnamed: 0,Name,City,Name_City
0,alice,New York,alice - New York
1,bob,Los Angeles,bob - Los Angeles
2,charlie,Chicago,charlie - Chicago
3,david,Houston,david - Houston
4,eva,Phoenix,eva - Phoenix


## 10. str.get()
Extracts the element at the given position from each string in the Series.

In [None]:
# Plain-Python indexing is zero-based, so position 1 is the second character.
str1 = "this is a demo"
position = 1
str1[position]

'h'

In [None]:
# .str.get(i) picks the character at position i from every string:
# 0 is the first character and -1 is the last one.
first_chars = df['Name'].str.get(0)
last_chars = df['Name'].str.get(-1)
df['Name_first_char'] = first_chars
df['Name_last_char'] = last_chars
df[['Name', 'Name_first_char', 'Name_last_char']]

Unnamed: 0,Name,Name_first_char,Name_last_char
0,alice,a,e
1,bob,b,b
2,charlie,c,e
3,david,d,d
4,eva,e,a


In [None]:
# Split a sample sentence on whitespace; the comma is not a separator here,
# so 'atest,string' stays together as one item.
# The sample is bound to `text` rather than `str` so the built-in `str` type
# is not shadowed for the rest of the kernel session.
text = "Thisis atest,string"

arr = text.split()

In [None]:
# Display the list produced by the whitespace split in the previous cell.
arr

['Thisis', 'atest,string']

In [None]:
# Split each whitespace-separated piece again on commas and print the parts.
for piece in arr:
    print(piece.split(','))

['Thisis']
['atest', 'string']
