## Data Cleaning using Pandas

### Checking Missing Values in Pandas



In [1]:
## 1. Using isnull()
# a. Finding the missing values
# Build a small score table in which each column contains exactly one NaN.
import numpy as np
import pandas as pd

d = {
    'First Score': [100, 90, np.nan, 95],
    'Second Score': [30, 45, 56, np.nan],
    'Third Score': [np.nan, 40, 80, 98],
}
df = pd.DataFrame(data=d)
df

Unnamed: 0,First Score,Second Score,Third Score
0,100.0,30.0,
1,90.0,45.0,40.0
2,,56.0,80.0
3,95.0,,98.0


In [2]:
# isnull() returns a same-shaped boolean frame: True where the value is missing
mv = df.isnull()
mv

Unnamed: 0,First Score,Second Score,Third Score
0,False,False,True
1,False,False,False
2,True,False,False
3,False,True,False


In [None]:
# b. Filtering Data Based on Missing Values
# Load the employee records; the relative path means employees.csv must sit in
# the working directory. Note: this rebinds `d`, which held the score dict before.
d = pd.read_csv('employees.csv')
d

Unnamed: 0,First Name,Gender,Start Date,Last Login Time,Salary,Bonus %,Senior Management,Team
0,Douglas,Male,8/6/1993,12:42 PM,97308,6.945,True,Marketing
1,Thomas,Male,3/31/1996,6:53 AM,61933,4.170,True,
2,Maria,Female,4/23/1993,11:17 AM,130590,11.858,False,Finance
3,Jerry,Male,3/4/2005,1:00 PM,138705,9.340,True,Finance
4,Larry,Male,1/24/1998,4:47 PM,101004,1.389,True,Client Services
...,...,...,...,...,...,...,...,...
995,Henry,,11/23/2014,6:09 AM,132483,16.655,False,Distribution
996,Phillip,Male,1/31/1984,6:30 AM,42392,19.675,False,Finance
997,Russell,Male,5/20/2013,12:39 PM,96914,1.421,False,Product
998,Larry,Male,4/20/2013,4:45 PM,60500,11.985,False,Business Development


In [6]:
# Boolean mask: True for rows whose Gender is missing
bool_series = pd.isnull(d['Gender'])
# Boolean indexing keeps only the rows where the mask is True
missing_gender_data = d[bool_series]
missing_gender_data

Unnamed: 0,First Name,Gender,Start Date,Last Login Time,Salary,Bonus %,Senior Management,Team
20,Lois,,4/22/1995,7:18 PM,64714,4.934,True,Legal
22,Joshua,,3/8/2012,1:58 AM,90816,18.816,True,Client Services
27,Scott,,7/11/1991,6:58 PM,122367,5.218,False,Legal
31,Joyce,,2/20/2005,2:40 PM,88657,12.752,False,Product
41,Christine,,6/28/2015,1:08 AM,66582,11.308,True,Business Development
...,...,...,...,...,...,...,...,...
961,Antonio,,6/18/1989,9:37 PM,103050,3.050,False,Legal
972,Victor,,7/28/2006,2:49 PM,76381,11.159,True,Sales
985,Stephen,,7/10/1983,8:10 PM,85668,1.909,False,Legal
989,Justin,,2/10/1991,4:58 PM,38344,3.794,False,Legal


In [7]:
## 2. Checking for Non-Missing Values using notnull()
# Rebuild the score table, then flag every cell that holds a real value.
d = {
    'First Score': [100, 90, np.nan, 95],
    'Second Score': [30, 45, 56, np.nan],
    'Third Score': [np.nan, 40, 80, 98],
}
df = pd.DataFrame(data=d)

# True where a value is present, False where it is NaN
nmv = df.notnull()
nmv

Unnamed: 0,First Score,Second Score,Third Score
0,True,True,False
1,True,True,True
2,False,True,True
3,True,False,True


In [8]:
# b. Filtering Data with Non-Missing Values
# Reload the employee records and keep only the rows that have a Gender value.
d = pd.read_csv('employees.csv')
nmg = pd.notnull(d['Gender'])  # True where Gender is present
nmgd = d.loc[nmg]
nmgd

Unnamed: 0,First Name,Gender,Start Date,Last Login Time,Salary,Bonus %,Senior Management,Team
0,Douglas,Male,8/6/1993,12:42 PM,97308,6.945,True,Marketing
1,Thomas,Male,3/31/1996,6:53 AM,61933,4.170,True,
2,Maria,Female,4/23/1993,11:17 AM,130590,11.858,False,Finance
3,Jerry,Male,3/4/2005,1:00 PM,138705,9.340,True,Finance
4,Larry,Male,1/24/1998,4:47 PM,101004,1.389,True,Client Services
...,...,...,...,...,...,...,...,...
994,George,Male,6/21/2013,5:47 PM,98874,4.479,True,Marketing
996,Phillip,Male,1/31/1984,6:30 AM,42392,19.675,False,Finance
997,Russell,Male,5/20/2013,12:39 PM,96914,1.421,False,Product
998,Larry,Male,4/20/2013,4:45 PM,60500,11.985,False,Business Development


### Filling Missing Values 


In [9]:
# 1. Using fillna()
# a. Fill Missing Values with Zero
# fillna returns a new frame; `df` itself keeps its NaN values for the next cells.
d = {
    'First Score': [100, 90, np.nan, 95],
    'Second Score': [30, 45, 56, np.nan],
    'Third Score': [np.nan, 40, 80, 98],
}
df = pd.DataFrame(data=d)
df.fillna(value=0)

Unnamed: 0,First Score,Second Score,Third Score
0,100.0,30.0,0.0
1,90.0,45.0,40.0
2,0.0,56.0,80.0
3,95.0,0.0,98.0


In [10]:
# b. Fill with Previous Value (Forward Fill)
# ffill() is the supported spelling; fillna(method='pad') is deprecated and
# emits a FutureWarning. A NaN in the first row has no previous value to copy,
# so it stays NaN.
df.ffill()

  df.fillna(method='pad')


Unnamed: 0,First Score,Second Score,Third Score
0,100.0,30.0,
1,90.0,45.0,40.0
2,90.0,56.0,80.0
3,95.0,56.0,98.0


In [11]:
# c. Fill with Next Value (Backward Fill)
# bfill() is the supported spelling; fillna(method='bfill') is deprecated and
# emits a FutureWarning. A NaN in the last row has no next value to copy,
# so it stays NaN.
df.bfill()

  df.fillna(method='bfill')


Unnamed: 0,First Score,Second Score,Third Score
0,100.0,30.0,40.0
1,90.0,45.0,40.0
2,95.0,56.0,80.0
3,95.0,,98.0


In [13]:
# d. Fill NaN Values with 'No Gender'
# Reload a fresh copy of the data, then preview a positional slice before filling
d = pd.read_csv('employees.csv')
d[10:25]

Unnamed: 0,First Name,Gender,Start Date,Last Login Time,Salary,Bonus %,Senior Management,Team
10,Louise,Female,8/12/1980,9:01 AM,63241,15.132,True,
11,Julie,Female,10/26/1997,3:19 PM,102508,12.637,True,Legal
12,Brandon,Male,12/1/1980,1:08 AM,112807,17.492,True,Human Resources
13,Gary,Male,1/27/2008,11:40 PM,109831,5.831,False,Sales
14,Kimberly,Female,1/14/1999,7:13 AM,41426,14.543,True,Finance
15,Lillian,Female,6/5/2016,6:09 AM,59414,1.256,False,Product
16,Jeremy,Male,9/21/2010,5:56 AM,90370,7.369,False,Human Resources
17,Shawn,Male,12/7/1986,7:45 PM,111737,6.414,False,Product
18,Diana,Female,10/23/1981,10:27 AM,132940,19.082,False,Client Services
19,Donna,Female,7/22/2010,3:48 AM,81014,1.894,False,Product


In [14]:
# Assign the filled column back to the frame. Calling fillna(..., inplace=True)
# on d["Gender"] acts on an intermediate object (chained assignment); pandas
# warns about it and it no longer updates `d` from pandas 3.0 on.
d["Gender"] = d["Gender"].fillna('No Gender')
d[10:25]

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  d["Gender"].fillna('No Gender', inplace=True)


Unnamed: 0,First Name,Gender,Start Date,Last Login Time,Salary,Bonus %,Senior Management,Team
10,Louise,Female,8/12/1980,9:01 AM,63241,15.132,True,
11,Julie,Female,10/26/1997,3:19 PM,102508,12.637,True,Legal
12,Brandon,Male,12/1/1980,1:08 AM,112807,17.492,True,Human Resources
13,Gary,Male,1/27/2008,11:40 PM,109831,5.831,False,Sales
14,Kimberly,Female,1/14/1999,7:13 AM,41426,14.543,True,Finance
15,Lillian,Female,6/5/2016,6:09 AM,59414,1.256,False,Product
16,Jeremy,Male,9/21/2010,5:56 AM,90370,7.369,False,Human Resources
17,Shawn,Male,12/7/1986,7:45 PM,111737,6.414,False,Product
18,Diana,Female,10/23/1981,10:27 AM,132940,19.082,False,Client Services
19,Donna,Female,7/22/2010,3:48 AM,81014,1.894,False,Product


In [23]:
## 2. Using replace()
# Fresh load into `data`, so missing entries from the file are NaN again
data = pd.read_csv('employees.csv')
data[10:25]

Unnamed: 0,First Name,Gender,Start Date,Last Login Time,Salary,Bonus %,Senior Management,Team
10,Louise,Female,8/12/1980,9:01 AM,63241,15.132,True,
11,Julie,Female,10/26/1997,3:19 PM,102508,12.637,True,Legal
12,Brandon,Male,12/1/1980,1:08 AM,112807,17.492,True,Human Resources
13,Gary,Male,1/27/2008,11:40 PM,109831,5.831,False,Sales
14,Kimberly,Female,1/14/1999,7:13 AM,41426,14.543,True,Finance
15,Lillian,Female,6/5/2016,6:09 AM,59414,1.256,False,Product
16,Jeremy,Male,9/21/2010,5:56 AM,90370,7.369,False,Human Resources
17,Shawn,Male,12/7/1986,7:45 PM,111737,6.414,False,Product
18,Diana,Female,10/23/1981,10:27 AM,132940,19.082,False,Client Services
19,Donna,Female,7/22/2010,3:48 AM,81014,1.894,False,Product


In [26]:
# Swap every NaN for the sentinel -99; the result is a new frame and is only
# displayed here, `data` itself is not reassigned
data.replace(to_replace=np.nan, value=-99)

Unnamed: 0,First Name,Gender,Start Date,Last Login Time,Salary,Bonus %,Senior Management,Team
0,Douglas,Male,8/6/1993,12:42 PM,97308,6.945,True,Marketing
1,Thomas,Male,3/31/1996,6:53 AM,61933,4.170,True,-99
2,Maria,Female,4/23/1993,11:17 AM,130590,11.858,False,Finance
3,Jerry,Male,3/4/2005,1:00 PM,138705,9.340,True,Finance
4,Larry,Male,1/24/1998,4:47 PM,101004,1.389,True,Client Services
...,...,...,...,...,...,...,...,...
995,Henry,-99,11/23/2014,6:09 AM,132483,16.655,False,Distribution
996,Phillip,Male,1/31/1984,6:30 AM,42392,19.675,False,Finance
997,Russell,Male,5/20/2013,12:39 PM,96914,1.421,False,Product
998,Larry,Male,4/20/2013,4:45 PM,60500,11.985,False,Business Development


In [27]:
## 3. Using interpolate()
# Numeric frame with gaps at the start, middle and end of different columns;
# None entries become NaN once the frame is built.
df = pd.DataFrame(
    data={
        "A": [12, 4, 5, None, 1],
        "B": [None, 2, 54, 3, None],
        "C": [20, 16, None, 3, 8],
        "D": [14, 3, None, None, 6],
    }
)

df


Unnamed: 0,A,B,C,D
0,12.0,,20.0,14.0
1,4.0,2.0,16.0,3.0
2,5.0,54.0,,
3,,3.0,3.0,
4,1.0,,8.0,6.0


In [28]:
# Linear interpolation down each column. With limit_direction='forward' only
# gaps that follow a valid value are filled, so the leading NaN in B (row 0)
# stays missing while the trailing one (row 4) is filled.
df.interpolate(method='linear', limit_direction='forward')

Unnamed: 0,A,B,C,D
0,12.0,,20.0,14.0
1,4.0,2.0,16.0,3.0
2,5.0,54.0,9.5,4.0
3,3.0,3.0,3.0,5.0
4,1.0,3.0,8.0,6.0


### Dropping Missing Values in Pandas


In [29]:
# 1. Dropping Rows with At least one null values
# The source mapping is named `scores`, not `dict`, so the built-in dict type
# is not shadowed for the rest of the notebook.
scores = {'First Score': [100, 90, np.nan, 95],
          'Second Score': [30, np.nan, 45, 56],
          'Third Score': [52, 40, 80, 98],
          'Fourth Score': [np.nan, np.nan, np.nan, 65]}

df = pd.DataFrame(scores)
df

Unnamed: 0,First Score,Second Score,Third Score,Fourth Score
0,100.0,30.0,52,
1,90.0,,40,
2,,45.0,80,
3,95.0,56.0,98,65.0


In [30]:
# Default dropna() removes every row containing at least one NaN; only row 3 is complete
df.dropna()

Unnamed: 0,First Score,Second Score,Third Score,Fourth Score
3,95.0,56.0,98,65.0


In [31]:
# 2. Dropping Rows with All Null Values
# The source mapping is named `scores`, not `dict`, so the built-in dict type
# is not shadowed. Row 1 is NaN in every column and is the only row that
# how='all' removes.
scores = {'First Score': [100, np.nan, np.nan, 95],
          'Second Score': [30, np.nan, 45, 56],
          'Third Score': [52, np.nan, 80, 98],
          'Fourth Score': [np.nan, np.nan, np.nan, 65]}
df = pd.DataFrame(scores)
df.dropna(how='all')

Unnamed: 0,First Score,Second Score,Third Score,Fourth Score
0,100.0,30.0,52.0,
2,,45.0,80.0,
3,95.0,56.0,98.0,65.0


In [32]:
# 3. Dropping Columns with At least one Null Values
# The source mapping is named `scores`, not `dict`, so the built-in dict type
# is not shadowed. axis=1 makes dropna work on columns: only 'Fourth Score'
# has no NaN and survives.
scores = {'First Score': [100, np.nan, np.nan, 95],
          'Second Score': [30, np.nan, 45, 56],
          'Third Score': [52, np.nan, 80, 98],
          'Fourth Score': [60, 67, 68, 65]}
df = pd.DataFrame(scores)

df.dropna(axis=1)

Unnamed: 0,Fourth Score
0,60
1,67
2,68
3,65


In [33]:
# 4. Dropping Rows with Missing Values
# Load the data and drop every row that has at least one missing value.
data = pd.read_csv('employees.csv')
nd = data.dropna(axis=0, how='any')

print("Old data Frame length: ",len(data))
print("New data Frame length: ",len(nd))
# The difference is taken against `data`, the frame loaded in this cell, so the
# cell does not depend on a `d` variable left over from an earlier cell.
print("Rows with at least one missing values: ", (len(data) - len(nd)))

Old data Frame length:  1000
New data Frame length:  764
Rows with at least one missing values:  236


## Removing Duplicates 

In [5]:
import pandas as pd

# Sample people table; rows 0 and 2 are identical in every column.
data = {
    "Name": ["Alice", "Bob", "Alice", "David"],
    "Age": [25, 30, 25, 40],
    "City": ["NY", "LA", "NY", "Chicago"],
}
df = pd.DataFrame(data=data)
df

Unnamed: 0,Name,Age,City
0,Alice,25,NY
1,Bob,30,LA
2,Alice,25,NY
3,David,40,Chicago


In [6]:
# Drop rows that repeat an earlier row in every column; the first occurrence is kept
df_cleaned = df.drop_duplicates()
df_cleaned

Unnamed: 0,Name,Age,City
0,Alice,25,NY
1,Bob,30,LA
3,David,40,Chicago


In [None]:
# 1. Dropping duplicates based on Specific columns
# Only "Name" is compared, so the second "Alice" row (index 2) is dropped
df_cleaned1 = df.drop_duplicates(subset=["Name"])
df_cleaned1

Unnamed: 0,Name,Age,City
0,Alice,25,NY
1,Bob,30,LA
3,David,40,Chicago


In [10]:
# 2. Keeping the Last Occurrence of Duplicates
# keep='last' drops the earlier copy (index 0) and retains the later one (index 2)
df_cleaned = df.drop_duplicates(keep='last')
df_cleaned

Unnamed: 0,Name,Age,City
1,Bob,30,LA
2,Alice,25,NY
3,David,40,Chicago


In [11]:
# 3. Dropping All Duplicates
# keep=False removes every row that has a duplicate, so both "Alice" rows go
df_cleaned = df.drop_duplicates(keep=False)
df_cleaned

Unnamed: 0,Name,Age,City
1,Bob,30,LA
3,David,40,Chicago


In [12]:
# 4. Modifying the Original Dataframe
# inplace=True changes `df` itself instead of producing a new frame, which is
# why nothing is assigned here. `df` no longer contains the duplicate row afterwards.
df.drop_duplicates(inplace=True)
df

Unnamed: 0,Name,Age,City
0,Alice,25,NY
1,Bob,30,LA
3,David,40,Chicago


In [14]:
# Display `df` again: the duplicate row removed in place stays removed
df

Unnamed: 0,Name,Age,City
0,Alice,25,NY
1,Bob,30,LA
3,David,40,Chicago


In [16]:
# 5. Dropping Duplicates Based on Partially Identical Columns
data1 = {
    "Name": ["Alice", "Bob", "Alice", "David", "Bob"],
    "Age": [25, 30, 25, 40, 30],
    "City": ["NY", "LA", "NY", "Chicago", "LA"]
}

df1 = pd.DataFrame(data1)
# Operate on df1, the five-row frame built in this cell. Rows count as
# duplicates when both Name and City match; the first of each group is kept.
df1_cleaned = df1.drop_duplicates(subset=['Name','City'])
df1_cleaned

Unnamed: 0,Name,Age,City
0,Alice,25,NY
1,Bob,30,LA
3,David,40,Chicago


## Pandas Change Datatype

In [17]:
# Small staff table; Age starts out as integers and is then cast to float.
data = {
    'Name': ['John', 'Alice', 'Bob', 'Eve', 'Charlie'],
    'Age': [25, 30, 22, 35, 28],
    'Gender': ['Male', 'Female', 'Male', 'Female', 'Male'],
    'Salary': [50000, 55000, 40000, 70000, 48000],
}
df = pd.DataFrame(data=data)

# astype returns a converted copy, so it is assigned back to the column
df['Age'] = df['Age'].astype('float64')
print(df.dtypes)

Name       object
Age       float64
Gender     object
Salary      int64
dtype: object


In [18]:
# Converting a column to a Datetime Type
# First add the dates as plain strings (written as YYYY-MM-DD)
df['Join Date'] = ['2021-01-01', '2020-05-22', '2022-03-15', '2021-07-30', '2020-11-11']
df

Unnamed: 0,Name,Age,Gender,Salary,Join Date
0,John,25.0,Male,50000,2021-01-01
1,Alice,30.0,Female,55000,2020-05-22
2,Bob,22.0,Male,40000,2022-03-15
3,Eve,35.0,Female,70000,2021-07-30
4,Charlie,28.0,Male,48000,2020-11-11


In [19]:
# "Join Date" is still stored as object (plain strings) at this point
print(df.dtypes)

Name          object
Age          float64
Gender        object
Salary         int64
Join Date     object
dtype: object


In [20]:
# Parse the strings into timestamps; the column dtype becomes datetime64[ns]
df['Join Date'] = pd.to_datetime(df['Join Date'])
print(df.dtypes)

Name                 object
Age                 float64
Gender               object
Salary                int64
Join Date    datetime64[ns]
dtype: object


In [21]:
# Changing Multiple Columns Data Types
# astype accepts a {column: dtype} mapping; Age goes back to int64 and Salary
# is reported as object after the 'str' cast
df = df.astype({'Age': 'int64', 'Salary': 'str'})
print(df.dtypes)

Name                 object
Age                   int64
Gender               object
Salary               object
Join Date    datetime64[ns]
dtype: object


## Drop Empty Columns in Pandas 

In [22]:
import numpy as np
import pandas as pd

# Frame with three kinds of "empty" column: empty strings (Gender), zeros (Age)
# and real NaN values (Department).
df = pd.DataFrame(
    data={
        'FirstName': ['Akash', 'Ashish', 'Milan'],
        "Gender": ["", "", ""],
        "Age": [0, 0, 0],
    }
)
df['Department'] = np.nan  # scalar is broadcast to every row

df

Unnamed: 0,FirstName,Gender,Age,Department
0,Akash,,0,
1,Ashish,,0,
2,Milan,,0,


In [23]:
# 1. Remove ALL Null Value Columns
# Drops columns whose every value is NaN (Department). Gender survives because
# empty strings are not treated as missing.
df.dropna(how='all',axis=1,inplace=True)
df

Unnamed: 0,FirstName,Gender,Age
0,Akash,,0
1,Ashish,,0
2,Milan,,0


In [24]:
# 2. Replace Empty Strings with Null And drop null columns
# Turn "" into NaN (in place) so the all-empty Gender column counts as missing.
# NOTE(review): the stderr line in this cell's output looks like a pandas
# warning raised by this replace call - confirm which one and whether it matters.
nan_value = float("NaN")
df.replace("", nan_value, inplace=True)
df

  df.replace("", nan_value, inplace=True)


Unnamed: 0,FirstName,Gender,Age
0,Akash,,0
1,Ashish,,0
2,Milan,,0


In [25]:
# Gender is now entirely NaN, so this second pass removes it
df.dropna(how='all',axis=1, inplace=True)
df

Unnamed: 0,FirstName,Age
0,Akash,0
1,Ashish,0
2,Milan,0


In [26]:
# 3. Replace Zeros with Null and Drop null columns
# Treat 0 as "missing": every 0 in the frame becomes NaN (here the whole Age column)
nan_value = float("NaN")
df.replace(0, nan_value, inplace=True)
df

Unnamed: 0,FirstName,Age
0,Akash,
1,Ashish,
2,Milan,


In [27]:
# Age is now entirely NaN and is dropped, leaving only FirstName
df.dropna(how='all',axis=1,inplace=True)
df

Unnamed: 0,FirstName
0,Akash
1,Ashish
2,Milan


In [None]:
# 4. Replace both Zeros and Empty strings with null and drop null columns
# Rebuild the starting frame, then express the whole clean-up as one chain.
df = pd.DataFrame(
    data={
        'FirstName': ['Akash', 'Ashish', 'Milan'],
        "Gender": ["", "", ""],
        "Age": [0, 0, 0],
    }
)
df['Department'] = np.nan
nan_value = float("NaN")

df = (
    df.replace(0, nan_value)         # zeros -> NaN
      .replace("", nan_value)        # empty strings -> NaN
      .dropna(how='all', axis=1)     # discard columns that are now entirely NaN
)
df

  df.replace("", nan_value, inplace=True)


Unnamed: 0,FirstName
0,Akash
1,Ashish
2,Milan


## String Manipulation in Pandas


In [29]:
import numpy as np
import pandas as pd

# Names (one of them missing) paired with cities, used by the string examples.
data = {
    'Names': ['Gulshan', 'Shashank', 'Bablu', 'Abhishek', 'Anand', np.nan, 'Pratap'],
    'City': ['Delhi', 'Mumbai', 'Kolkata', 'Delhi', 'Chennai', 'Bangalore', 'Hyderabad'],
}
df = pd.DataFrame(data=data)
df

Unnamed: 0,Names,City
0,Gulshan,Delhi
1,Shashank,Mumbai
2,Bablu,Kolkata
3,Abhishek,Delhi
4,Anand,Chennai
5,,Bangalore
6,Pratap,Hyderabad


In [30]:
# Both text columns are reported as object dtype
print(df.dtypes)

Names    object
City     object
dtype: object


In [None]:
# astype('string') yields a converted frame (the missing name prints as <NA>);
# the result is only printed, not assigned, so `df` is not rebound
print(df.astype('string'))

      Names       City
0   Gulshan      Delhi
1  Shashank     Mumbai
2     Bablu    Kolkata
3  Abhishek      Delhi
4     Anand    Chennai
5      <NA>  Bangalore
6    Pratap  Hyderabad


In [32]:
# Confirms `df` is unchanged: the columns are still object dtype
print(df.dtypes)

Names    object
City     object
dtype: object


In [33]:
# lower()
# Lower-cases each name; the missing entry stays NaN
print(df['Names'].str.lower())

0     gulshan
1    shashank
2       bablu
3    abhishek
4       anand
5         NaN
6      pratap
Name: Names, dtype: object


In [34]:
# upper()
# Upper-cases each name; the missing entry stays NaN
print(df['Names'].str.upper())

0     GULSHAN
1    SHASHANK
2       BABLU
3    ABHISHEK
4       ANAND
5         NaN
6      PRATAP
Name: Names, dtype: object


In [35]:
# strip()
# Removes leading/trailing whitespace; these names have none, so the values
# print unchanged
print(df['Names'].str.strip())

0     Gulshan
1    Shashank
2       Bablu
3    Abhishek
4       Anand
5         NaN
6      Pratap
Name: Names, dtype: object


In [36]:
# split('')
# Splits each name on the lowercase letter 'a' into a list of pieces and stores
# it as a new Split_Names column on `df`. 'Abhishek' has no lowercase 'a', so it
# comes back as a single-item list.
df['Split_Names'] = df['Names'].str.split('a')
print(df[['Names', 'Split_Names']])

      Names   Split_Names
0   Gulshan    [Gulsh, n]
1  Shashank  [Sh, sh, nk]
2     Bablu      [B, blu]
3  Abhishek    [Abhishek]
4     Anand      [An, nd]
5       NaN           NaN
6    Pratap    [Pr, t, p]


In [37]:
# len()
# Character count of each name; the missing entry yields NaN and the result is float64
print(df['Names'].str.len())

0    7.0
1    8.0
2    5.0
3    8.0
4    5.0
5    NaN
6    6.0
Name: Names, dtype: float64


In [38]:
# cat(sep='')
# Show the frame first for reference
print(df)

# Join all names into one string separated by ', '; the NaN entry is left out
print("\nAfter using Cat: ")
print(df['Names'].str.cat(sep=', '))

      Names       City   Split_Names
0   Gulshan      Delhi    [Gulsh, n]
1  Shashank     Mumbai  [Sh, sh, nk]
2     Bablu    Kolkata      [B, blu]
3  Abhishek      Delhi    [Abhishek]
4     Anand    Chennai      [An, nd]
5       NaN  Bangalore           NaN
6    Pratap  Hyderabad    [Pr, t, p]

After using Cat: 
Gulshan, Shashank, Bablu, Abhishek, Anand, Pratap


In [39]:
# get_dummies()
# One 0/1 indicator column per distinct city
print(df['City'].str.get_dummies())

   Bangalore  Chennai  Delhi  Hyderabad  Kolkata  Mumbai
0          0        0      1          0        0       0
1          0        0      0          0        0       1
2          0        0      0          0        1       0
3          0        0      1          0        0       0
4          0        1      0          0        0       0
5          1        0      0          0        0       0
6          0        0      0          1        0       0


In [40]:
# startswith()
# True where the name begins with 'G'; the missing name gives NaN
print(df['Names'].str.startswith('G'))

0     True
1    False
2    False
3    False
4    False
5      NaN
6    False
Name: Names, dtype: object


In [41]:
# endswith()
# True where the name ends with 'k'; the missing name gives NaN
print(df['Names'].str.endswith('k'))

0    False
1     True
2    False
3     True
4    False
5      NaN
6    False
Name: Names, dtype: object


In [43]:
# replace(a,b)
# Substitutes 'Akash' for 'Anand' inside each name; only row 4 contains it
print(df['Names'].str.replace('Anand', 'Akash'))

0     Gulshan
1    Shashank
2       Bablu
3    Abhishek
4       Akash
5         NaN
6      Pratap
Name: Names, dtype: object


In [None]:
# repeat(value)
# Repeats each name twice back to back; the missing entry stays NaN
print(df['Names'].str.repeat(2))

0      GulshanGulshan
1    ShashankShashank
2          BabluBablu
3    AbhishekAbhishek
4          AnandAnand
5                 NaN
6        PratapPratap
Name: Names, dtype: object


In [55]:
# value_counts() on the repeated names (note: this is not Series.str.count)
# Tallies how often each doubled name occurs; every one appears exactly once
print(df['Names'].str.repeat(2).value_counts())

Names
GulshanGulshan      1
ShashankShashank    1
BabluBablu          1
AbhishekAbhishek    1
AnandAnand          1
PratapPratap        1
Name: count, dtype: int64


In [60]:
# Same people as before, with 'Gulshan' / 'Delhi' listed twice.
data1 = {
    'Names': ['Gulshan', 'Gulshan', 'Shashank', 'Bablu', 'Abhishek', 'Anand', np.nan, 'Pratap'],
    'City': ['Delhi', 'Delhi', 'Mumbai', 'Kolkata', 'Delhi', 'Chennai', 'Bangalore', 'Hyderabad'],
}
df = pd.DataFrame(data=data1)
df

Unnamed: 0,Names,City
0,Gulshan,Delhi
1,Gulshan,Delhi
2,Shashank,Mumbai
3,Bablu,Kolkata
4,Abhishek,Delhi
5,Anand,Chennai
6,,Bangalore
7,Pratap,Hyderabad


In [61]:
print(df['Names'].value_counts())

Names
Gulshan     2
Shashank    1
Bablu       1
Abhishek    1
Anand       1
Pratap      1
Name: count, dtype: int64


In [63]:
# Re-display the frame, including the repeated 'Gulshan' row
df

Unnamed: 0,Names,City
0,Gulshan,Delhi
1,Gulshan,Delhi
2,Shashank,Mumbai
3,Bablu,Kolkata
4,Abhishek,Delhi
5,Anand,Chennai
6,,Bangalore
7,Pratap,Hyderabad


In [65]:
# find()
# Position of the first lowercase 'a' in each name, or -1 when there is none
# ('Abhishek' only has an uppercase 'A'); the missing name gives NaN
print(df['Names'].str.find('a'))

0    5.0
1    5.0
2    2.0
3    1.0
4   -1.0
5    2.0
6    NaN
7    2.0
Name: Names, dtype: float64


In [66]:
# islower()
# Every name starts with a capital letter, so none is all-lowercase
print(df['Names'].str.islower())


0    False
1    False
2    False
3    False
4    False
5    False
6      NaN
7    False
Name: Names, dtype: object


In [67]:
# isupper()
# No name is written entirely in capitals, so every non-missing result is False
print(df['Names'].str.isupper())

0    False
1    False
2    False
3    False
4    False
5    False
6      NaN
7    False
Name: Names, dtype: object


In [68]:
# isnumeric()
# The names contain letters only, so every non-missing result is False
print(df['Names'].str.isnumeric())

0    False
1    False
2    False
3    False
4    False
5    False
6      NaN
7    False
Name: Names, dtype: object


In [69]:
# swapcase()
# Flips the case of every letter: the capital initial becomes lowercase and the
# rest uppercase; the missing entry stays NaN
print(df['Names'].str.swapcase())

0     gULSHAN
1     gULSHAN
2    sHASHANK
3       bABLU
4    aBHISHEK
5       aNAND
6         NaN
7      pRATAP
Name: Names, dtype: object
