In [1]:
import pandas as pd
import numpy as np

In [2]:
df = pd.DataFrame({
    "Name": ["Alice", "Bob", "Charlie", "David", np.nan],
    "Age": [25, np.nan, 30, 22, 28],
    "City": ["New York", "Los Angeles", np.nan, "Chicago", "Houston"],
    "Salary": [50000, 60000, np.nan, 52000, 58000]
})

In [3]:
df

Unnamed: 0,Name,Age,City,Salary
0,Alice,25.0,New York,50000.0
1,Bob,,Los Angeles,60000.0
2,Charlie,30.0,,
3,David,22.0,Chicago,52000.0
4,,28.0,Houston,58000.0


In [4]:
#Count how many missing values are in a DataFrame.

df.isnull().sum()

Name      1
Age       1
City      1
Salary    1
dtype: int64

In [5]:
#Show which values are missing in a DataFrame (True/False).

df.isnull()

Unnamed: 0,Name,Age,City,Salary
0,False,False,False,False
1,False,True,False,False
2,False,False,True,True
3,False,False,False,False
4,True,False,False,False


In [6]:
#Remove all rows with missing values.

df_clean = df.dropna()
df_clean

Unnamed: 0,Name,Age,City,Salary
0,Alice,25.0,New York,50000.0
3,David,22.0,Chicago,52000.0


In [7]:
#Fill missing values in a column with 0.

df.fillna(0, axis=1)

Unnamed: 0,Name,Age,City,Salary
0,Alice,25.0,New York,50000.0
1,Bob,0.0,Los Angeles,60000.0
2,Charlie,30.0,0,0.0
3,David,22.0,Chicago,52000.0
4,0,28.0,Houston,58000.0


In [8]:
#Fill missing values in a column with "Not Available".

df.fillna("Not Available", axis = 1)

Unnamed: 0,Name,Age,City,Salary
0,Alice,25.0,New York,50000.0
1,Bob,Not Available,Los Angeles,60000.0
2,Charlie,30.0,Not Available,Not Available
3,David,22.0,Chicago,52000.0
4,Not Available,28.0,Houston,58000.0


In [9]:
#Print the maximum and minimum value of a column.

print(df['Age'].max())
print(df['Salary'].min())

30.0
50000.0


In [10]:
#Find values in a column that are greater than 100.

data = {
    "Name": ["Alice", "Bob", "Charlie", "David", "Eve"],
    "Age": [25, 30, 35, 22, 28]
}

df = pd.DataFrame(data)

df_filtered = df[df["Age"] > 25]

print(df_filtered)


      Name  Age
1      Bob   30
2  Charlie   35
4      Eve   28


In [15]:
#Replace all values greater than 25 with 100.

data = {
    "Name": ["Alice", "Bob", "Charlie", "David", "Eve"],
    "Age": [25, 30, 35, 22, 28]
}

df = pd.DataFrame(data)

df_replaced = df['Age']. where(df['Age'] <= 25, 100)
df_replaced



0     25
1    100
2    100
3     22
4    100
Name: Age, dtype: int64

In [12]:
#Sort a column to check for extreme values.

df.sort_values(by = 'Age', ascending= False)

Unnamed: 0,Name,Age
2,Charlie,35
1,Bob,30
4,Eve,28
0,Alice,25
3,David,22


In [13]:
#Remove the highest value from a numeric column.

max_value = df['Age'].max()

df_filtered = df[ df['Age'] != max_value]
print(df_filtered)

    Name  Age
0  Alice   25
1    Bob   30
3  David   22
4    Eve   28


In [24]:
 #Convert column Gender with Male/Female into 0/1

df = pd.DataFrame({
    'Name': ['John', 'Alice', 'Bob', 'Mary'],
    'Age': [28, 24, 35, 30],
    'Gender': ['Male', 'Female', 'Male', 'Female']
})

In [25]:
df['Gender'] = df['Gender'].map({'Male' : 0, 'Female' : 1})
df

Unnamed: 0,Name,Age,Gender
0,John,28,0
1,Alice,24,1
2,Bob,35,0
3,Mary,30,1


In [26]:
#Create separate columns for each City from a City column

df = pd.DataFrame({
    'Name': ['John', 'Alice', 'Bob', 'Mary'],
    'City': ['Delhi', 'Mumbai', 'Delhi', 'Chennai']
})

In [36]:
df_encoded = pd.get_dummies(df, columns= ['City'])
df_encoded

Unnamed: 0,Name,City_Chennai,City_Delhi,City_Mumbai
0,John,False,True,False
1,Alice,False,False,True
2,Bob,False,True,False
3,Mary,True,False,False


In [39]:
#Count how many times each category appears in a column

df = pd.DataFrame({
    'Name': ['John', 'Alice', 'Bob', 'Mary', 'Sam', 'Rita', 'John'],
    'City': ['Delhi', 'Mumbai', 'Delhi', 'Chennai', 'Delhi', 'Mumbai', 'Chennai']
})

df['City'].value_counts()

City
Delhi      3
Mumbai     2
Chennai    2
Name: count, dtype: int64

In [41]:
#Change all values in a column to lowercase

df['Name'].str.lower()


0     john
1    alice
2      bob
3     mary
4      sam
5     rita
6     john
Name: Name, dtype: object

In [50]:
#Divide all values in a numeric column by 10

df = pd.DataFrame({
    "Name": ["Alice", "Bob", "Charlie", "David", "Eve"],
    "Age": [25, 30, 35, 22, 28]
})
df

Unnamed: 0,Name,Age
0,Alice,25
1,Bob,30
2,Charlie,35
3,David,22
4,Eve,28


In [52]:
df['Age'] = df['Age'] / 10
print(df)

      Name   Age
0    Alice  0.25
1      Bob  0.30
2  Charlie  0.35
3    David  0.22
4      Eve  0.28


In [55]:
#Subtract the minimum value from a numeric column

df = pd.DataFrame({
    "Name": ["Alice", "Bob", "Charlie", "David", "Eve"],
    "Age": [25, 30, 35, 22, 28]
})
df

Unnamed: 0,Name,Age
0,Alice,25
1,Bob,30
2,Charlie,35
3,David,22
4,Eve,28


In [59]:
min_value = df['Age'].min()

print(min_value)
df['Age_minus_min'] = df['Age'] - min_value
df


22


Unnamed: 0,Name,Age,Age_minus_min
0,Alice,25,3
1,Bob,30,8
2,Charlie,35,13
3,David,22,0
4,Eve,28,6


In [66]:
#Convert all column values to range 0 to 1 manually.
df = pd.DataFrame({
    "Age": [25, 30, 35, 22, 28],
    "Salary": [50000, 60000, 75000, 40000, 55000]
})

for col in df.columns:
    max = df[col].max()
    min = df[col].min()
    df[col] = (df[col] - min) / (max - min)  # Normalization (Min- max scaling)

print(df)

        Age    Salary
0  0.230769  0.285714
1  0.615385  0.571429
2  1.000000  1.000000
3  0.000000  0.000000
4  0.461538  0.428571


In [67]:
#Show the mean and standard deviation of a numeric column.

mean = df['Salary'].mean()
std_deviation = df['Age'].std()

print(mean)
print(std_deviation)

0.45714285714285713
0.38074980525429486


In [71]:
# Subtract column mean from each value (basic standardization).

df = pd.DataFrame({
    "Age": [25, 30, 35, 22, 28],
    "Salary": [50000, 60000, 75000, 40000, 55000]
})


for col in df.columns:

    df[col] = df[col] - df[col].mean()

print(df)

   Age   Salary
0 -3.0  -6000.0
1  2.0   4000.0
2  7.0  19000.0
3 -6.0 -16000.0
4  0.0  -1000.0


In [73]:
# Round all values in a numeric column to 2 decimals.


df = pd.DataFrame({
    "Name": ["Alice", "Bob", "Charlie"],
    "Salary": [50000.1234, 60000.5678, 75000.9876]
})

df['Salary'] = df['Salary'].round(2)

print(df)

      Name    Salary
0    Alice  50000.12
1      Bob  60000.57
2  Charlie  75000.99
