In [1]:
import numpy as np
import pandas as pd

In [47]:
# Basic Series & DataFrame
# 1. Create a Pandas Series from the list [10, 20, 30, 40].
l = [10, 20, 30, 40]
list_Series = pd.Series(l)
# 2. Convert this dictionary into a DataFrame:
# {'Name':['A','B','C'], 'Age':[20,21,19], 'Marks':[85,78,92]}
d = {'Name':['A','B','C'], 'Age':[20,21,19], 'Marks':[85,78,92]}
dic_df = pd.DataFrame(d)
# 3. Read a CSV file named data.csv into a DataFrame.
df = pd.read_csv("anime.csv")
df.head()
# 4. Display the first 3 rows of a DataFrame called df.
df.head(3)
# 5. Show only the column names of df.
print(df.columns)

Index(['Rank', 'Title', 'Score'], dtype='object')


In [3]:
# Indexing & Selection
# 6. Select the “Age” column from df.
data = {
    'Name': ['John', 'Anna', 'Peter', 'Linda'],
    'Age': [28, 34, 29, 42],
    'City': ['New York', 'Paris', 'Berlin', 'London'],
    'Salary': [65000, 70000, 62000, 85000]
}
df = pd.DataFrame(data)
df.iloc[:, 1]
# 7. Select rows where Age > 30.
df[df["Age"]>30]
# 8. Select rows 2 to 3 (inclusive of 2, exclusive of 4).
df.loc[2:,:]
# 9. Select Name and Salary columns together.
df[["Name","Salary"]]
# 10. Use loc to select rows where Name = "John".
df.loc[0]

Name          John
Age             28
City      New York
Salary       65000
Name: 0, dtype: object

In [4]:
data = {
    'A': [1, 2, np.nan, 4, 5],
    'B': [1, 2, 3, 4, 5],
    'C': [1, 2, 3, np.nan, np.nan],
    'D': [1, np.nan, np.nan, np.nan, 5]
}
df = pd.DataFrame(data)
df

Unnamed: 0,A,B,C,D
0,1.0,1,1.0,1.0
1,2.0,2,2.0,
2,,3,3.0,
3,4.0,4,,
4,5.0,5,,5.0


In [5]:
# Missing Data
# 11. Find the number of missing values in each column.
df.isna().sum()
# 12. Fill all missing values with 0.
df.fillna(0, inplace=True)
# 13. Drop rows that contain any missing values.
df.dropna()
# 14. Replace missing values in the column “C” with the column’s mean.
df["C"] = df["C"].fillna(df["C"].mean())
df

Unnamed: 0,A,B,C,D
0,1.0,1,1.0,1.0
1,2.0,2,2.0,0.0
2,0.0,3,3.0,0.0
3,4.0,4,0.0,0.0
4,5.0,5,0.0,5.0


In [6]:
# Aggregation & Stats
df_dic
# 15. Find the mean of all numeric columns.
df.mean()
# 16. Find the maximum value in the “Marks” column.
df["B"].max()
# 17. Find the student with the highest marks.
topper = df_dic.loc[df_dic['Marks'].idxmax()]
topper
# 18. Calculate the sum of all marks.
df_dic["Marks"].sum()

255

In [7]:
df_dic

Unnamed: 0,Name,Age,Marks
0,A,20,85
1,B,21,78
2,C,19,92


In [8]:
# GroupBy & Analysis
# 19. Group data by “Class” column and find the average marks.?
data = {
    'Name': ['A', 'B', 'C', 'D', 'E', 'F'],
    'Class': ['10A', '10A', '10B', '10B', '10A', '10C'],
    'Gender': ['M', 'F', 'M', 'F', 'M', 'F'],
    'Marks': [85, 78, 92, 67, 88, 73]
}

df2 = pd.DataFrame(data)
print(df2)
class_grp = df2.groupby('Class')['Marks'].mean()
class_grp

  Name Class Gender  Marks
0    A   10A      M     85
1    B   10A      F     78
2    C   10B      M     92
3    D   10B      F     67
4    E   10A      M     88
5    F   10C      F     73


Class
10A    83.666667
10B    79.500000
10C    73.000000
Name: Marks, dtype: float64

In [9]:
# 20. Count how many students are in each class.
df2.groupby('Class')['Name'].count()

Class
10A    3
10B    2
10C    1
Name: Name, dtype: int64

In [10]:
# 21. Group by gender and find the highest marks for each gender.
df2.groupby("Gender")["Marks"].max()

Gender
F    78
M    92
Name: Marks, dtype: int64

In [11]:
# Sorting & Filtering
# 22. Sort the DataFrame by the “Marks” column in descending order.
df2.sort_values(by="Marks",ascending = False,inplace=True)
df2

Unnamed: 0,Name,Class,Gender,Marks
2,C,10B,M,92
4,E,10A,M,88
0,A,10A,M,85
1,B,10A,F,78
5,F,10C,F,73
3,D,10B,F,67


In [14]:
# 23. Filter rows where Marks are between 70 and 90.
df2
df2[(df2["Marks"]>=70) & (df2["Marks"]<=90)]

Unnamed: 0,Name,Class,Gender,Marks
4,E,10A,M,88
0,A,10A,M,85
1,B,10A,F,78
5,F,10C,F,73


In [15]:
# 24. Show the top 5 rows with the highest marks.
df2.sort_values(by="Marks", ascending=False).head(5)

Unnamed: 0,Name,Class,Gender,Marks
2,C,10B,M,92
4,E,10A,M,88
0,A,10A,M,85
1,B,10A,F,78
5,F,10C,F,73


In [20]:
# Merging, Joining, Concatenation
df1 = pd.DataFrame({
    'A': ['A0', 'A1', 'A2'],
    'B': ['B0', 'B1', 'B2'],
    'C': ['C0', 'C1', 'C2']
})

df2 = pd.DataFrame({
    'A': ['A3', 'A4', 'A5'],
    'B': ['B3', 'B4', 'B5'],
    'C': ['C3', 'C4', 'C5']
})
# 25. Concatenate two DataFrames df1 and df2 vertically.
pd.concat([df1,df2])

Unnamed: 0,A,B,C
0,A0,B0,C0
1,A1,B1,C1
2,A2,B2,C2
0,A3,B3,C3
1,A4,B4,C4
2,A5,B5,C5


In [24]:
# 26. Merge df1 and df2 on a common column called “ID”.
employees = pd.DataFrame({
    'employee_id': [1, 2, 3, 4, 5],
    'name': ['John', 'Anna', 'Peter', 'Linda', 'Bob'],
    'department': ['HR', 'IT', 'Finance', 'IT', 'HR']
})

# DataFrame 2: Salary information
salaries = pd.DataFrame({
    'employee_id': [1, 2, 3, 6, 7],
    'salary': [60000, 80000, 65000, 70000, 90000],
    'bonus': [5000, 10000, 7000, 8000, 12000]
})
pd.merge(employees,salaries,on='employee_id',how='left')

Unnamed: 0,employee_id,name,department,salary,bonus
0,1,John,HR,60000.0,5000.0
1,2,Anna,IT,80000.0,10000.0
2,3,Peter,Finance,65000.0,7000.0
3,4,Linda,IT,,
4,5,Bob,HR,,


In [25]:
# 27. Join df1 and df2 using .join().
df1 = pd.DataFrame({
    'name': ['Alice', 'Bob', 'Charlie']
}, index=[1, 2, 3])

# Second DataFrame
df2 = pd.DataFrame({
    'score': [85, 90, 75]
}, index=[2, 3, 4])

In [27]:
df2

Unnamed: 0,score
2,85
3,90
4,75


In [28]:
df1.join(df2)

Unnamed: 0,name,score
1,Alice,
2,Bob,85.0
3,Charlie,90.0


In [31]:
df1.join(df2,how='outer')

Unnamed: 0,name,score
1,Alice,
2,Bob,85.0
3,Charlie,90.0
4,,75.0


In [34]:
# Apply, Lambda, Map
data = {
    'Name': ['A', 'B', 'C', 'D', 'E'],
    'Marks': [92, 76, 88, 59, 45]
}

df = pd.DataFrame(data)
# 28. Create a new column “Status” such that:
# Marks ≥ 90 → “Excellent”
# 75–89 → “Good”
# Else → “Average”
# Use apply() or np.where().
df["Status"] = np.where(df["Marks"]>=90,'Excellent',
               np.where(df["Marks"]>=75, 'Good', 'Average'))
print(df)

  Name  Marks     Status
0    A     92  Excellent
1    B     76       Good
2    C     88       Good
3    D     59    Average
4    E     45    Average


In [35]:
def get_status(m):
    if m >= 90:
        return "Excellent"
    elif m >= 75:
        return "Good"
    else:
        return "Average"

df["Status"] = df["Marks"].apply(get_status)
print(df)

  Name  Marks     Status
0    A     92  Excellent
1    B     76       Good
2    C     88       Good
3    D     59    Average
4    E     45    Average


In [37]:
# 29. Apply a lambda function to increase all Marks by 10%.
df["Marks_10percent"] = df["Marks"].apply(lambda x : x * 1.10)
df

Unnamed: 0,Name,Marks,Status,Marks_10percent
0,A,92,Excellent,101.2
1,B,76,Good,83.6
2,C,88,Good,96.8
3,D,59,Average,64.9
4,E,45,Average,49.5


In [46]:
# 30. Convert all names to uppercase using .str.upper().
df["Lower_Name"] = df["Name"].str.lower()
df["Name"] = df["Name"].str.upper()
df

Unnamed: 0,Name,Marks,Status,Marks_10percent,Lower_Name
0,A,92,Excellent,101.2,a
1,B,76,Good,83.6,b
2,C,88,Good,96.8,c
3,D,59,Average,64.9,d
4,E,45,Average,49.5,e
