# Module: Pandas Assignments
## Lesson: Pandas
### Assignment 1: DataFrame Creation and Indexing

1. Create a Pandas DataFrame with 4 columns and 6 rows filled with random integers. Set the index to be the first column.

In [9]:
import pandas as pd
import numpy as np
arr = np.random.randint(1, 100, size = (6, 4))
df = pd.DataFrame(arr, columns = ["A", "B", "C", "D"])
df.set_index("A", inplace= True)
df

Unnamed: 0_level_0,B,C,D
A,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
55,93,80,67
40,53,61,12
19,86,91,25
44,22,93,5
20,36,13,24
73,93,7,80


2. Create a Pandas DataFrame with columns 'A', 'B', 'C' and index 'X', 'Y', 'Z'. Fill the DataFrame with random integers and access the element at row 'Y' and column 'B'.


In [13]:
arr = np.random.randint(1, 100, size = (3, 3))
df = pd.DataFrame(arr, columns = ["A", "B", "C"], index=["X","Y","Z"])
print(df)
element = df.at["Y", "B"]
print("Element at rows 'Y' and column 'B':", element)

    A   B   C
X  92  82  85
Y  48  68  13
Z  66  57  23
Element at rows 'Y' and column 'B': 68


### Assignment 2: DataFrame Operations

1. Create a Pandas DataFrame with 3 columns and 5 rows filled with random integers. Add a new column that is the product of the first two columns.

In [15]:
arr = np.random.randint(1, 101, size = (5, 3))
df = pd.DataFrame(arr, columns = ['A','B', 'C'])
print("Original DataFrame:")
print(df)
df["product"] = df["A"] * df["B"]
print("DataFrame with new column:")
print(df)

Original DataFrame:
    A   B   C
0  62  20  21
1  20  24  47
2  69  48  39
3  68  50  73
4  59  71  54
DataFrame with new column:
    A   B   C  product
0  62  20  21     1240
1  20  24  47      480
2  69  48  39     3312
3  68  50  73     3400
4  59  71  54     4189


2. Create a Pandas DataFrame with 3 columns and 4 rows filled with random integers. Compute the row-wise and column-wise sum.


In [20]:
np.random.seed(100)
arr = np.random.randint(1, 101, size = (4, 3))
df = pd.DataFrame(arr, columns = ["A", "B", "C"])
print("Original DataFrame:")
display(df)
print("Row Wise Sum:")
print(df.sum(axis = 1))
print("Column Wise Sum:")
print(df.sum(axis = 0))

Original DataFrame:


Unnamed: 0,A,B,C
0,9,25,68
1,88,80,49
2,11,95,53
3,99,54,67


Row Wise Sum:
0    102
1    217
2    159
3    220
dtype: int64
Column Wise Sum:
A    207
B    254
C    237
dtype: int64


### Assignment 3: Data Cleaning

1. Create a Pandas DataFrame with 3 columns and 5 rows filled with random integers. Introduce some NaN values. Fill the NaN values with the mean of the respective columns.


In [31]:
np.random.seed(40)
arr = np.random.randint(1, 101, size = (5, 3))
df = pd.DataFrame(arr, columns = ["A", "B", "C"])
print("Original DataFrame:")
display(df)
df.at[1, "A"] = np.nan
df.at[3, "A"] = np.nan
print("DataFrame with NaN:")
display(df)
df.loc[df["A"].isnull(), "A"] = df["A"].mean()
print("DataFrame with Filled Values:")
display(df)


Original DataFrame:


Unnamed: 0,A,B,C
0,71,92,8
1,38,57,51
2,66,13,72
3,20,32,75
4,56,84,60


DataFrame with NaN:


Unnamed: 0,A,B,C
0,71.0,92,8
1,,57,51
2,66.0,13,72
3,,32,75
4,56.0,84,60


DataFrame with Filled Values:


Unnamed: 0,A,B,C
0,71.0,92,8
1,64.333333,57,51
2,66.0,13,72
3,64.333333,32,75
4,56.0,84,60


2. Create a Pandas DataFrame with 4 columns and 6 rows filled with random integers. Introduce some NaN values. Drop the rows with any NaN values.


In [33]:
np.random.seed(100)
arr = np.random.randint(1, 101, size = (6, 4))
df = pd.DataFrame(arr, columns = ["A", "B", "C", "D"])
print('Original DataFrame:')
display(df)
df.iloc[0, 1] = np.nan
df.iloc[1, 2] = np.nan
df.iloc[5, 3] = np.nan
print("Original DataFrame with NaN:")
display(df)
df = df.dropna(axis = 0)
print("DataFrame with No Nan:")
display(df)

Original DataFrame:


Unnamed: 0,A,B,C,D
0,9,25,68,88
1,80,49,11,95
2,53,99,54,67
3,99,15,35,25
4,16,61,59,17
5,10,94,87,3


Original DataFrame with NaN:


Unnamed: 0,A,B,C,D
0,9,,68.0,88.0
1,80,49.0,,95.0
2,53,99.0,54.0,67.0
3,99,15.0,35.0,25.0
4,16,61.0,59.0,17.0
5,10,94.0,87.0,


DataFrame with No Nan:


Unnamed: 0,A,B,C,D
2,53,99.0,54.0,67.0
3,99,15.0,35.0,25.0
4,16,61.0,59.0,17.0


### Assignment 4: Data Aggregation

1. Create a Pandas DataFrame with 2 columns: 'Category' and 'Value'. Fill the 'Category' column with random categories ('A', 'B', 'C') and the 'Value' column with random integers. Group the DataFrame by 'Category' and compute the sum and mean of 'Value' for each category.

In [38]:
np.random.seed(40)
category = np.random.choice(["A", "B", "C"], size = 20)
value = np.random.randint(1, 101, size = 20)
df = pd.DataFrame({"Category":category, "Value": value})
print("Original DataFrame:")
display(df)
grouped_df = df.groupby("Category")["Value"].agg(func = ["sum", "mean"])
print("Grouped DataFrame:")
display(grouped_df)

Original DataFrame:


Unnamed: 0,Category,Value
0,C,59
1,B,75
2,A,23
3,A,4
4,C,92
5,B,100
6,A,72
7,C,56
8,C,84
9,B,29


Grouped DataFrame:


Unnamed: 0_level_0,sum,mean
Category,Unnamed: 1_level_1,Unnamed: 2_level_1
A,353,44.125
B,376,62.666667
C,383,63.833333


2. Create a Pandas DataFrame with 3 columns: 'Product', 'Category', and 'Sales'. Fill the DataFrame with random data. Group the DataFrame by 'Category' and compute the total sales for each category.


In [39]:
import pandas as pd
import random

# Define possible values for products and categories
products = ["Laptop", "Smartphone", "Tablet", "Headphones", "Smartwatch", "Camera", "Monitor", "Keyboard", "Mouse", "Speaker"]
categories = ["Electronics", "Accessories"]

# Generate random data
data = {
    "Product": [random.choice(products) for _ in range(20)],
    "Category": [random.choice(categories) for _ in range(20)],
    "Sales": [round(random.uniform(100, 1000), 2) for _ in range(20)]  # Random sales between 100 and 1000
}

# Create DataFrame
df = pd.DataFrame(data)

# Display DataFrame
print("Original DataFrame:")
display(df)

grouped_df = df.groupby("Category")["Sales"].sum()
print("Grouped DataFrame: ")
display(grouped_df)


Original DataFrame:


Unnamed: 0,Product,Category,Sales
0,Camera,Electronics,154.03
1,Headphones,Accessories,690.81
2,Speaker,Accessories,357.24
3,Mouse,Accessories,617.61
4,Smartphone,Electronics,752.95
5,Smartwatch,Accessories,916.32
6,Headphones,Electronics,274.99
7,Laptop,Electronics,305.12
8,Camera,Electronics,634.9
9,Smartwatch,Accessories,821.46


Grouped DataFrame: 


Category
Accessories    6314.47
Electronics    4463.74
Name: Sales, dtype: float64

### Assignment 5: Merging DataFrames

1. Create two Pandas DataFrames with a common column. Merge the DataFrames using the common column.

In [41]:
df1 = pd.DataFrame({"Key": ["A", "B", "C"], "Value 1": [1, 2, 3]})
df2 = pd.DataFrame({"Key": ["A", "B", "D"], "Value 2":[4, 5, 6]})
print("Original DataFrame 1: ")
display(df1)
print("Original DataFrame 2:")
display(df2)
merged_df = pd.merge(df1, df2, on = "Key", how = "inner")
print("Merged DataFrame: ")
display(merged_df)

Original DataFrame 1: 


Unnamed: 0,Key,Value 1
0,A,1
1,B,2
2,C,3


Original DataFrame 2:


Unnamed: 0,Key,Value 2
0,A,4
1,B,5
2,D,6


Merged DataFrame: 


Unnamed: 0,Key,Value 1,Value 2
0,A,1,4
1,B,2,5


2. Create two Pandas DataFrames with different columns. Concatenate the DataFrames along the rows and along the columns.


In [42]:
np.random.seed(100)
df1 = pd.DataFrame({"A": np.random.randint(1, 101, size = 3), "B": np.random.randint(1, 101, size = 3)})
df2 = pd.DataFrame({"C": np.random.randint(1, 101, size = 3), "D": np.random.randint(1, 101, size = 3)})
print("Original DataFrame 1:")
display(df1)
print("Original DataFrame 2:")
display(df2)
concat_df1 = pd.concat([df1, df2], axis = 0)
concat_df2 = pd.concat([df1, df2], axis = 1)
print("Row Wise Concatenated DataFrame: ")
display(concat_df1)
print("Column Wise Concatenated DataFrame:")
display(concat_df2)


Original DataFrame 1:


Unnamed: 0,A,B
0,9,88
1,25,80
2,68,49


Original DataFrame 2:


Unnamed: 0,C,D
0,11,99
1,95,54
2,53,67


Row Wise Concatenated DataFrame: 


Unnamed: 0,A,B,C,D
0,9.0,88.0,,
1,25.0,80.0,,
2,68.0,49.0,,
0,,,11.0,99.0
1,,,95.0,54.0
2,,,53.0,67.0


Column Wise Concatenated DataFrame:


Unnamed: 0,A,B,C,D
0,9,88,11,99
1,25,80,95,54
2,68,49,53,67


### Assignment 6: Time Series Analysis

1. Create a Pandas DataFrame with a datetime index and one column filled with random integers. Resample the DataFrame to compute the monthly mean of the values.

In [53]:
np.random.seed(100)
data_range = pd.date_range(start="2022-01-01", end = "2022-12-31", freq = "D")
values = np.random.randint(100, 1000, size = 365)
df = pd.DataFrame({"date_range":data_range, "Values":values})
df.set_index("date_range", inplace = True)
print("Original DataFrame:")
display(df)
grouped_df = df.resample("ME").mean()
print("Monthly Mean Values: ")
display(grouped_df)

Original DataFrame:


Unnamed: 0_level_0,Values
date_range,Unnamed: 1_level_1
2022-01-01,620
2022-01-02,892
2022-01-03,935
2022-01-04,971
2022-01-05,955
...,...
2022-12-27,403
2022-12-28,859
2022-12-29,529
2022-12-30,893


Monthly Mean Values: 


Unnamed: 0_level_0,Values
date_range,Unnamed: 1_level_1
2022-01-31,648.870968
2022-02-28,571.285714
2022-03-31,471.870968
2022-04-30,566.466667
2022-05-31,598.806452
2022-06-30,618.133333
2022-07-31,524.580645
2022-08-31,602.064516
2022-09-30,613.833333
2022-10-31,599.870968


2.Create a Pandas DataFrame with a datetime index ranging from '2021-01-01' to '2021-12-31' and one column filled with random integers. Compute the rolling mean with a window of 7 days.


In [58]:
np.random.seed(100)
date_range = pd.date_range(start = "2021-01-01", end = "2021-12-31", freq = "D")
Values = np.random.randint(1, 101, size = 365)
df = pd.DataFrame({"Date":date_range, "Values":Values})
df.set_index("Date", inplace=True)
print("Original DataFrame: ")
display(df)
rolling_mean = df.rolling(window=7).mean()
display("Rolling Mean with a Window of 7 Days:")
display(rolling_mean)

Original DataFrame: 


Unnamed: 0_level_0,Values
Date,Unnamed: 1_level_1
2021-01-01,9
2021-01-02,25
2021-01-03,68
2021-01-04,88
2021-01-05,80
...,...
2021-12-27,46
2021-12-28,40
2021-12-29,89
2021-12-30,51


'Rolling Mean with a Window of 7 Days:'

Unnamed: 0_level_0,Values
Date,Unnamed: 1_level_1
2021-01-01,
2021-01-02,
2021-01-03,
2021-01-04,
2021-01-05,
...,...
2021-12-27,55.428571
2021-12-28,57.142857
2021-12-29,59.714286
2021-12-30,59.285714


### Assignment 7: MultiIndex DataFrame

1. Create a Pandas DataFrame with a MultiIndex (hierarchical index). Perform some basic indexing and slicing operations on the MultiIndex DataFrame.


In [67]:
arrays = [["A", "A", "B", "B"], ["one", "two", "one", "two"]]
index = pd.MultiIndex.from_arrays(arrays, names = ("Category", "Subcategory"))
df = pd.DataFrame(np.random.randint(1, 100, size = (4, 3)), index = index, columns = ["Value1", "Value2", "value3"])
print("Original DataFrame: ")
display(df)

#Indexing at Category A
print("Indexing at Category A:")
display(df.loc["A"])

# Slicing at Category B and Subcategory two
df.loc[("B", "two")]

Original DataFrame: 


Unnamed: 0_level_0,Unnamed: 1_level_0,Value1,Value2,value3
Category,Subcategory,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
A,one,56,97,64
A,two,71,26,96
B,one,57,59,83
B,two,77,61,18


Indexing at Category A:


Unnamed: 0_level_0,Value1,Value2,value3
Subcategory,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
one,56,97,64
two,71,26,96


Value1    77
Value2    61
value3    18
Name: (B, two), dtype: int32

2. Create a Pandas DataFrame with MultiIndex consisting of 'Category' and 'SubCategory'. Fill the DataFrame with random data and compute the sum of values for each 'Category' and 'SubCategory'.

In [70]:
np.random.seed(10)
arrays = [["A", "A", "B", "B"], ["one", "two", "one", "two"]]
index = pd.MultiIndex.from_arrays(arrays, names = ["Category", "Subcategory"])
df = pd.DataFrame(data = np.random.randint(1, 101, size = (4, 3)), index = index, columns = ["Value1", "Value2", "Value3"])
print("Original DataFrame: ")
display(df)
sum_values = df.groupby(["Category", "Subcategory"]).sum()
print("Sum Values: ")
display(sum_values)

#

Original DataFrame: 


Unnamed: 0_level_0,Unnamed: 1_level_0,Value1,Value2,Value3
Category,Subcategory,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
A,one,10,16,65
A,two,29,90,94
B,one,30,9,74
B,two,1,41,37


Sum Values: 


Unnamed: 0_level_0,Unnamed: 1_level_0,Value1,Value2,Value3
Category,Subcategory,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
A,one,10,16,65
A,two,29,90,94
B,one,30,9,74
B,two,1,41,37


### Assignment 8: Pivot Tables

1. Create a Pandas DataFrame with columns 'Date', 'Category', and 'Value'. Create a pivot table to compute the sum of 'Value' for each 'Category' by 'Date'.

In [74]:
#creating pandas dataframe with columns 'Date', 'Category', and 'Value'
np.random.seed(20)
date_range = pd.date_range(start = "2025-01-01", end = "2025-01-10", freq="D")
Categories = ["A", "B", "C"]
df = pd.DataFrame({"Date":np.random.choice(date_range, size = 20), "Category":np.random.choice(Categories, size = 20), "Value":np.random.randint(1, 101, size = 20)})
print("Original DataFrame:")
display(df)
pivot_df = df.pivot_table(index = "Date", columns="Category", values="Value", aggfunc = "sum")
print("Pivot Table:")
display(pivot_df)

Original DataFrame:


Unnamed: 0,Date,Category,Value
0,2025-01-04,C,19
1,2025-01-10,C,80
2,2025-01-05,B,91
3,2025-01-07,C,60
4,2025-01-08,B,72
5,2025-01-03,C,76
6,2025-01-01,B,30
7,2025-01-07,B,46
8,2025-01-09,B,7
9,2025-01-06,C,64


Pivot Table:


Category,A,B,C
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2025-01-01,25.0,30.0,34.0
2025-01-03,19.0,,76.0
2025-01-04,,,98.0
2025-01-05,,91.0,
2025-01-06,4.0,43.0,64.0
2025-01-07,,46.0,173.0
2025-01-08,,167.0,
2025-01-09,,7.0,
2025-01-10,,79.0,80.0


2. Create a Pandas DataFrame with columns 'Year', 'Quarter', and 'Revenue'. Create a pivot table to compute the mean 'Revenue' for each 'Quarter' by 'Year'.


In [76]:
df = pd.DataFrame({"Year":np.random.choice([2023, 2024, 2025], size = 12), "Quarter": np.random.choice(["Q1", "Q2", "Q3", "Q4"], size = 12),
                   "Revenue":np.random.randint(1, 1001, size = 12)})
print("Original DataFrame:")
display(df)

pivot_tab = df.pivot_table(index = "Quarter", columns = "Year", values= "Revenue", aggfunc = "mean")
print("Pivot Table:")
display(pivot_tab)

Original DataFrame:


Unnamed: 0,Year,Quarter,Revenue
0,2025,Q3,156
1,2024,Q4,691
2,2023,Q2,382
3,2023,Q4,639
4,2023,Q3,237
5,2023,Q2,715
6,2023,Q3,755
7,2025,Q4,882
8,2025,Q1,493
9,2024,Q1,582


Pivot Table:


Year,2023,2024,2025
Quarter,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Q1,,582.0,493.0
Q2,548.5,,
Q3,496.0,899.0,156.0
Q4,639.0,691.0,726.0


### Assignment 9: Applying Functions

1. Create a Pandas DataFrame with 3 columns and 5 rows filled with random integers. Apply a function that doubles the values of the DataFrame.


In [79]:
np.random.seed(20)
df = pd.DataFrame(np.random.randint(1, 101, size = (5, 3)))
print("Original DataFrame:")
display(df)
double_df = df.apply(lambda x: 2*x)
print("Double DataFrame:")
display(double_df)

Original DataFrame:


Unnamed: 0,0,1,2
0,100,91,16
1,96,29,91
2,10,21,76
3,23,72,35
4,97,41,86


Double DataFrame:


Unnamed: 0,0,1,2
0,200,182,32
1,192,58,182
2,20,42,152
3,46,144,70
4,194,82,172


2. Create a Pandas DataFrame with 3 columns and 6 rows filled with random integers. Apply a lambda function to create a new column that is the sum of the existing columns.


In [82]:
np.random.seed(1)
df = pd.DataFrame(np.random.randint(1, 101, size = (6, 3)), columns = ["A", "B", "C"])
print("Original DataFrame:")
display(df)
df["Sum"] = df.apply(lambda x : sum(x), axis = 1)
print("Modified DataFrame:")
display(df)

Original DataFrame:


Unnamed: 0,A,B,C
0,38,13,73
1,10,76,6
2,80,65,17
3,2,77,72
4,7,26,51
5,21,19,85


Modified DataFrame:


Unnamed: 0,A,B,C,Sum
0,38,13,73,124
1,10,76,6,92
2,80,65,17,162
3,2,77,72,151
4,7,26,51,84
5,21,19,85,125


### Assignment 10: Working with Text Data

1. Create a Pandas Series with 5 random text strings. Convert all the strings to uppercase.

In [84]:
text = ["apple", "banana", "cherry","date", "elderburry"]
series = pd.Series(text)
print('Original Series:')
display(series)

uppercase_series = series.str.upper()
print('Uppercase Series:')
display(uppercase_series)

Original Series:


0         apple
1        banana
2        cherry
3          date
4    elderburry
dtype: object

Uppercase Series:


0         APPLE
1        BANANA
2        CHERRY
3          DATE
4    ELDERBURRY
dtype: object

2. Create a Pandas Series with 5 random text strings. Extract the first three characters of each string.


In [89]:
names = ["Alice", "Bob", "John", "Chris", "Broad"]
series = pd.Series(names)

print("Original Series:")
display(series)
print()
three_character_series = series.apply(lambda x : x[:3])
print("Three Characters of each string:")
display(three_character_series)

Original Series:


0    Alice
1      Bob
2     John
3    Chris
4    Broad
dtype: object


Three Characters of each string:


0    Ali
1    Bob
2    Joh
3    Chr
4    Bro
dtype: object