In [7]:
import pandas as pd

In [8]:
#Task 1: Load and inspect data

df = pd.read_csv('bookstore_sales.csv') 
print("Task 1: First 5 rows of the dataset:") 
print(df.head()) 
print("\nTask 1: DataFrame info:") 
print(df.info())

#Task 2: Check for missing values

print("\nTask 2: Missing values in each column:") 
print(df.isnull().sum())

Task 1: First 5 rows of the dataset:
         Book_Title  Price Quantity_Sold Category
0     Mystery Novel  29.99            10  Mystery
1  Sci-Fi Adventure  35.50             8   Sci-Fi
2     Mystery Novel  29.99            10  Mystery
3      Romance Book    NaN             5  Romance
4      Fantasy Epic  45.00           abc  Fantasy

Task 1: DataFrame info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7 entries, 0 to 6
Data columns (total 4 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Book_Title     7 non-null      object 
 1   Price          6 non-null      float64
 2   Quantity_Sold  6 non-null      object 
 3   Category       7 non-null      object 
dtypes: float64(1), object(3)
memory usage: 356.0+ bytes
None

Task 2: Missing values in each column:
Book_Title       0
Price            1
Quantity_Sold    1
Category         0
dtype: int64


In [9]:
#Task 3: Handle missing values in Price

mean_price = df['Price'].mean() 
df['Price'] = df['Price'].fillna(mean_price) 
print("\nTask 3: DataFrame after filling missing Price values:") 
print(df)

#Task 4: Handle missing values in Quantity_Sold

df = df.dropna(subset=['Quantity_Sold']) 
print("\nTask 4: DataFrame after removing rows with missing Quantity_Sold:") 
print(df)


Task 3: DataFrame after filling missing Price values:
         Book_Title      Price Quantity_Sold Category
0     Mystery Novel  29.990000            10  Mystery
1  Sci-Fi Adventure  35.500000             8   Sci-Fi
2     Mystery Novel  29.990000            10  Mystery
3      Romance Book  33.496667             5  Romance
4      Fantasy Epic  45.000000           abc  Fantasy
5      History Book  25.000000            12  History
6  Sci-Fi Adventure  35.500000           NaN   Sci-Fi

Task 4: DataFrame after removing rows with missing Quantity_Sold:
         Book_Title      Price Quantity_Sold Category
0     Mystery Novel  29.990000            10  Mystery
1  Sci-Fi Adventure  35.500000             8   Sci-Fi
2     Mystery Novel  29.990000            10  Mystery
3      Romance Book  33.496667             5  Romance
4      Fantasy Epic  45.000000           abc  Fantasy
5      History Book  25.000000            12  History


In [10]:
#Task 5: Remove duplicate rows

df = df.drop_duplicates() 
print("\nTask 5: DataFrame after removing duplicates:") 
print(df)

#Task 6: Fix wrong format in Quantity_Sold

df['Quantity_Sold'] = pd.to_numeric(df['Quantity_Sold'], errors='coerce') 
df = df.dropna(subset=['Quantity_Sold']) 
print("\nTask 6: DataFrame after fixing Quantity_Sold format:") 
print(df) 
print("\nTask 6: Quantity_Sold data type:", df['Quantity_Sold'].dtype)


Task 5: DataFrame after removing duplicates:
         Book_Title      Price Quantity_Sold Category
0     Mystery Novel  29.990000            10  Mystery
1  Sci-Fi Adventure  35.500000             8   Sci-Fi
3      Romance Book  33.496667             5  Romance
4      Fantasy Epic  45.000000           abc  Fantasy
5      History Book  25.000000            12  History

Task 6: DataFrame after fixing Quantity_Sold format:
         Book_Title      Price  Quantity_Sold Category
0     Mystery Novel  29.990000           10.0  Mystery
1  Sci-Fi Adventure  35.500000            8.0   Sci-Fi
3      Romance Book  33.496667            5.0  Romance
5      History Book  25.000000           12.0  History

Task 6: Quantity_Sold data type: float64


In [11]:
#Task 7: Sort the data

df_sorted = df.sort_values('Price', ascending=False) 
print("\nTask 7: DataFrame sorted by Price (highest to lowest):") 
print(df_sorted)

#Task 8: Filter the data

df_filtered = df[df['Category'].isin(['Mystery', 'Sci-Fi'])] 
print("\nTask 8: Books in Mystery or Sci-Fi categories:") 
print(df_filtered)


Task 7: DataFrame sorted by Price (highest to lowest):
         Book_Title      Price  Quantity_Sold Category
1  Sci-Fi Adventure  35.500000            8.0   Sci-Fi
3      Romance Book  33.496667            5.0  Romance
0     Mystery Novel  29.990000           10.0  Mystery
5      History Book  25.000000           12.0  History

Task 8: Books in Mystery or Sci-Fi categories:
         Book_Title  Price  Quantity_Sold Category
0     Mystery Novel  29.99           10.0  Mystery
1  Sci-Fi Adventure  35.50            8.0   Sci-Fi


In [12]:
#Task 9: Calculate basic statistics

avg_price = df['Price'].mean() 
max_quantity = df['Quantity_Sold'].max() 
print("\nTask 9: Average Price:", avg_price) 
print("Task 9: Maximum Quantity Sold:", max_quantity)

#Task 10: Create a new column

df['Total_Revenue'] = df['Price'] * df['Quantity_Sold'] 
print("\nTask 10: Final DataFrame with Total_Revenue column:") 
print(df)


Task 9: Average Price: 30.996666666666663
Task 9: Maximum Quantity Sold: 12.0

Task 10: Final DataFrame with Total_Revenue column:
         Book_Title      Price  Quantity_Sold Category  Total_Revenue
0     Mystery Novel  29.990000           10.0  Mystery     299.900000
1  Sci-Fi Adventure  35.500000            8.0   Sci-Fi     284.000000
3      Romance Book  33.496667            5.0  Romance     167.483333
5      History Book  25.000000           12.0  History     300.000000
