### Answer all the questions below based on retailer dataset.

Example: 1. Load the CSV file and display the first 3 rows.

In [3]:
import pandas as pd
df = pd.read_csv('retail_orders_with_missing.csv')
print(df.head(3))

   order_id customer product  quantity    price    city
0       101    Alice  Laptop       1.0  55000.0   Delhi
1       102      Bob  Mobile       2.0  20000.0     NaN
2       103      NaN  Tablet       NaN  15000.0  Mumbai


2. Show the shape and basic summary of the DataFrame.

In [4]:
df.describe()

Unnamed: 0,order_id,quantity,price
count,6.0,4.0,5.0
mean,103.5,1.75,27600.0
std,1.870829,0.957427,16318.700929
min,101.0,1.0,15000.0
25%,102.25,1.0,18000.0
50%,103.5,1.5,20000.0
75%,104.75,2.25,30000.0
max,106.0,3.0,55000.0


3. Display the number of missing values in every column.

In [5]:
missing_values = df.isnull().sum()
print(missing_values)

order_id    0
customer    1
product     1
quantity    2
price       1
city        1
dtype: int64


4. Select only 'customer' and 'product' columns.

In [6]:
df_selected = df[['customer', 'product']]
print(df_selected)

  customer product
0    Alice  Laptop
1      Bob  Mobile
2      NaN  Tablet
3    David  Laptop
4     Esha     NaN
5   Farhan  Mobile


5. Filter the rows where quantity is greater than 1.

In [7]:
filtered_df = df[df['quantity'] > 1]
print(filtered_df)

   order_id customer product  quantity    price     city
1       102      Bob  Mobile       2.0  20000.0      NaN
4       105     Esha     NaN       3.0  30000.0  Chennai


6. Fill missing quantities using the mean quantity.

In [8]:
mean_quantity = df['quantity'].mean()
df['quantity'] = df['quantity'].fillna(mean_quantity)
print(df)

   order_id customer product  quantity    price     city
0       101    Alice  Laptop      1.00  55000.0    Delhi
1       102      Bob  Mobile      2.00  20000.0      NaN
2       103      NaN  Tablet      1.75  15000.0   Mumbai
3       104    David  Laptop      1.00      NaN    Delhi
4       105     Esha     NaN      3.00  30000.0  Chennai
5       106   Farhan  Mobile      1.75  18000.0   Mumbai


7. Drop rows where the 'product' column is missing.

In [9]:
df_cleaned = df.dropna(subset=['product'])
print(df_cleaned)


   order_id customer product  quantity    price    city
0       101    Alice  Laptop      1.00  55000.0   Delhi
1       102      Bob  Mobile      2.00  20000.0     NaN
2       103      NaN  Tablet      1.75  15000.0  Mumbai
3       104    David  Laptop      1.00      NaN   Delhi
5       106   Farhan  Mobile      1.75  18000.0  Mumbai


8. Sort the orders based on price in ascending order.

In [10]:
df_sorted = df.sort_values(by='price', ascending=True)
print(df_sorted)


   order_id customer product  quantity    price     city
2       103      NaN  Tablet      1.75  15000.0   Mumbai
5       106   Farhan  Mobile      1.75  18000.0   Mumbai
1       102      Bob  Mobile      2.00  20000.0      NaN
4       105     Esha     NaN      3.00  30000.0  Chennai
0       101    Alice  Laptop      1.00  55000.0    Delhi
3       104    David  Laptop      1.00      NaN    Delhi


9. Count how many orders came from each city.

In [11]:
city_counts = df['city'].value_counts()
print(city_counts)


city
Delhi      2
Mumbai     2
Chennai    1
Name: count, dtype: int64


10. Add a new column 'total_amount' = quantity * price.

In [12]:
df['total_amount'] = df['quantity'] * df['price']
print(df)

   order_id customer product  quantity    price     city  total_amount
0       101    Alice  Laptop      1.00  55000.0    Delhi       55000.0
1       102      Bob  Mobile      2.00  20000.0      NaN       40000.0
2       103      NaN  Tablet      1.75  15000.0   Mumbai       26250.0
3       104    David  Laptop      1.00      NaN    Delhi           NaN
4       105     Esha     NaN      3.00  30000.0  Chennai       90000.0
5       106   Farhan  Mobile      1.75  18000.0   Mumbai       31500.0


1. Replace missing price values with the median price.

In [13]:
median_price = df['price'].median()
df['price'] = df['price'].fillna(median_price)
print(df)

   order_id customer product  quantity    price     city  total_amount
0       101    Alice  Laptop      1.00  55000.0    Delhi       55000.0
1       102      Bob  Mobile      2.00  20000.0      NaN       40000.0
2       103      NaN  Tablet      1.75  15000.0   Mumbai       26250.0
3       104    David  Laptop      1.00  20000.0    Delhi           NaN
4       105     Esha     NaN      3.00  30000.0  Chennai       90000.0
5       106   Farhan  Mobile      1.75  18000.0   Mumbai       31500.0


2. Group orders by product and calculate the total quantity sold.

In [14]:
product_summary = df.groupby('product')['quantity'].sum()
print(product_summary)

product
Laptop    2.00
Mobile    3.75
Tablet    1.75
Name: quantity, dtype: float64


3. Drop rows where more than 2 columns have missing values.

In [15]:
df_cleaned = df.dropna(thresh=(df.shape[1] - 2))
print(df_cleaned)

   order_id customer product  quantity    price     city  total_amount
0       101    Alice  Laptop      1.00  55000.0    Delhi       55000.0
1       102      Bob  Mobile      2.00  20000.0      NaN       40000.0
2       103      NaN  Tablet      1.75  15000.0   Mumbai       26250.0
3       104    David  Laptop      1.00  20000.0    Delhi           NaN
4       105     Esha     NaN      3.00  30000.0  Chennai       90000.0
5       106   Farhan  Mobile      1.75  18000.0   Mumbai       31500.0


4. Create a new column 'order_type' = 'High Value' if price > 30000 else 'Regular'.

In [16]:
import numpy as np
df['order_type'] = np.where(df['price'] > 30000, 'High Value', 'Regular')
print(df)


   order_id customer product  quantity    price     city  total_amount  \
0       101    Alice  Laptop      1.00  55000.0    Delhi       55000.0   
1       102      Bob  Mobile      2.00  20000.0      NaN       40000.0   
2       103      NaN  Tablet      1.75  15000.0   Mumbai       26250.0   
3       104    David  Laptop      1.00  20000.0    Delhi           NaN   
4       105     Esha     NaN      3.00  30000.0  Chennai       90000.0   
5       106   Farhan  Mobile      1.75  18000.0   Mumbai       31500.0   

   order_type  
0  High Value  
1     Regular  
2     Regular  
3     Regular  
4     Regular  
5     Regular  


5. Replace missing city values with the most frequent city.

In [17]:
most_frequent_city = df['city'].mode()[0]
df['city'] = df['city'].fillna(most_frequent_city)
print(df)


   order_id customer product  quantity    price     city  total_amount  \
0       101    Alice  Laptop      1.00  55000.0    Delhi       55000.0   
1       102      Bob  Mobile      2.00  20000.0    Delhi       40000.0   
2       103      NaN  Tablet      1.75  15000.0   Mumbai       26250.0   
3       104    David  Laptop      1.00  20000.0    Delhi           NaN   
4       105     Esha     NaN      3.00  30000.0  Chennai       90000.0   
5       106   Farhan  Mobile      1.75  18000.0   Mumbai       31500.0   

   order_type  
0  High Value  
1     Regular  
2     Regular  
3     Regular  
4     Regular  
5     Regular  


6. Convert the customer names to uppercase.

In [18]:
df['customer'] = df['customer'].str.upper()
print(df)

   order_id customer product  quantity    price     city  total_amount  \
0       101    ALICE  Laptop      1.00  55000.0    Delhi       55000.0   
1       102      BOB  Mobile      2.00  20000.0    Delhi       40000.0   
2       103      NaN  Tablet      1.75  15000.0   Mumbai       26250.0   
3       104    DAVID  Laptop      1.00  20000.0    Delhi           NaN   
4       105     ESHA     NaN      3.00  30000.0  Chennai       90000.0   
5       106   FARHAN  Mobile      1.75  18000.0   Mumbai       31500.0   

   order_type  
0  High Value  
1     Regular  
2     Regular  
3     Regular  
4     Regular  
5     Regular  


7. Export the cleaned DataFrame to a new CSV file.

In [19]:
df.to_csv('ahrorjon_answers.csv', index=False)
print('CSV createed')

CSV createed
