# **Practicing pandas**

**✅ Beginner-Friendly Pandas Practice Questions**

In [26]:
import numpy as np
import pandas as pd

In [27]:
dates = pd.date_range(start="2023-05-01", periods=10, freq="1D").strftime("%Y-%m-%d").tolist()

data = {
    "CustomerID": [i for i in range(1, 11)],
    "CustomerName": ["Alice Smith","Bob Johnson","Charlie Lee","Diana Green","Ethan Brown","Farah Khan",
                     "George Miller","Hannah Davis","Ivan Petrov","Julia Lopez"],
    "Country": ["USA", "Canada", "UK", "Sweden", "Brazil", "India", "USA", "Canada", "Russia", "Mexico"],
    "Age": [25, 30, 35, 28, 40, 22, 33, 27, 29, 31],
    "Gender": ["F", "M", "M", "F", "M", "F", "M", "F", "M", "F"],
    "Product": ["Laptop", "Smartphone", "Headphones", "Tablet", "Laptop", 
                "Smartphone", "Smartwatch", "Headphones", "Laptop", "Smartphone"],
    "Quantity": [1, 2, 3, 1, 2, 1, 2, 4, 1, 2],
    "Price": [1200, 800, 150, 600, 1100, 900, 300, 120, 1300, 850],
    "PurchaseDate": dates
}

df = pd.DataFrame(data)

In [28]:
df

Unnamed: 0,CustomerID,CustomerName,Country,Age,Gender,Product,Quantity,Price,PurchaseDate
0,1,Alice Smith,USA,25,F,Laptop,1,1200,2023-05-01
1,2,Bob Johnson,Canada,30,M,Smartphone,2,800,2023-05-02
2,3,Charlie Lee,UK,35,M,Headphones,3,150,2023-05-03
3,4,Diana Green,Sweden,28,F,Tablet,1,600,2023-05-04
4,5,Ethan Brown,Brazil,40,M,Laptop,2,1100,2023-05-05
5,6,Farah Khan,India,22,F,Smartphone,1,900,2023-05-06
6,7,George Miller,USA,33,M,Smartwatch,2,300,2023-05-07
7,8,Hannah Davis,Canada,27,F,Headphones,4,120,2023-05-08
8,9,Ivan Petrov,Russia,29,M,Laptop,1,1300,2023-05-09
9,10,Julia Lopez,Mexico,31,F,Smartphone,2,850,2023-05-10


### **1 Basic Selection**

    1. Show only the names of customers from USA.

In [29]:
df[df["Country"] == "USA"]["CustomerName"]

0      Alice Smith
6    George Miller
Name: CustomerName, dtype: object

    2. Show the CustomerName and Product columns only.

In [30]:
df[["CustomerName", "Product"]]

Unnamed: 0,CustomerName,Product
0,Alice Smith,Laptop
1,Bob Johnson,Smartphone
2,Charlie Lee,Headphones
3,Diana Green,Tablet
4,Ethan Brown,Laptop
5,Farah Khan,Smartphone
6,George Miller,Smartwatch
7,Hannah Davis,Headphones
8,Ivan Petrov,Laptop
9,Julia Lopez,Smartphone


### **2 Filtering**

In [31]:
df.columns

Index(['CustomerID', 'CustomerName', 'Country', 'Age', 'Gender', 'Product',
       'Quantity', 'Price', 'PurchaseDate'],
      dtype='object')

    1. Find all customers who bought a Laptop.

In [32]:
df[df["Product"] == "Laptop"]["CustomerName"]

0    Alice Smith
4    Ethan Brown
8    Ivan Petrov
Name: CustomerName, dtype: object

    2. Find all customers older than 30 years.

In [33]:
df[df["Age"] >= 30][["CustomerName", "Age"]]

Unnamed: 0,CustomerName,Age
1,Bob Johnson,30
2,Charlie Lee,35
4,Ethan Brown,40
6,George Miller,33
9,Julia Lopez,31


### **3 Sorting**

    1. Sort the customers by Age.

In [34]:
df.sort_values("Age", ascending=True)[["CustomerName", "Age"]].head()

Unnamed: 0,CustomerName,Age
5,Farah Khan,22
0,Alice Smith,25
7,Hannah Davis,27
3,Diana Green,28
8,Ivan Petrov,29


    2. Sort by Price in descending order.

In [35]:
df.sort_values("Price", ascending=False)[["CustomerName", "Price"]].head()

Unnamed: 0,CustomerName,Price
8,Ivan Petrov,1300
0,Alice Smith,1200
4,Ethan Brown,1100
5,Farah Khan,900
9,Julia Lopez,850


### **4 Aggregation**

    1. Find the average age of customers.

In [36]:
## Manually
average = df["Age"].sum() / len(df["Age"])
print(average) # 30.0

## using ".mean()" func
print(df["Age"].mean())  # also 30.0

30.0
30.0


    2. Find the total quantity of products sold.

In [37]:
print(df["Quantity"].sum())
print(df["Quantity"].agg("sum"))# Aggregations method

19
19


    3. Find the maximum price of any product.

In [38]:
print(df["Price"].max())
print(df["Price"].agg("max"))# Aggregations method

1300
1300


### **5 GroupBy**

In [39]:
df.columns

Index(['CustomerID', 'CustomerName', 'Country', 'Age', 'Gender', 'Product',
       'Quantity', 'Price', 'PurchaseDate'],
      dtype='object')

    1. Find the total Quantity purchased by each Country.

In [40]:
total = df.groupby("Country", as_index=False)["Quantity"].sum()
print(total.head(2), "\n")
print(total.to_string(index=False)) # removing the default index number

  Country  Quantity
0  Brazil         2
1  Canada         6 

Country  Quantity
 Brazil         2
 Canada         6
  India         1
 Mexico         2
 Russia         1
 Sweden         1
     UK         3
    USA         3


    2. Find the average Price of products by Gender.

In [41]:
df.columns

Index(['CustomerID', 'CustomerName', 'Country', 'Age', 'Gender', 'Product',
       'Quantity', 'Price', 'PurchaseDate'],
      dtype='object')

*"mean()" func help to calculate the average*

In [42]:
average_price = df.groupby("Gender", as_index=False)["Price"].mean()
# print(average_price)
print(average_price.to_string(index=False))

Gender  Price
     F  734.0
     M  730.0


### **6 New Columns**

    1. Create a new column TotalAmount = Quantity × Price.

In [43]:
df["TotalAmount"] = df["Quantity"] * df["Price"]
new_column = df[["CustomerName", "Quantity", "TotalAmount"]]
print(new_column.to_string(index=False))

 CustomerName  Quantity  TotalAmount
  Alice Smith         1         1200
  Bob Johnson         2         1600
  Charlie Lee         3          450
  Diana Green         1          600
  Ethan Brown         2         2200
   Farah Khan         1          900
George Miller         2          600
 Hannah Davis         4          480
  Ivan Petrov         1         1300
  Julia Lopez         2         1700


    2. Find which customer spent the highest total amount.

In [44]:
highest_spender = df[df["TotalAmount"] == df["TotalAmount"].max()]["CustomerName"]
print(f"\nHighest Spender Name: '{highest_spender.to_string(index=False)}' Spanded: ${df["TotalAmount"].max()}")


Highest Spender Name: 'Ethan Brown' Spanded: $2200


### **7 Date Handling**

In [45]:
df.head()

Unnamed: 0,CustomerID,CustomerName,Country,Age,Gender,Product,Quantity,Price,PurchaseDate,TotalAmount
0,1,Alice Smith,USA,25,F,Laptop,1,1200,2023-05-01,1200
1,2,Bob Johnson,Canada,30,M,Smartphone,2,800,2023-05-02,1600
2,3,Charlie Lee,UK,35,M,Headphones,3,150,2023-05-03,450
3,4,Diana Green,Sweden,28,F,Tablet,1,600,2023-05-04,600
4,5,Ethan Brown,Brazil,40,M,Laptop,2,1100,2023-05-05,2200


    1. Convert PurchaseDate into a datetime column.

In [46]:
### Convert them into datetime (so pandas knows they are dates, not strings)
# print(type(df["PurchaseDate"])) # <class 'pandas.core.series.Series'>

df["PurchaseDate"] = pd.to_datetime(df["PurchaseDate"]) # dtype: datetime64[ns]

In [47]:
df[df["PurchaseDate"] > "2023-05-05"]["PurchaseDate"]

5   2023-05-06
6   2023-05-07
7   2023-05-08
8   2023-05-09
9   2023-05-10
Name: PurchaseDate, dtype: datetime64[ns]

    2. Find all purchases made in September 2023.
Find purchases in May 2023 (all of them in your case)

In [48]:
df[df["PurchaseDate"].dt.month == 5]

Unnamed: 0,CustomerID,CustomerName,Country,Age,Gender,Product,Quantity,Price,PurchaseDate,TotalAmount
0,1,Alice Smith,USA,25,F,Laptop,1,1200,2023-05-01,1200
1,2,Bob Johnson,Canada,30,M,Smartphone,2,800,2023-05-02,1600
2,3,Charlie Lee,UK,35,M,Headphones,3,150,2023-05-03,450
3,4,Diana Green,Sweden,28,F,Tablet,1,600,2023-05-04,600
4,5,Ethan Brown,Brazil,40,M,Laptop,2,1100,2023-05-05,2200
5,6,Farah Khan,India,22,F,Smartphone,1,900,2023-05-06,900
6,7,George Miller,USA,33,M,Smartwatch,2,300,2023-05-07,600
7,8,Hannah Davis,Canada,27,F,Headphones,4,120,2023-05-08,480
8,9,Ivan Petrov,Russia,29,M,Laptop,1,1300,2023-05-09,1300
9,10,Julia Lopez,Mexico,31,F,Smartphone,2,850,2023-05-10,1700


### **Creating New CSV File**

In [51]:
df.head()

Unnamed: 0,CustomerID,CustomerName,Country,Age,Gender,Product,Quantity,Price,PurchaseDate,TotalAmount
0,1,Alice Smith,USA,25,F,Laptop,1,1200,2023-05-01,1200
1,2,Bob Johnson,Canada,30,M,Smartphone,2,800,2023-05-02,1600
2,3,Charlie Lee,UK,35,M,Headphones,3,150,2023-05-03,450
3,4,Diana Green,Sweden,28,F,Tablet,1,600,2023-05-04,600
4,5,Ethan Brown,Brazil,40,M,Laptop,2,1100,2023-05-05,2200


In [57]:
df.to_csv("customer.csv", index=False)

    ================== The END ==================