In [13]:
import pandas as pd
import numpy as np

# Read the transaction records from the CSV file into a DataFrame.
df = pd.read_csv("transactions.csv")

# Show a banner, the table itself, and its (rows, columns) shape, one per line.
print("=== LOADED DATASET ===", df, f"\nShape: {df.shape}", sep="\n")


=== LOADED DATASET ===
  transaction_id  price  quantity
0          T5001   1500         2
1          T5002    500        10
2          T5003  12000         1
3          T5004   1500         5
4          T5005  12000         3

Shape: (5, 3)


In [14]:
# Inspect the dtype pandas assigned to each column.
column_dtypes = df.dtypes
print("\n=== DATA TYPES ===", column_dtypes, sep="\n")



=== DATA TYPES ===
transaction_id      str
price             int64
quantity          int64
dtype: object


In [15]:
# Convert the price and quantity columns to NumPy arrays.
# Series.to_numpy() is the accessor the pandas documentation recommends over
# the older .values attribute when a NumPy array is what is wanted.
prices = df["price"].to_numpy()
quantities = df["quantity"].to_numpy()

print("=== NUMPY ARRAYS ===")
print(f"Prices array: {prices}")
print(f"Quantities array: {quantities}")
print(f"Prices dtype: {prices.dtype}")
print(f"Quantities dtype: {quantities.dtype}")


=== NUMPY ARRAYS ===
Prices array: [ 1500   500 12000  1500 12000]
Quantities array: [ 2 10  1  5  3]
Prices dtype: int64
Quantities dtype: int64


In [16]:
# Confirm that both extracted objects are instances of np.ndarray.
prices_is_ndarray = isinstance(prices, np.ndarray)
quantities_is_ndarray = isinstance(quantities, np.ndarray)
print(f"\nPrices is NumPy array: {prices_is_ndarray}")
print(f"Quantities is NumPy array: {quantities_is_ndarray}")




Prices is NumPy array: True
Quantities is NumPy array: True


In [17]:
# Revenue per transaction: element-wise product of price and quantity,
# computed in a single vectorized call.
print("=== REVENUE CALCULATION ===")
revenue = np.multiply(prices, quantities)
print("Revenue:", revenue)
print("No loops used! ✓")


=== REVENUE CALCULATION ===
Revenue: [ 3000  5000 12000  7500 36000]
No loops used! ✓


In [18]:
# Store the computed revenue array on the DataFrame as the "revenue" column.
df["revenue"] = revenue
print("\n=== DATAFRAME WITH REVENUE ===", df, sep="\n")



=== DATAFRAME WITH REVENUE ===
  transaction_id  price  quantity  revenue
0          T5001   1500         2     3000
1          T5002    500        10     5000
2          T5003  12000         1    12000
3          T5004   1500         5     7500
4          T5005  12000         3    36000


In [19]:
# Classify transactions using vectorized conditional logic.
# np.select evaluates the conditions in order and takes the first one that
# matches, so the MEDIUM test only applies to revenues that were not HIGH.
HIGH_REVENUE_ABOVE = 10000   # strictly greater than this -> "HIGH"
MEDIUM_REVENUE_FROM = 2000   # at least this (and not HIGH) -> "MEDIUM"

categories = np.select(
    [revenue > HIGH_REVENUE_ABOVE, revenue >= MEDIUM_REVENUE_FROM],
    ["HIGH", "MEDIUM"],
    default="LOW",  # anything below the MEDIUM threshold
)

print("=== CLASSIFICATION ===")
print("Categories:", categories)


=== CLASSIFICATION ===
Categories: ['MEDIUM' 'MEDIUM' 'HIGH' 'MEDIUM' 'HIGH']


In [20]:
# Verify classifications: list each transaction id with its revenue and label.
print("\n=== CLASSIFICATION VERIFICATION ===")
# Walk the three aligned sequences together instead of indexing by position;
# this avoids building a row Series with df.iloc[i] on every iteration.
for txn_id, txn_revenue, txn_category in zip(df["transaction_id"], revenue, categories):
    print(f"Transaction {txn_id}: "
          f"Revenue={txn_revenue:,} → Category={txn_category}")



=== CLASSIFICATION VERIFICATION ===
Transaction T5001: Revenue=3,000 → Category=MEDIUM
Transaction T5002: Revenue=5,000 → Category=MEDIUM
Transaction T5003: Revenue=12,000 → Category=HIGH
Transaction T5004: Revenue=7,500 → Category=MEDIUM
Transaction T5005: Revenue=36,000 → Category=HIGH


In [21]:
# Store the category labels on the DataFrame as the "category" column.
df["category"] = categories
print("\n=== FINAL DATAFRAME ===", df, sep="\n")



=== FINAL DATAFRAME ===
  transaction_id  price  quantity  revenue category
0          T5001   1500         2     3000   MEDIUM
1          T5002    500        10     5000   MEDIUM
2          T5003  12000         1    12000     HIGH
3          T5004   1500         5     7500   MEDIUM
4          T5005  12000         3    36000     HIGH


In [22]:
# Produce the final output: an independent copy holding only the
# identifier, revenue and category columns.
output_columns = ["transaction_id", "revenue", "category"]
output_df = df.loc[:, output_columns].copy()
print("=== OUTPUT DATAFRAME ===", output_df, sep="\n")


=== OUTPUT DATAFRAME ===
  transaction_id  revenue category
0          T5001     3000   MEDIUM
1          T5002     5000   MEDIUM
2          T5003    12000     HIGH
3          T5004     7500   MEDIUM
4          T5005    36000     HIGH


In [23]:
# Save the output frame to CSV; index=False leaves the row index out of the file.
output_path = "transaction_classification.csv"
output_df.to_csv(output_path, index=False)
# Report the same path that was passed to to_csv so the message cannot drift from it.
print(f"\nSaved: {output_path}")




Saved: transaction_classification.csv


In [24]:
# Display summary statistics for the output frame: row count, revenue total
# and mean, and the number of transactions in each category.
print("\n=== SUMMARY STATISTICS ===")
print(f"Total transactions: {len(output_df)}")
print(f"Total revenue: {output_df['revenue'].sum():,.2f}")
print(f"Average revenue: {output_df['revenue'].mean():,.2f}")
# Plain string literal: nothing is interpolated here, so no f-prefix is needed.
print("\nCategory distribution:")
print(output_df["category"].value_counts())



=== SUMMARY STATISTICS ===
Total transactions: 5
Total revenue: 63,500.00
Average revenue: 12,700.00

Category distribution:
category
MEDIUM    3
HIGH      2
Name: count, dtype: int64
