## Import Packages

In [7]:
import pandas as pd
from dataclasses import dataclass
from datetime import datetime
import re


## Step 1: Load raw CSV and display first 3 rows

In [8]:
# Read the raw sales export and preview its column names and first rows.
df = pd.read_csv("data/Sales Records.csv")
print("Header:", list(df.columns))
print(df.head(n=3))


Header: ['date', 'customer_id', 'product_id', 'price', 'quantity', 'coupon_code', 'shipping_city']
         date customer_id product_id    price  quantity coupon_code  \
0  2023-10-30    CUST0086       P001  1253.52         1    FREESHIP   
1  2024-05-24    CUST0075       P020   253.25         2         NaN   
2  2023-10-06    CUST0079       P007    58.91         2         NaN   

  shipping_city  
0  Jacksonville  
1      New York  
2        Denver  


## Step 2: Pick the Right Container

We considered three options for storing each row: dictionaries, namedtuples, and classes.  
Classes are the way to go because they let us attach logic such as cleaning the data, transforming it, or calculating totals. They also keep each transaction neat and reusable.


### Structure and define the Transaction class

In [9]:


@dataclass
class Transaction:
    """One sales record, with helpers to sanitise it and compute its total.

    The fields mirror the columns of the sales CSV. Values are stored exactly
    as they are passed to the constructor; call ``clean()`` to normalise
    ``price`` and ``coupon_code`` in place.
    """

    date: str
    customer_id: str
    product_id: str
    price: float
    quantity: int
    coupon_code: str
    shipping_city: str

    def clean(self):
        """Normalise ``price`` and ``coupon_code`` in place.

        * ``price`` is coerced to ``float``. A value that cannot be
          converted, is negative, or is NaN becomes ``0.0``.
        * ``coupon_code`` is upper-cased when it is a string. Any other value
          (for example the float NaN that pandas yields for an empty cell)
          becomes the literal string ``"nan"``.
        """
        try:
            price = float(self.price)
        except (TypeError, ValueError, OverflowError):
            # Only conversion failures are expected here. A bare ``except``
            # would also swallow KeyboardInterrupt and SystemExit.
            price = 0.0

        # ``not price >= 0`` is true for negative numbers *and* for NaN,
        # because every ordering comparison against NaN is False. A plain
        # ``price < 0`` check would let NaN through and poison ``total()``.
        if not price >= 0:
            price = 0.0
        self.price = price

        if isinstance(self.coupon_code, str):
            self.coupon_code = self.coupon_code.upper()
        else:
            # Sentinel kept as the lowercase string "nan" for compatibility
            # with existing consumers of cleaned records.
            self.coupon_code = "nan"

    def total(self):
        """Return ``price * quantity``, or ``0`` when ``price`` is falsy.

        Call ``clean()`` first if ``price`` may be a string or NaN; this
        method does not validate the fields itself.
        """
        return self.price * self.quantity if self.price else 0


In [10]:
def load_transactions(path: str) -> list[Transaction]:
    """Read the CSV at ``path`` and build one ``Transaction`` per row.

    Each row is converted to a dict keyed by column name and unpacked into
    the ``Transaction`` constructor, so the CSV header must match the
    dataclass field names. Rows are returned in file order and are not
    cleaned.
    """
    frame = pd.read_csv(path)
    return [Transaction(**record.to_dict()) for _, record in frame.iterrows()]

In [None]:
def main():
    """Load the sales CSV into ``Transaction`` objects and return them.

    Returns:
        The list produced by ``load_transactions`` for
        ``data/Sales Records.csv``.
    """
    # Return the result instead of binding it to an unused local, so the
    # loaded rows are available to whoever calls main().
    return load_transactions("data/Sales Records.csv")



In [12]:
# Entry-point guard: call main() only when this code runs as the top-level
# module, not when it is imported from elsewhere.
if __name__ == "__main__":
    main()

hello
Loaded [Transaction(date='2023-10-30', customer_id='CUST0086', product_id='P001', price=1253.52, quantity=1, coupon_code='FREESHIP', shipping_city='Jacksonville'), Transaction(date='2024-05-24', customer_id='CUST0075', product_id='P020', price=253.25, quantity=2, coupon_code=nan, shipping_city='New York'), Transaction(date='2023-10-06', customer_id='CUST0079', product_id='P007', price=58.91, quantity=2, coupon_code=nan, shipping_city='Denver'), Transaction(date='2024-02-03', customer_id='CUST0147', product_id='P020', price=260.87, quantity=1, coupon_code=nan, shipping_city='Columbus'), Transaction(date='2023-12-17', customer_id='CUST0126', product_id='P017', price=921.16, quantity=2, coupon_code='SUMMER20', shipping_city='Houston'), Transaction(date='2023-06-04', customer_id='CUST0028', product_id='P011', price=118.52, quantity=2, coupon_code=nan, shipping_city='Denver'), Transaction(date='2023-11-06', customer_id='CUST0053', product_id='P024', price=73.81, quantity=1, coupon_cod