In [4]:
from datetime import datetime
from pathlib import Path
from typing import List

import pandas as pd
from pydantic import BaseModel, ValidationError, condecimal

# Defines the Pydantic model
class Transaction(BaseModel):
    """Schema for a single transaction record.

    Constructing an instance validates the supplied values against the field
    declarations below; pydantic raises ``ValidationError`` when a value is
    missing or does not satisfy its field's type or constraint.
    """

    sender_account: str
    receiver_account: str
    # Decimal value constrained to be strictly greater than zero (gt=0)
    amount: condecimal(gt=0)
    # Plain string; no currency-code format check is declared here
    currency: str
    timestamp: datetime


# Load the raw transactions from the CSV file
df = pd.read_csv("data/transactions.csv")

valid_rows = []
invalid_rows = []

# Validate each row with Pydantic. Only the success/failure of constructing the
# model matters here, so the instance itself is not kept.
for _idx, row in df.iterrows():
    try:
        Transaction(**row.to_dict())
    except ValidationError as e:
        # Keep the offending row and attach the validation message to it
        error_row = row.copy()
        error_row["error"] = str(e)
        invalid_rows.append(error_row)
    else:
        valid_rows.append(row)

# Convert to separate DataFrames. The columns are passed explicitly so that an
# empty result still has the expected header; without this, an empty list gives
# a column-less frame and the CSV written below would have no header row.
invalid_columns = list(df.columns)
if "error" not in invalid_columns:
    invalid_columns.append("error")

df_valid = pd.DataFrame(valid_rows, columns=df.columns)
df_invalid = pd.DataFrame(invalid_rows, columns=invalid_columns)

# Save the results; create the output directory first so that to_csv does not
# fail on a fresh checkout where it does not exist yet.
Path("output").mkdir(parents=True, exist_ok=True)
df_valid.to_csv("output/valid_transactions.csv", index=False)
df_invalid.to_csv("output/invalid_transactions.csv", index=False)

print(f"✅ Totalt antal rader: {len(df)}")
print(f"✅ Giltiga: {len(df_valid)}")
print(f"❌ Ogiltiga: {len(df_invalid)} (sparade i output/invalid_transactions.csv)")


✅ Всего строк: 100000
✅ Валидные: 100000
❌ Невалидные: 0 (сохранены в output/invalid_transactions.csv)


In [3]:
import os

# Remove the generated result files. A file that is already absent is skipped
# so that this cleanup cell can be re-run without raising FileNotFoundError.
for _path in ("output/valid_transactions.csv", "output/invalid_transactions.csv"):
    try:
        os.remove(_path)
    except FileNotFoundError:
        pass
