In [1]:
import pandas as pd
from pathlib import Path

# Load the raw transaction log from the working directory.
df = pd.read_csv("transactions.csv")

# Order rows chronologically. A stable sort (mergesort) keeps rows that share
# the same "Time" value in their original file order. The default quicksort
# gives no such guarantee, so tied rows sitting next to the midpoint could
# end up in either half depending on the sort implementation.
df = df.sort_values("Time", kind="mergesort").reset_index(drop=True)

# Cut at the row-count midpoint: earlier half -> v0, later half -> v1.
# The boundary is positional, not a calendar date, so the "2022"/"2023" labels
# are only as accurate as the data's time range, and rows with an identical
# "Time" value may still straddle the cut.
mid = len(df) // 2
df_2022 = df.iloc[:mid]
df_2023 = df.iloc[mid:]

# Every row must land in exactly one of the two halves.
assert len(df_2022) + len(df_2023) == len(df), "split lost or duplicated rows"

# Make sure the versioned output directories exist (no error if they already do).
Path("data/v0").mkdir(parents=True, exist_ok=True)
Path("data/v1").mkdir(parents=True, exist_ok=True)

# Write each half to its version directory, omitting the positional index.
df_2022.to_csv("data/v0/transactions_2022.csv", index=False)
df_2023.to_csv("data/v1/transactions_2023.csv", index=False)


In [2]:
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score
import joblib
import pandas as pd

# Train on the v0 snapshot written by the split step.
df = pd.read_csv("data/v0/transactions_2022.csv")

# "Class" is the target; every remaining column is used as a feature.
y = df["Class"]
X = df.drop(columns="Class")

# Hold out 20% of the rows for validation, stratified on the target so both
# partitions keep the same class proportions. The seed is fixed so the
# partition is repeatable.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    stratify=y,
    test_size=0.2,
    random_state=42,
)

# 200-tree forest; class_weight="balanced" reweights classes inversely to
# their frequency in the training labels. fit() returns the estimator itself.
model = RandomForestClassifier(
    random_state=42,
    class_weight="balanced",
    n_estimators=200,
).fit(X_train, y_train)

# Score the validation rows using the probability of the second class
# (column index 1 of predict_proba).
# NOTE(review): the recorded run printed "AUC: 1.0". A perfect score on a
# held-out split is unusual and may indicate leakage - TODO confirm.
val_probs = model.predict_proba(X_val)[:, 1]
val_auc = roc_auc_score(y_val, val_probs)
print("AUC:", val_auc)

# Persist the fitted model; as the last expression, the returned list of
# written file names is displayed as the cell output.
joblib.dump(model, "model.joblib")


AUC: 1.0


['model.joblib']