In [13]:
import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta

import warnings
# NOTE(review): with no category or module filter this silences every warning
# for the rest of the session, so deprecation notices raised by later cells
# will not be shown.
warnings.filterwarnings('ignore')

In [15]:
# Seed both global generators (stdlib `random` and NumPy's legacy global RNG)
# with the same value so that code drawing from either one is repeatable.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

def generate_leasing_log(n_cases=800, seed=None):
    """Generate a synthetic event log for a leasing-application process.

    Every case walks through the same backbone of activities
    (Application Submitted -> Initial Screening -> Document Collection ->
    Contract Preparation -> Client Signs -> Disbursement -> Closed) with
    randomized delays between steps. Depending on the drawn amount:

    * amounts above 1,000,000 ("large") get a 7-day SLA instead of 3 days and
      an extra "Risk Assessment" step, which in 10% of cases is followed by a
      "Documents Rejected" / "Document Collection" rework loop;
    * amounts above 3,000,000 ("huge") additionally pass a
      "Credit Committee Review" that may take 1-6 days.

    In 5% of cases a "Cancelled" event is appended 1-5 days after closing.

    Parameters
    ----------
    n_cases : int, default 800
        Number of cases to simulate; case ids run from 1 to ``n_cases``.
    seed : int or None, default None
        ``None`` draws from the module-level ``random`` generator, so a prior
        ``random.seed(...)`` call controls the output exactly as before.
        Any other value makes the call self-contained: a private
        ``random.Random(seed)`` is used and the global generator is untouched.

    Returns
    -------
    pandas.DataFrame
        One row per event with columns ``case_id``, ``activity``,
        ``timestamp``, ``resource``, ``amount``, ``sla_deadline`` and
        ``sla_violated``, sorted by ``case_id`` then ``timestamp`` with a
        fresh integer index. ``sla_violated`` is a boolean column that is
        True only on "Closed" rows whose close time is past the deadline and
        on every "Cancelled" row; all other events carry False.
    """
    # Fall back to the module itself (same method names as a Random instance)
    # so callers that seed globally keep getting byte-identical logs.
    rng = random if seed is None else random.Random(seed)

    columns = [
        "case_id", "activity", "timestamp", "resource",
        "amount", "sla_deadline", "sla_violated",
    ]
    log = []

    # NOTE: the order of the rng calls below defines the generated data for a
    # given seed; do not reorder them.
    for case_id in range(1, n_cases + 1):
        ts = datetime(2024, 1, 1) + timedelta(days=rng.randint(0, 120))

        amount = rng.choices(
            [300_000, 700_000, 1_500_000, 4_000_000],
            weights=[0.4, 0.3, 0.2, 0.1],
        )[0]
        is_large = amount > 1_000_000
        is_huge = amount > 3_000_000
        sla_days = 7 if is_large else 3
        sla_deadline = ts + timedelta(days=sla_days)

        def record(activity, when, resource, violated=False):
            # Every row carries all seven fields, so the frame never depends
            # on pandas padding short rows or on filling missing flags later.
            log.append(
                [case_id, activity, when, resource, amount, sla_deadline, violated]
            )

        # Process stages
        record("Application Submitted", ts, "OnlineSystem")
        ts += timedelta(minutes=rng.randint(5, 30))
        record("Initial Screening", ts, "OnlineSystem")

        if is_large:
            ts += timedelta(hours=rng.randint(2, 24))
            analyst = rng.choice(["JuniorAnalyst", "SeniorManager"])
            record("Risk Assessment", ts, analyst)
            if rng.random() < 0.10:  # rework loop
                ts += timedelta(hours=1)
                record("Documents Rejected", ts, analyst)
                ts += timedelta(days=rng.randint(1, 3))
                record("Document Collection", ts, "Client")

        # Regular document collection; after a rejection this is the second
        # "Document Collection" event of the case.
        ts += timedelta(hours=rng.randint(1, 48))
        record("Document Collection", ts, "Client")

        if is_huge:
            ts += timedelta(days=rng.randint(1, 6))  # a delay is possible here
            record("Credit Committee Review", ts, "SeniorManager")

        ts += timedelta(hours=rng.randint(2, 12))
        record("Contract Preparation", ts, "Lawyer")
        ts += timedelta(hours=rng.randint(1, 72))
        record("Client Signs", ts, "Client")
        ts += timedelta(hours=rng.randint(1, 24))
        record("Disbursement", ts, "OnlineSystem")

        closed_time = ts + timedelta(minutes=10)
        record("Closed", closed_time, "OnlineSystem", closed_time > sla_deadline)

        if rng.random() < 0.05:
            ts_cancel = closed_time + timedelta(days=rng.randint(1, 5))
            # Cancellations are always flagged, regardless of the deadline.
            record("Cancelled", ts_cancel, "Client", True)

    df = pd.DataFrame(log, columns=columns)
    # Explicit cast keeps the flag boolean even for an empty log.
    df["sla_violated"] = df["sla_violated"].astype(bool)
    return df.sort_values(["case_id", "timestamp"]).reset_index(drop=True)

In [17]:
# Build the log for 800 cases and report its size: total events and the
# number of distinct cases they belong to.
df = generate_leasing_log(n_cases=800)
n_events = len(df)
n_unique_cases = df["case_id"].nunique()
print("Сгенерировано событий:", n_events)
print("Уникальных кейсов:", n_unique_cases)

Сгенерировано событий: 5972
Уникальных кейсов: 800


In [19]:
# Show the first five rows of the generated log as a quick sanity check.
df.head()

Unnamed: 0,case_id,activity,timestamp,resource,amount,sla_deadline,sla_violated
0,1,Application Submitted,2024-03-22 00:00:00,OnlineSystem,300000,2024-03-25,False
1,1,Initial Screening,2024-03-22 00:28:00,OnlineSystem,300000,2024-03-25,False
2,1,Document Collection,2024-03-22 18:28:00,Client,300000,2024-03-25,False
3,1,Contract Preparation,2024-03-22 23:28:00,Lawyer,300000,2024-03-25,False
4,1,Client Signs,2024-03-24 04:28:00,Client,300000,2024-03-25,False


In [25]:
# Write the event log to a CSV file in the current working directory,
# omitting the positional index column.
df.to_csv("banking_event_log.csv", index=False)