## Валидатор конфигураций `ConfigsValidator`
- модуль `data_generator.validator`

**Тест валидатора конфигураций**

In [1]:
import os
import yaml
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Move to the project root so the relative "./config/..." paths below resolve.
# Guarded so that re-running this cell does not climb one more directory up.
if not os.path.isdir("./config"):
    os.chdir("..")

# Every config is opened as UTF-8 explicitly so that parsing does not depend
# on the platform's default text encoding.

# General settings
with open("./config/base.yaml", encoding="utf8") as f:
    base_cfg = yaml.safe_load(f)
# Legitimate transactions settings
with open("./config/legit.yaml", encoding="utf8") as f:
    legit_cfg = yaml.safe_load(f)
# General fraud settings
with open("./config/fraud.yaml", encoding="utf8") as f:
    fraud_cfg = yaml.safe_load(f)
# Drop fraud settings
with open("./config/drops.yaml", encoding="utf8") as f:
    drop_cfg = yaml.safe_load(f)
# Time settings
with open("./config/time.yaml", encoding="utf8") as f:
    time_cfg = yaml.safe_load(f)

In [2]:
from data_generator.validator import ConfigsValidator

# Build the validator from the base, legit, fraud and drop configs loaded above
# (time_cfg is not passed to the constructor).
cfg_validator = ConfigsValidator(base_cfg, legit_cfg, fraud_cfg, drop_cfg)

**get_total_clients**

In [3]:
# Display the value returned by get_total_clients() for the loaded configs.
cfg_validator.get_total_clients()

5369

**estimate_legit_clients**

In [3]:
# Display the validator's estimate of how many clients legit generation needs
# (exact semantics are defined in data_generator.validator).
cfg_validator.estimate_legit_clients()

7500

**model_legit_txns_num_dist**

In [3]:
# The returned object exposes ppf/mean/std (presumably a scipy.stats-style
# distribution — confirm in data_generator.validator).
legit_dist = cfg_validator.model_legit_txns_num_dist()
# Display the 0.99 quantile, the mean and the standard deviation as a tuple.
legit_dist.ppf(0.99), legit_dist.mean(), legit_dist.std()

(np.float64(10822.162378214074),
 np.float64(10337.778922506115),
 np.float64(208.21626082370474))

**estimate_legit_txns_max**

In [3]:
# Display the validator's estimate of the maximum number of legit transactions
# (exact semantics are defined in data_generator.validator).
cfg_validator.estimate_legit_txns_max()

10981

**estimate_compr_clients**

In [3]:
# Display the validator's estimate of the clients needed for
# compromised-client fraud (exact semantics are defined in the validator module).
cfg_validator.estimate_compr_clients()

12208

**validate_legit_txn_num**

In [7]:
# Demonstrates the failure path: in the saved run this call raised ValueError
# because the desired legit txns number needed more clients than were available.
cfg_validator.validate_legit_txn_num()

ValueError: Desired legit txns number is too large: 150000.
            Clients number needed for generation: 7500.
            Available clients: 5369.
            Either reduce legit txns number or increase avg txn number per client
            or both.

**validate_comp_rate**

In [2]:
# Demonstrates the failure path: in the saved run this call raised ValueError
# describing a conflict between the compromised-client and legit client estimates.
cfg_validator.validate_comp_rate()

ValueError: Estimated maximum possible number of clients that might be needed to generate
            'compromised client' fraud transactions: 1012.
            Estimated number of clients required for generating legitimate transactions: 1000.

            The number of compromised fraud clients cannot be less than the number required
            for legitimate transaction generation.

            Please either:
            1. Decrease the 'compr_client' rate and/or the overall fraud rate in configs/fraud.yaml
            2. Reduce the average number of legitimate transactions per client in legit.yaml
            

**validate_drops_rate**

In [5]:
# Demonstrates the failure path: in the saved run this call raised ValueError
# because drop fraud needed more clients than remained available.
cfg_validator.validate_drops_rate()

ValueError: Total clients number needed for drop fraud generation
            exceeds the available clients number.
            Clients number needed for drops: 654
            Available clients: 0
            Legit txns clients: 7500
            Compr fraud clients: 12208
            Please eitehr:
            1. Reduce total fraud rate.
            2. Reduce legit txns number

**validate_all**

In [1]:
import os
import yaml
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Move to the project root so the relative "./config/..." paths below resolve.
# Guarded so that re-running this cell does not climb one more directory up.
if not os.path.isdir("./config"):
    os.chdir("..")

# Every config is opened as UTF-8 explicitly so that parsing does not depend
# on the platform's default text encoding.

# General settings
with open("./config/base.yaml", encoding="utf8") as f:
    base_cfg = yaml.safe_load(f)
# Legitimate transactions settings
with open("./config/legit.yaml", encoding="utf8") as f:
    legit_cfg = yaml.safe_load(f)
# General fraud settings
with open("./config/fraud.yaml", encoding="utf8") as f:
    fraud_cfg = yaml.safe_load(f)
# Drop fraud settings
with open("./config/drops.yaml", encoding="utf8") as f:
    drop_cfg = yaml.safe_load(f)
# Time settings
with open("./config/time.yaml", encoding="utf8") as f:
    time_cfg = yaml.safe_load(f)

# Imported after the working directory is set to the project root.
from data_generator.validator import ConfigsValidator

# Build the validator from the base, legit, fraud and drop configs
# (time_cfg is not passed to the constructor).
cfg_validator = ConfigsValidator(base_cfg, legit_cfg, fraud_cfg, drop_cfg)

In [2]:
# Run every validation in one call. In the saved run the legit, compromised and
# drop checks printed OK and the call then failed with an AssertionError about
# general_diff vs online_time_diff in legit.yaml.
cfg_validator.validate_all()

Legit txns number config is OK
Compromised fraud rate config is OK
Drop fraud rate config is OK


AssertionError: general_diff must not be lower than online_time_diff.
            4 vs 6 Check configs in legit.yaml

**assert_legit_time_limits**

In [7]:
# Check the legit time-limit settings on their own; prints a confirmation
# message when the check passes (as in the saved output).
cfg_validator.assert_legit_time_limits()

Legit time limits config is OK


In [None]:

legit_cfg: dict. Конфиги из .yaml
base_cfg: dict. Конфиги из .yaml
fraud_cfg: dict. Конфиги из .yaml
drop_cfg: dict. Конфиги из .yaml
total_clients: int. Общее число клиентов

# Класс `AllTxnsRecorder`
- модуль `data_generator.recorder`
- Сборка целого датафрейма из всех созданных транзакций: легальных и фрода всех типов
- Запись собранного датафрейма в файл в двух директориях: текущей генерации и последней генерации

In [1]:
import os
import pandas as pd
# Move to the project root so the relative "./config/..." and "./data/..."
# paths below resolve. Guarded so that re-running this cell does not climb
# one more directory up.
if not os.path.isdir("./config"):
    os.chdir("..")

# Imported after the working directory is set to the project root.
from data_generator.utils import load_configs
from data_generator.recorder import AllTxnsRecorder


# General settings
base_cfg = load_configs("./config/base.yaml")
# Legitimate transactions settings
legit_cfg = load_configs("./config/legit.yaml")
# Settings loaded from compr.yaml (presumably compromised-client fraud — confirm)
compr_cfg = load_configs("./config/compr.yaml")
# Drop fraud settings
drops_cfg = load_configs("./config/drops.yaml")

# A fixed, previously generated run directory is used for this test
# instead of creating a new one with make_dir_for_run(base_cfg=base_cfg).
run_dir = "./data/generated/history/generation_run_2025-07-21_154309"
all_recorder = AllTxnsRecorder(base_cfg, legit_cfg, compr_cfg, drops_cfg, run_dir)
print(run_dir)
print("ready for test")

./data/generated/history/generation_run_2025-07-21_154309
ready for test


In [2]:
# Read the legit transactions through the recorder and preview two rows.
leg_fm_recorder = all_recorder.read_legit()
leg_fm_recorder.head(2)

Unnamed: 0,client_id,txn_time,unix_time,amount,type,channel,category,online,merchant_id,trans_city,trans_lat,trans_lon,trans_ip,device_id,account,is_fraud,is_suspicious,status,rule
0,2853,2025-01-01 00:43:00,1735692180,1658.84,purchase,ecom,shopping_net,True,6952.0,Магнитогорск,53.407189,58.979143,2.60.10.139,4887.0,,False,False,approved,not applicable
1,858,2025-01-01 00:51:00,1735692660,1131.71,purchase,POS,gas_transport,False,4438.0,Барнаул,53.294364,83.771525,not applicable,,,False,False,approved,not applicable


In [3]:
# Read the run's legit parquet file directly with pandas, bypassing the
# recorder, so the two results can be compared.
leg_path = os.path.join(run_dir, "legit", "legit_txns.parquet")
leg_fm_direct = pd.read_parquet(leg_path)
leg_fm_direct.head(2)

Unnamed: 0,client_id,txn_time,unix_time,amount,type,channel,category,online,merchant_id,trans_city,trans_lat,trans_lon,trans_ip,device_id,account,is_fraud,is_suspicious,status,rule
0,2853,2025-01-01 00:43:00,1735692180,1658.84,purchase,ecom,shopping_net,True,6952.0,Магнитогорск,53.407189,58.979143,2.60.10.139,4887.0,,False,False,approved,not applicable
1,858,2025-01-01 00:51:00,1735692660,1131.71,purchase,POS,gas_transport,False,4438.0,Барнаул,53.294364,83.771525,not applicable,,,False,False,approved,not applicable


In [4]:
# Sanity check: recorder read and direct read must have the same shape.
# NOTE: only shapes are compared, not cell values.
print(leg_fm_recorder.shape, leg_fm_direct.shape)
assert leg_fm_recorder.shape == leg_fm_direct.shape
# Kept for the total row-count check further down.
leg_shape = leg_fm_recorder.shape

(10351, 19) (10351, 19)


In [5]:
# Read the compromised-client transactions through the recorder and preview two rows.
compr_fm_recorder = all_recorder.read_compromised()
compr_fm_recorder.head(2)

Unnamed: 0,client_id,txn_time,unix_time,amount,type,channel,category,online,merchant_id,trans_city,trans_lat,trans_lon,trans_ip,device_id,account,is_fraud,is_suspicious,status,rule
0,521,2025-01-20 09:41:50,1737366110,33000.0,purchase,ecom,misc_net,True,6821.0,Саратов,51.530376,45.953026,5.3.255.240,11265.0,,True,False,declined,new_ip_and_device_high_amount
1,1584,2025-01-19 20:55:02,1737320102,2777.0,purchase,POS,home,False,2456.0,Якутск,62.115507,129.729329,not applicable,,,True,False,declined,fast_geo_change


In [6]:
# Read the run's compromised-client parquet file directly with pandas for comparison.
compr_path = os.path.join(run_dir, "compromised", "compr_client_txns.parquet")
compr_fm_direct = pd.read_parquet(compr_path)
compr_fm_direct.head(2)

Unnamed: 0,client_id,txn_time,unix_time,amount,type,channel,category,online,merchant_id,trans_city,trans_lat,trans_lon,trans_ip,device_id,account,is_fraud,is_suspicious,status,rule
0,521,2025-01-20 09:41:50,1737366110,33000.0,purchase,ecom,misc_net,True,6821.0,Саратов,51.530376,45.953026,5.3.255.240,11265.0,,True,False,declined,new_ip_and_device_high_amount
1,1584,2025-01-19 20:55:02,1737320102,2777.0,purchase,POS,home,False,2456.0,Якутск,62.115507,129.729329,not applicable,,,True,False,declined,fast_geo_change


In [7]:
# Sanity check: recorder read and direct read must have the same shape.
# NOTE: only shapes are compared, not cell values.
print(compr_fm_recorder.shape, compr_fm_direct.shape)
assert compr_fm_recorder.shape == compr_fm_direct.shape
# Kept for the total row-count check further down.
compr_shape = compr_fm_recorder.shape

(180, 19) (180, 19)


In [8]:
# Read the "dist_drops" transactions through the recorder and preview two rows.
dist_fm_recorder = all_recorder.read_dist_drops()
dist_fm_recorder.head(2)

Unnamed: 0,client_id,txn_time,unix_time,amount,type,channel,category,online,merchant_id,trans_city,trans_lat,trans_lon,trans_ip,device_id,account,is_fraud,is_suspicious,status,rule
0,2275,2025-01-08 16:39:00,1736354340,24500.0,inbound,transfer,not applicable,True,,not applicable,,,not applicable,,12152.0,False,False,approved,not applicable
1,2275,2025-01-08 17:17:00,1736356620,11400.0,withdrawal,ATM,not applicable,False,,Новокузнецк,53.794276,87.214405,not applicable,,12152.0,False,False,approved,not applicable


In [9]:
# Read the run's dist_drops parquet file directly with pandas for comparison.
dist_path = os.path.join(run_dir, "dist_drops", "dist_drop_txns.parquet")
dist_fm_direct = pd.read_parquet(dist_path)
dist_fm_direct.head(2)

Unnamed: 0,client_id,txn_time,unix_time,amount,type,channel,category,online,merchant_id,trans_city,trans_lat,trans_lon,trans_ip,device_id,account,is_fraud,is_suspicious,status,rule
0,2275,2025-01-08 16:39:00,1736354340,24500.0,inbound,transfer,not applicable,True,,not applicable,,,not applicable,,12152.0,False,False,approved,not applicable
1,2275,2025-01-08 17:17:00,1736356620,11400.0,withdrawal,ATM,not applicable,False,,Новокузнецк,53.794276,87.214405,not applicable,,12152.0,False,False,approved,not applicable


In [10]:
# Sanity check: recorder read and direct read must have the same shape.
# NOTE: only shapes are compared, not cell values.
print(dist_fm_recorder.shape, dist_fm_direct.shape)
assert dist_fm_recorder.shape == dist_fm_direct.shape
# Kept for the total row-count check further down.
dist_shape = dist_fm_recorder.shape

(30, 19) (30, 19)


In [11]:
# Read the "purch_drops" transactions through the recorder and preview two rows.
purch_fm_recorder = all_recorder.read_purch_drops()
purch_fm_recorder.head(2)

Unnamed: 0,client_id,txn_time,unix_time,amount,type,channel,category,online,merchant_id,trans_city,trans_lat,trans_lon,trans_ip,device_id,account,is_fraud,is_suspicious,status,rule
0,1791,2025-01-18 05:06:00,1737176760,23900.0,inbound,transfer,not applicable,True,,not applicable,,,not applicable,,11688.0,False,False,approved,not applicable
1,1791,2025-01-18 07:15:00,1737184500,21000.0,purchase,ecom,shopping_net,True,6850.0,Ростов-на-Дону,47.222436,39.718787,2.60.6.153,3063.0,,False,False,approved,not applicable


In [12]:
# Read the run's purch_drops parquet file directly with pandas for comparison.
purch_path = os.path.join(run_dir, "purch_drops", "purch_drop_txns.parquet")
purch_fm_direct = pd.read_parquet(purch_path)
purch_fm_direct.head(2)

Unnamed: 0,client_id,txn_time,unix_time,amount,type,channel,category,online,merchant_id,trans_city,trans_lat,trans_lon,trans_ip,device_id,account,is_fraud,is_suspicious,status,rule
0,1791,2025-01-18 05:06:00,1737176760,23900.0,inbound,transfer,not applicable,True,,not applicable,,,not applicable,,11688.0,False,False,approved,not applicable
1,1791,2025-01-18 07:15:00,1737184500,21000.0,purchase,ecom,shopping_net,True,6850.0,Ростов-на-Дону,47.222436,39.718787,2.60.6.153,3063.0,,False,False,approved,not applicable


In [13]:
# Sanity check: recorder read and direct read must have the same shape.
# NOTE: only shapes are compared, not cell values.
print(purch_fm_recorder.shape, purch_fm_direct.shape)
assert purch_fm_recorder.shape == purch_fm_direct.shape
# Kept for the total row-count check further down.
purch_shape = purch_fm_recorder.shape

(30, 19) (30, 19)


**build_and_write**

In [14]:
# Assemble the combined dataframe and write it to disk; the result is read
# back below from both run_dir and the path configured in base.yaml.
all_recorder.build_and_write()

Building and writing all txns dataframe... completed.. |


In [15]:
# Expected row count of the combined dataframe: the sum of the row counts of
# the four per-type frames checked above.
total_shape = sum([leg_shape[0], compr_shape[0], dist_shape[0], purch_shape[0]])
total_shape

10591

In [16]:
# Shape of the combined dataframe held by the recorder; the row count should
# match total_shape computed above.
all_recorder.whole_df.shape

(10591, 19)

In [18]:
# Alias for the recorder's combined dataframe (same object, not a copy).
all_txns_fm_recorder = all_recorder.whole_df

In [17]:
# Read the combined parquet file written into the run directory and preview two rows.
path = os.path.join(run_dir, "all_txns.parquet")
all_txns_fm_direct = pd.read_parquet(path)
all_txns_fm_direct.head(2)

Unnamed: 0,client_id,txn_time,unix_time,amount,type,channel,category,online,merchant_id,trans_city,trans_lat,trans_lon,trans_ip,device_id,account,is_fraud,is_suspicious,status,rule
0,2853,2025-01-01 00:43:00,1735692180,1658.84,purchase,ecom,shopping_net,True,6952.0,Магнитогорск,53.407189,58.979143,2.60.10.139,4887.0,,False,False,approved,not applicable
1,858,2025-01-01 00:51:00,1735692660,1131.71,purchase,POS,gas_transport,False,4438.0,Барнаул,53.294364,83.771525,not applicable,,,False,False,approved,not applicable


In [19]:
# Sanity check: in-memory combined frame vs. the file written to run_dir.
# NOTE: only shapes are compared, not cell values.
print(all_txns_fm_recorder.shape, all_txns_fm_direct.shape)
assert all_txns_fm_recorder.shape == all_txns_fm_direct.shape
all_txns_shape = all_txns_fm_recorder.shape

(10591, 19) (10591, 19)


In [20]:
# Read the combined file from the path configured in base.yaml under
# data_paths -> generated -> all_txns (presumably the "latest generation" copy).
all_path = base_cfg["data_paths"]["generated"]["all_txns"]
all_txns_latest = pd.read_parquet(all_path)
all_txns_latest.head(2)

Unnamed: 0,client_id,txn_time,unix_time,amount,type,channel,category,online,merchant_id,trans_city,trans_lat,trans_lon,trans_ip,device_id,account,is_fraud,is_suspicious,status,rule
0,2853,2025-01-01 00:43:00,1735692180,1658.84,purchase,ecom,shopping_net,True,6952.0,Магнитогорск,53.407189,58.979143,2.60.10.139,4887.0,,False,False,approved,not applicable
1,858,2025-01-01 00:51:00,1735692660,1131.71,purchase,POS,gas_transport,False,4438.0,Барнаул,53.294364,83.771525,not applicable,,,False,False,approved,not applicable


In [21]:
# Sanity check: file at the configured path vs. the file written to run_dir.
# NOTE: only shapes are compared, not cell values.
print(all_txns_latest.shape, all_txns_fm_direct.shape)
assert all_txns_latest.shape == all_txns_fm_direct.shape
all_txns_shape = all_txns_latest.shape

(10591, 19) (10591, 19)


# **Готовый генератор `run`**
- генерация всех типов транзакций, запись в файлы
- сборка единого файла с транзакциями

In [1]:
import os
import pandas as pd
import yaml
# Move to the project root so the relative "./config/..." and "./data/..."
# paths below resolve. Guarded so that re-running this cell does not climb
# one more directory up.
if not os.path.isdir("./config"):
    os.chdir("..")

# General settings (opened as UTF-8 explicitly so that parsing does not depend
# on the platform's default text encoding).
with open("./config/base.yaml", encoding="utf8") as f:
    base_cfg = yaml.safe_load(f)

In [5]:
# Pick the most recent generation run. os.listdir() returns entries in
# arbitrary order, so the names are sorted first. This assumes every run folder
# follows the timestamped naming used elsewhere in this notebook
# (e.g. generation_run_2025-07-21_154309), which sorts chronologically.
hist_dir = "./data/generated/history/"
run_folder = sorted(os.listdir(hist_dir))[-1]
run_dir = os.path.join(hist_dir, run_folder)

In [6]:
# Read the legit parquet file from the selected run directory and preview two rows.
leg_path = os.path.join(run_dir, "legit", "legit_txns.parquet")
leg_fm_run = pd.read_parquet(leg_path)
leg_fm_run.head(2)

Unnamed: 0,client_id,txn_time,unix_time,amount,type,channel,category,online,merchant_id,trans_city,trans_lat,trans_lon,trans_ip,device_id,account,is_fraud,is_suspicious,status,rule
0,3620,2025-01-01 00:35:00,1735691700,2200.0,purchase,ecom,shopping_net,True,6855.0,Липецк,52.610303,39.594627,2.60.13.103,6224.0,,False,False,approved,not applicable
1,3545,2025-01-01 00:49:00,1735692540,1015.27,purchase,ecom,grocery_net,True,6831.0,Самара,53.195166,50.106769,2.60.13.33,6093.0,,False,False,approved,not applicable


In [8]:
# Read the legit file from the path configured in base.yaml under
# data_paths -> generated -> legit_txns (presumably the "latest generation" copy).
data_paths = base_cfg["data_paths"]
leg_latest_path = data_paths["generated"]["legit_txns"]
leg_latest = pd.read_parquet(leg_latest_path)
leg_latest.head(2)

Unnamed: 0,client_id,txn_time,unix_time,amount,type,channel,category,online,merchant_id,trans_city,trans_lat,trans_lon,trans_ip,device_id,account,is_fraud,is_suspicious,status,rule
0,3620,2025-01-01 00:35:00,1735691700,2200.0,purchase,ecom,shopping_net,True,6855.0,Липецк,52.610303,39.594627,2.60.13.103,6224.0,,False,False,approved,not applicable
1,3545,2025-01-01 00:49:00,1735692540,1015.27,purchase,ecom,grocery_net,True,6831.0,Самара,53.195166,50.106769,2.60.13.33,6093.0,,False,False,approved,not applicable


In [10]:
# Sanity check: run-directory file vs. file at the configured path.
# NOTE: only shapes are compared, not cell values.
print(leg_fm_run.shape, leg_latest.shape)
assert leg_fm_run.shape == leg_latest.shape
# Kept for the total row-count check further down.
leg_shape = leg_fm_run.shape

(10367, 19) (10367, 19)


In [11]:
# Read the compromised-client parquet file from the selected run directory.
compr_path = os.path.join(run_dir, "compromised", "compr_client_txns.parquet")
compr_fm_run = pd.read_parquet(compr_path)
compr_fm_run.head(2)

Unnamed: 0,client_id,txn_time,unix_time,amount,type,channel,category,online,merchant_id,trans_city,trans_lat,trans_lon,trans_ip,device_id,account,is_fraud,is_suspicious,status,rule
0,628,2025-01-19 10:23:58,1737282238,7831.75,purchase,ecom,misc_net,True,6915.0,Комсомольск-на-Амуре,50.550011,137.007929,5.8.6.46,11756.0,,True,False,declined,fast_geo_change_online
1,3094,2025-01-19 21:32:49,1737322369,12580.0,purchase,ecom,misc_net,True,6854.0,Кемерово,55.390972,86.046786,5.8.2.104,12095.0,,True,False,declined,new_device_and_high_amount


In [12]:
# Read the compromised-client file from the path configured in base.yaml under
# data_paths -> generated -> compr_client_txns.
data_paths = base_cfg["data_paths"]
compr_latest_path = data_paths["generated"]["compr_client_txns"]
compr_latest = pd.read_parquet(compr_latest_path)
compr_latest.head(2)

Unnamed: 0,client_id,txn_time,unix_time,amount,type,channel,category,online,merchant_id,trans_city,trans_lat,trans_lon,trans_ip,device_id,account,is_fraud,is_suspicious,status,rule
0,628,2025-01-19 10:23:58,1737282238,7831.75,purchase,ecom,misc_net,True,6915.0,Комсомольск-на-Амуре,50.550011,137.007929,5.8.6.46,11756.0,,True,False,declined,fast_geo_change_online
1,3094,2025-01-19 21:32:49,1737322369,12580.0,purchase,ecom,misc_net,True,6854.0,Кемерово,55.390972,86.046786,5.8.2.104,12095.0,,True,False,declined,new_device_and_high_amount


In [13]:
# Sanity check: run-directory file vs. file at the configured path.
# NOTE: only shapes are compared, not cell values.
print(compr_fm_run.shape, compr_latest.shape)
assert compr_fm_run.shape == compr_latest.shape
# Kept for the total row-count check further down.
compr_shape = compr_fm_run.shape

(156, 19) (156, 19)


In [14]:
# Read the dist_drops parquet file from the selected run directory.
dist_path = os.path.join(run_dir, "dist_drops", "dist_drop_txns.parquet")
dist_fm_run = pd.read_parquet(dist_path)
dist_fm_run.head(2)

Unnamed: 0,client_id,txn_time,unix_time,amount,type,channel,category,online,merchant_id,trans_city,trans_lat,trans_lon,trans_ip,device_id,account,is_fraud,is_suspicious,status,rule
0,5175,2025-01-09 12:03:00,1736424180,41700.0,inbound,transfer,not applicable,True,,not applicable,,,not applicable,,14648,False,False,approved,not applicable
1,5175,2025-01-09 13:20:00,1736428800,41700.0,withdrawal,ATM,not applicable,False,,Чебоксары,56.143938,47.248872,not applicable,,14648,False,False,approved,not applicable


In [15]:
# Read the dist_drops file from the path configured in base.yaml under
# data_paths -> generated -> dist_drop_txns.
data_paths = base_cfg["data_paths"]
dist_latest_path = data_paths["generated"]["dist_drop_txns"]
dist_latest = pd.read_parquet(dist_latest_path)
dist_latest.head(2)

Unnamed: 0,client_id,txn_time,unix_time,amount,type,channel,category,online,merchant_id,trans_city,trans_lat,trans_lon,trans_ip,device_id,account,is_fraud,is_suspicious,status,rule
0,5175,2025-01-09 12:03:00,1736424180,41700.0,inbound,transfer,not applicable,True,,not applicable,,,not applicable,,14648,False,False,approved,not applicable
1,5175,2025-01-09 13:20:00,1736428800,41700.0,withdrawal,ATM,not applicable,False,,Чебоксары,56.143938,47.248872,not applicable,,14648,False,False,approved,not applicable


In [16]:
# Sanity check: file at the configured path vs. run-directory file.
# NOTE: only shapes are compared, not cell values.
print(dist_latest.shape, dist_fm_run.shape)
assert dist_latest.shape == dist_fm_run.shape
# Kept for the total row-count check further down.
dist_shape = dist_latest.shape

(32, 19) (32, 19)


In [17]:
# Read the purch_drops parquet file from the selected run directory.
purch_path = os.path.join(run_dir, "purch_drops", "purch_drop_txns.parquet")
purch_fm_run = pd.read_parquet(purch_path)
purch_fm_run.head(2)

Unnamed: 0,client_id,txn_time,unix_time,amount,type,channel,category,online,merchant_id,trans_city,trans_lat,trans_lon,trans_ip,device_id,account,is_fraud,is_suspicious,status,rule
0,4649,2025-01-15 16:38:00,1736959080,42500.0,inbound,transfer,not applicable,True,,not applicable,,,not applicable,,14394.0,False,False,approved,not applicable
1,4649,2025-01-15 17:29:00,1736962140,28000.0,purchase,ecom,misc_net,True,6881.0,Курган,55.444345,65.316134,2.60.17.43,7969.0,,False,False,approved,not applicable


In [18]:
# Read the purch_drops file from the path configured in base.yaml under
# data_paths -> generated -> purch_drop_txns.
data_paths = base_cfg["data_paths"]
purch_latest_path = data_paths["generated"]["purch_drop_txns"]
purch_latest = pd.read_parquet(purch_latest_path)
purch_latest.head(2)

Unnamed: 0,client_id,txn_time,unix_time,amount,type,channel,category,online,merchant_id,trans_city,trans_lat,trans_lon,trans_ip,device_id,account,is_fraud,is_suspicious,status,rule
0,4649,2025-01-15 16:38:00,1736959080,42500.0,inbound,transfer,not applicable,True,,not applicable,,,not applicable,,14394.0,False,False,approved,not applicable
1,4649,2025-01-15 17:29:00,1736962140,28000.0,purchase,ecom,misc_net,True,6881.0,Курган,55.444345,65.316134,2.60.17.43,7969.0,,False,False,approved,not applicable


In [19]:
# Sanity check: run-directory file vs. file at the configured path.
# NOTE: only shapes are compared, not cell values.
print(purch_fm_run.shape, purch_latest.shape)
assert purch_fm_run.shape == purch_latest.shape
# Kept for the total row-count check further down.
purch_shape = purch_fm_run.shape

(34, 19) (34, 19)


In [20]:
# Expected row count of the combined file: the sum of the row counts of the
# four per-type frames checked above.
total_shape = sum([leg_shape[0], compr_shape[0], dist_shape[0], purch_shape[0]])
total_shape

10589

In [23]:
# Read the combined parquet file from the selected run directory and preview two rows.
path = os.path.join(run_dir, "all_txns.parquet")
all_txns_fm_run = pd.read_parquet(path)
all_txns_fm_run.head(2)

Unnamed: 0,client_id,txn_time,unix_time,amount,type,channel,category,online,merchant_id,trans_city,trans_lat,trans_lon,trans_ip,device_id,account,is_fraud,is_suspicious,status,rule
0,3620,2025-01-01 00:35:00,1735691700,2200.0,purchase,ecom,shopping_net,True,6855.0,Липецк,52.610303,39.594627,2.60.13.103,6224.0,,False,False,approved,not applicable
1,3545,2025-01-01 00:49:00,1735692540,1015.27,purchase,ecom,grocery_net,True,6831.0,Самара,53.195166,50.106769,2.60.13.33,6093.0,,False,False,approved,not applicable


In [22]:
# Read the combined file from the path configured in base.yaml under
# data_paths -> generated -> all_txns.
all_path = base_cfg["data_paths"]["generated"]["all_txns"]
all_txns_latest = pd.read_parquet(all_path)
all_txns_latest.head(2)

Unnamed: 0,client_id,txn_time,unix_time,amount,type,channel,category,online,merchant_id,trans_city,trans_lat,trans_lon,trans_ip,device_id,account,is_fraud,is_suspicious,status,rule
0,3620,2025-01-01 00:35:00,1735691700,2200.0,purchase,ecom,shopping_net,True,6855.0,Липецк,52.610303,39.594627,2.60.13.103,6224.0,,False,False,approved,not applicable
1,3545,2025-01-01 00:49:00,1735692540,1015.27,purchase,ecom,grocery_net,True,6831.0,Самара,53.195166,50.106769,2.60.13.33,6093.0,,False,False,approved,not applicable


In [24]:
# Sanity check: file at the configured path vs. run-directory file; the row
# count should also equal total_shape computed above.
# NOTE: only shapes are compared, not cell values.
print(all_txns_latest.shape, all_txns_fm_run.shape)
assert all_txns_latest.shape == all_txns_fm_run.shape
all_txns_shape = all_txns_latest.shape

(10589, 19) (10589, 19)
