In [2]:
# Print the Readme.md stored next to the DGraphFin data file (shell escape; the path is machine-specific).
!cat /Users/jl102430/Documents/study/anomaly_detection/data/dynamic/DGraph/DGraphFin/Readme.md

# Description of DGraphFin datafile.

File **dgraphfin.npz** including below keys:  

- **x**: 17-dimensional node features.
- **y**: node label.  
    There four classes. Below are the nodes counts of each class.     
    0: 1210092    
    1: 15509    
    2: 1620851    
    3: 854098    
    Nodes of Class 1 are fraud users and nodes of 0 are normal users, and they the two classes to be predicted.    
    Nodes of Class 2 and Class 3 are background users.    
    
- **edge_index**: shape (4300999, 2).   
    Each edge is in the form (id_a, id_b), where ids are the indices in x.        

- **edge_type**: 11 types of edges. 
    
- **edge_timestamp**: the desensitized timestamp of each edge.
    
- **train_mask, valid_mask, test_mask**:  
    Nodes of Class 0 and Class 1 are randomly splitted by 70/15/15.  

    


    

# Load Data

In [3]:
from pathlib import Path

import numpy as np
import pandas as pd
import plotly.express as px
import torch
from torch_geometric.utils import dense_to_sparse, to_dense_adj
from tqdm import tqdm

In [4]:
# Location of the DGraphFin archive on the author's machine.
data_path = (
    "/Users/jl102430/Documents/study/anomaly_detection"
    "/data/dynamic/DGraph/DGraphFin/dgraphfin.npz"
)


In [5]:
# Open the .npz archive; the cell output shows this yields an NpzFile handle,
# from which the individual arrays are pulled by key in later cells.
data = np.load(data_path)
data

<numpy.lib.npyio.NpzFile at 0x7fd707ed59d0>

In [6]:
# List the names of the arrays stored in the archive.
data.files

['x',
 'y',
 'edge_index',
 'edge_type',
 'edge_timestamp',
 'train_mask',
 'valid_mask',
 'test_mask']

In [7]:
# Pull every array out of the archive under its own name.
X, y = data['x'], data['y']
edge_index, edge_type, edge_timestamp = (
    data[key] for key in ('edge_index', 'edge_type', 'edge_timestamp')
)
# NOTE(review): the *_mask arrays are much shorter than y (see the printed shapes),
# so they presumably hold node indices rather than boolean masks — confirm.
train_mask, valid_mask, test_mask = (
    data[key] for key in ('train_mask', 'valid_mask', 'test_mask')
)

# Shape overview of everything that was loaded.
print(f"""
X shape: {X.shape},
y shape: {y.shape}

edge_index shape: {edge_index.shape}
edge_type shape: {edge_type.shape}
edge_timestamp shape: {edge_timestamp.shape}

train_mask shape: {train_mask.shape}
valid_mask shape: {valid_mask.shape}
test_mask shape: {test_mask.shape}
""")


X shape: (3700550, 17),
y shape: (3700550,)

edge_index shape: (4300999, 2)
edge_type shape: (4300999,)
edge_timestamp shape: (4300999,)

train_mask shape: (857899,)
valid_mask shape: (183862,)
test_mask shape: (183840,)



# Basic Stats

In [9]:
# Rows of edge_index whose timestamp equals 1.
edge_index[edge_timestamp == 1]

array([[ 730048,  627630],
       [2261574,  430345],
       [2054903, 1211109],
       ...,
       [2239888,  784699],
       [1849431, 1947401],
       [1782210,  730013]])

In [10]:
# Edge list restricted to timestamp 1, copied into a torch tensor.
is_first_step = edge_timestamp == 1
graph1 = torch.tensor(edge_index[is_first_step])
graph1

tensor([[ 730048,  627630],
        [2261574,  430345],
        [2054903, 1211109],
        ...,
        [2239888,  784699],
        [1849431, 1947401],
        [1782210,  730013]])

In [11]:
# Print the distinct values (and how many there are) for each feature column.
for feat, column in enumerate(X.T):
    uni = np.unique(column)
    print(f"Feat {feat} ({uni.shape}): {uni};")

Feat 0 ((3,)): [-1.  0.  1.];
Feat 1 ((10,)): [-1.  0.  1.  2.  3.  4.  5.  6.  7.  8.];
Feat 2 ((4539,)): [-1.00000e+00  5.00000e-03  1.00000e-02 ...  1.10590e+02  1.11815e+02
  1.53510e+02];
Feat 3 ((4406,)): [-1.00000e+00  5.00000e-03  1.00000e-02 ...  1.10585e+02  1.11475e+02
  1.53375e+02];
Feat 4 ((103579,)): [-1.          0.02844639  0.03448276 ...  0.99978899  0.99995479
  1.        ];
Feat 5 ((490,)): [-1.000e+00  1.000e-01  2.000e-01  3.000e-01  4.000e-01  5.000e-01
  6.000e-01  7.000e-01  8.000e-01  9.000e-01  1.000e+00  1.100e+00
  1.200e+00  1.300e+00  1.400e+00  1.500e+00  1.600e+00  1.700e+00
  1.800e+00  1.900e+00  2.000e+00  2.100e+00  2.200e+00  2.300e+00
  2.400e+00  2.500e+00  2.600e+00  2.700e+00  2.800e+00  2.900e+00
  3.000e+00  3.100e+00  3.200e+00  3.300e+00  3.400e+00  3.500e+00
  3.600e+00  3.700e+00  3.800e+00  3.900e+00  4.000e+00  4.100e+00
  4.200e+00  4.300e+00  4.400e+00  4.500e+00  4.600e+00  4.700e+00
  4.800e+00  4.900e+00  5.000e+00  5.100e+00  5.20

In [12]:
# Peek at the per-edge type codes.
edge_type

array([10, 10, 10, ...,  6,  3,  1])

In [12]:
import pandas as pd
import plotly.express as px

In [13]:
# Wrap the feature matrix in a DataFrame with columns feat_0 … feat_{n-1}.
# NOTE: this rebinds `data`, which until now referred to the loaded .npz archive.
feature_names = [f"feat_{k}" for k in range(X.shape[1])]
data = pd.DataFrame(X, columns=feature_names)

data

Unnamed: 0,feat_0,feat_1,feat_2,feat_3,feat_4,feat_5,feat_6,feat_7,feat_8,feat_9,feat_10,feat_11,feat_12,feat_13,feat_14,feat_15,feat_16
0,0.0,5.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
1,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
2,0.0,5.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
3,1.0,5.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
4,1.0,7.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3700545,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
3700546,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
3700547,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
3700548,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0


In [14]:
# Summary statistics for every column of the frame.
data.describe()

Unnamed: 0,feat_0,feat_1,feat_2,feat_3,feat_4,feat_5,feat_6,feat_7,feat_8,feat_9,feat_10,feat_11,feat_12,feat_13,feat_14,feat_15,feat_16
count,3700550.0,3700550.0,3700550.0,3700550.0,3700550.0,3700550.0,3700550.0,3700550.0,3700550.0,3700550.0,3700550.0,3700550.0,3700550.0,3700550.0,3700550.0,3700550.0,3700550.0
mean,0.5076875,3.077169,-0.0880759,-0.1587935,-0.1536351,-0.2028806,0.08773051,-0.5120276,0.0667277,-0.1076214,0.7369523,-0.5100765,-0.5560902,-0.5523567,-0.2922386,-0.7176771,-0.6375034
std,0.737813,2.634612,1.431268,1.312474,0.9248059,1.329077,1.91296,0.6151994,1.874646,0.9721384,2.448908,0.5814388,0.5241674,0.5270286,0.8780555,0.4559241,0.5956139
min,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
25%,0.0,1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
50%,1.0,3.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0
75%,1.0,5.0,0.5,0.415,0.8630952,0.4,0.55,0.029,0.522,0.968661,1.0,0.1105072,0.02329193,0.04651163,0.7142857,0.004065041,0.1111111
max,1.0,8.0,153.51,153.375,1.0,456.3,68.646,12.679,68.027,1.0,1313.0,1.0,1.0,2.0,3.0,1.0,1.0


In [19]:
# Export one histogram per feature, leaving out the rows where that feature equals -1.
figures_dir = Path('../figures')
figures_dir.mkdir(parents=True, exist_ok=True)  # make sure the target folder exists before writing
for i in tqdm(range(X.shape[1])):
    col = f"feat_{i}"
    fig = px.histogram(data[data[col] != -1], x=col, title=f'Feature {i} Plot Without -1')
    fig.write_image(str(figures_dir / f'feature_{i}_histogram_no-1.png'))

100%|██████████| 17/17 [01:50<00:00,  6.48s/it]


In [22]:
# Attach the node labels as an extra column (this modifies `data` in place).
data['y'] = y
data

Unnamed: 0,feat_0,feat_1,feat_2,feat_3,feat_4,feat_5,feat_6,feat_7,feat_8,feat_9,feat_10,feat_11,feat_12,feat_13,feat_14,feat_15,feat_16,y
0,0.0,5.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,2
1,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,3
2,0.0,5.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,2
3,1.0,5.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,3
4,1.0,7.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3700545,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,2
3700546,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,2
3700547,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,2
3700548,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,2


In [24]:
# Save the distribution of the node labels `y` as a static image.
# The file name is fixed on purpose: it must not depend on a loop index left over from another cell.
Path('../figures').mkdir(parents=True, exist_ok=True)
px.histogram(data, x='y').write_image('../figures/y_histogram.png')

# Node Label Distributions

> These fraudsters borrowed money but did not pay it back (far past due), ignoring the platform’s repeated reminders

> Financial fraudsters frequently offer false personal information, some of them may also have strange social networks (compared to regular users), and some of them behave abnormally as platform operators.

**Background Nodes/Users**: Last but not least, in most real-world scenarios, not all the nodes in a graph are actually required to be classified/predicted. But removing these nodes can lose their abundant information and damage the connectivity of network structures, which is somehow like removing background knowledge from a complete story. Therefore, we term these nodes as background nodes and the opposite of them as target nodes. `However, most of the current GAD datasets ignore background nodes`.

In [10]:
# Rebuild the working frame from scratch: feature columns feat_* plus the label column y.
column_names = [f"feat_{k}" for k in range(X.shape[1])]
data = pd.DataFrame(X, columns=column_names)
data['y'] = y
data

Unnamed: 0,feat_0,feat_1,feat_2,feat_3,feat_4,feat_5,feat_6,feat_7,feat_8,feat_9,feat_10,feat_11,feat_12,feat_13,feat_14,feat_15,feat_16,y
0,0.0,5.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,2
1,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,3
2,0.0,5.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,2
3,1.0,5.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,3
4,1.0,7.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3700545,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,2
3700546,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,2
3700547,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,2
3700548,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,2


In [20]:
# Summary statistics for every column of the frame, which at this point includes the label column y.
data.describe()

Unnamed: 0,feat_0,feat_1,feat_2,feat_3,feat_4,feat_5,feat_6,feat_7,feat_8,feat_9,feat_10,feat_11,feat_12,feat_13,feat_14,feat_15,feat_16,y
count,3700550.0,3700550.0,3700550.0,3700550.0,3700550.0,3700550.0,3700550.0,3700550.0,3700550.0,3700550.0,3700550.0,3700550.0,3700550.0,3700550.0,3700550.0,3700550.0,3700550.0,3700550.0
mean,0.5076875,3.077169,-0.0880759,-0.1587935,-0.1536351,-0.2028806,0.08773051,-0.5120276,0.0667277,-0.1076214,0.7369523,-0.5100765,-0.5560902,-0.5523567,-0.2922386,-0.7176771,-0.6375034,1.572605
std,0.737813,2.634612,1.431268,1.312474,0.9248059,1.329077,1.91296,0.6151994,1.874646,0.9721384,2.448908,0.5814388,0.5241674,0.5270286,0.8780555,0.4559241,0.5956139,1.166337
min,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0
25%,0.0,1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0
50%,1.0,3.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,2.0
75%,1.0,5.0,0.5,0.415,0.8630952,0.4,0.55,0.029,0.522,0.968661,1.0,0.1105072,0.02329193,0.04651163,0.7142857,0.004065041,0.1111111,2.0
max,1.0,8.0,153.51,153.375,1.0,456.3,68.646,12.679,68.027,1.0,1313.0,1.0,1.0,2.0,3.0,1.0,1.0,3.0


In [55]:
# Export, for every feature, a histogram with one facet per node label.
label_hist_dir = Path('../figures/node_label_distribution')
label_hist_dir.mkdir(parents=True, exist_ok=True)  # make sure the target folder exists before writing
for i in tqdm(range(X.shape[1])):
    fig = px.histogram(data, x=f'feat_{i}', facet_col='y', facet_col_wrap=2, facet_col_spacing=0.1)
    # Unlink the facets' y-axes and show tick labels on each of them;
    # one call applies both settings to every y-axis of the figure.
    fig.update_yaxes(matches=None, showticklabels=True)
    fig.write_image(str(label_hist_dir / f'feat_{i}_label_hist.png'))

100%|██████████| 17/17 [02:52<00:00, 10.13s/it]


In [79]:
feat_nm = 'feat_1'

# Distinct values of the chosen feature in ascending order, with the -1 rows left out.
observed = data.loc[data[feat_nm] != -1, feat_nm]
pd.Series(observed.sort_values().unique())

0    0.0
1    1.0
2    2.0
3    3.0
4    4.0
5    5.0
6    6.0
7    7.0
8    8.0
dtype: float64

In [104]:
# Per-column max / min / median over the class-0 nodes, ignoring every entry equal to -1.
normal_nodes = data[data.y == 0]             # pick the rows first so the mask below touches fewer cells
sampledf = normal_nodes[normal_nodes != -1]  # entries equal to -1 become NaN and are skipped by the reductions

# Assemble the table from index-aligned Series so each statistic keeps a numeric dtype;
# numeric_only is applied to all three reductions so they always cover the same columns.
stats = (
    pd.DataFrame({
        'max': sampledf.max(axis=0, numeric_only=True),
        'min': sampledf.min(axis=0, numeric_only=True),
        'median': sampledf.median(axis=0, numeric_only=True),
    })
    .rename_axis('feature')
    .reset_index()
)
stats

Unnamed: 0,feature,max,min,median
0,feat_0,1.0,0.0,1.0
1,feat_1,8.0,0.0,4.0
2,feat_2,111.815,0.005,0.86
3,feat_3,111.475,0.005,0.69
4,feat_4,1.0,0.028446,0.850442
5,feat_5,456.3,0.1,0.7
6,feat_6,68.646,0.001,1.027
7,feat_7,12.679,0.001,0.074
8,feat_8,68.027,0.001,0.971
9,feat_9,1.0,0.002558,0.968


**Integer Columns**:

- feat_0: potential gender
- feat_1: potential age groups
- feat_10: ?


# Potential Splits for Node Types

## Fixed Discrete Intervals

In [12]:
# Show the working frame again before the binning experiments below.
data

Unnamed: 0,feat_0,feat_1,feat_2,feat_3,feat_4,feat_5,feat_6,feat_7,feat_8,feat_9,feat_10,feat_11,feat_12,feat_13,feat_14,feat_15,feat_16,y
0,0.0,5.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,2
1,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,3
2,0.0,5.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,2
3,1.0,5.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,3
4,1.0,7.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3700545,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,2
3700546,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,2
3700547,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,2
3700548,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,2


In [50]:
n_node_types = 4  # indicative value; the binning below uses n_node_types + 1 intervals
cut_summary = []
# Count nodes per (label, equal-width interval) for feat_2 onwards;
# feat_0 and feat_1 are skipped by the range start.
for i in tqdm(range(2, X.shape[1])):
    col = f'feat_{i}'
    # Keep the interval labels in a standalone Series instead of a scratch column on `data`,
    # so the shared frame is left untouched for the other cells.
    bin_labels = pd.cut(data[col], n_node_types + 1).astype(str).rename('bins')
    cut_summary.append(
        data.groupby(['y', bin_labels])[[col]].count().reset_index()
    )


100%|██████████| 15/15 [00:25<00:00,  1.70s/it]


In [51]:
# One faceted bar chart per binned feature (facet = node label).
binned_feature_ids = range(2, X.shape[1])
for i in tqdm(binned_feature_ids):
    counts = cut_summary[i - 2]  # the summaries start at feat_2, hence the offset
    fig = px.bar(
        counts,
        x='bins',
        y=f'feat_{i}',
        facet_col='y',
        facet_col_wrap=2,
        facet_col_spacing=0.08,
        title=f"Split on feat{i}",
    )
    # Unlink the facets' y-axes and show tick labels on each of them.
    fig.update_yaxes(matches=None, showticklabels=True)
    fig.show()

  0%|          | 0/15 [00:00<?, ?it/s]

  7%|▋         | 1/15 [00:00<00:02,  5.93it/s]

 13%|█▎        | 2/15 [00:00<00:01,  6.72it/s]

 20%|██        | 3/15 [00:00<00:01,  6.01it/s]

 27%|██▋       | 4/15 [00:00<00:01,  6.75it/s]

 33%|███▎      | 5/15 [00:00<00:01,  6.93it/s]

 40%|████      | 6/15 [00:00<00:01,  6.41it/s]

 47%|████▋     | 7/15 [00:01<00:01,  5.61it/s]

 53%|█████▎    | 8/15 [00:01<00:01,  4.78it/s]

 60%|██████    | 9/15 [00:01<00:01,  4.90it/s]

 67%|██████▋   | 10/15 [00:01<00:00,  5.26it/s]

 73%|███████▎  | 11/15 [00:01<00:00,  5.88it/s]

 80%|████████  | 12/15 [00:02<00:00,  6.53it/s]

 87%|████████▋ | 13/15 [00:02<00:00,  6.64it/s]

 93%|█████████▎| 14/15 [00:02<00:00,  6.31it/s]

100%|██████████| 15/15 [00:02<00:00,  6.04it/s]


## Quantile Intervals

In [53]:
n_node_types = 4  # indicative value; the binning below asks for n_node_types + 1 quantile intervals
qcut_summary = []
# Count nodes per (label, quantile interval) for feat_2 onwards;
# feat_0 and feat_1 are skipped by the range start.
for i in tqdm(range(2, X.shape[1])):
    col = f'feat_{i}'
    # duplicates='drop' merges repeated quantile edges, so fewer intervals than requested may result.
    # The labels live in a standalone Series rather than a scratch column on `data`,
    # so the shared frame is left untouched for the other cells.
    bin_labels = (
        pd.qcut(data[col], n_node_types + 1, duplicates='drop')
        .astype(str)
        .rename('bins')
    )
    qcut_summary.append(
        data.groupby(['y', bin_labels])[[col]].count().reset_index()
    )


100%|██████████| 15/15 [00:29<00:00,  1.94s/it]


In [54]:
# One faceted bar chart per quantile-binned feature (facet = node label).
binned_feature_ids = range(2, X.shape[1])
for i in tqdm(binned_feature_ids):
    counts = qcut_summary[i - 2]  # the summaries start at feat_2, hence the offset
    fig = px.bar(
        counts,
        x='bins',
        y=f'feat_{i}',
        facet_col='y',
        facet_col_wrap=2,
        facet_col_spacing=0.08,
        title=f"Quantile split on feat{i}",
    )
    # Unlink the facets' y-axes and show tick labels on each of them.
    fig.update_yaxes(matches=None, showticklabels=True)
    fig.show()

  0%|          | 0/15 [00:00<?, ?it/s]

  7%|▋         | 1/15 [00:00<00:01,  7.15it/s]

 13%|█▎        | 2/15 [00:00<00:01,  6.75it/s]

 20%|██        | 3/15 [00:00<00:01,  6.72it/s]

 27%|██▋       | 4/15 [00:00<00:01,  5.62it/s]

 33%|███▎      | 5/15 [00:00<00:01,  6.07it/s]

 40%|████      | 6/15 [00:00<00:01,  6.21it/s]

 47%|████▋     | 7/15 [00:01<00:01,  6.04it/s]

 53%|█████▎    | 8/15 [00:01<00:01,  6.01it/s]

 60%|██████    | 9/15 [00:01<00:00,  6.43it/s]

 67%|██████▋   | 10/15 [00:01<00:00,  6.86it/s]

 73%|███████▎  | 11/15 [00:01<00:00,  7.18it/s]

 80%|████████  | 12/15 [00:01<00:00,  6.32it/s]

 87%|████████▋ | 13/15 [00:02<00:00,  6.40it/s]

 93%|█████████▎| 14/15 [00:02<00:00,  6.07it/s]

100%|██████████| 15/15 [00:02<00:00,  6.34it/s]
