In [1]:
import pandas as pd
import numpy as np
import torch

from torch.utils.data import DataLoader

In [2]:
from lstm_scratch import SimpleLSTM
from helpers import CVFConfigForAnalysisDataset

In [3]:
# Base name of the trained checkpoint. It is used to build the
# trained_models/<model_name>.pt path and as a prefix of the output CSV names.
model_name = "lstm_trained_at_2025_04_10_00_11"

# Graph to analyse. Only one assignment is active at a time; the commented-out
# lines are alternative graph names.
# graph_name = "star_graph_n15"
# graph_name = "star_graph_n7"
# graph_name = "graph_powerlaw_cluster_graph_n7"
graph_name = "graph_random_regular_graph_n8_d4"

In [4]:
# Prefer the GPU, but fall back to the CPU so the notebook still runs on
# machines without a usable CUDA device.
device = "cuda" if torch.cuda.is_available() else "cpu"

In [5]:
# SECURITY NOTE: weights_only=False un-pickles the complete module object, and
# un-pickling can execute arbitrary code. Only load checkpoints you trust.
# The pickled model's class must be importable in this kernel for the load to
# succeed.
# map_location puts the weights on `device` (the same value that is handed to
# the dataset constructor) regardless of the device the checkpoint was saved from.
model = torch.load(
    f"trained_models/{model_name}.pt",
    map_location=device,
    weights_only=False,
)
# Switch to evaluation mode; the returned module is displayed as the cell output.
model.eval()

SimpleLSTM(
  (lstm): GRU(3, 32, batch_first=True)
  (norm): LayerNorm((32,), eps=1e-05, elementwise_affine=True)
  (h2o): Linear(in_features=32, out_features=1, bias=True)
)

In [6]:
# Build the analysis dataset for the selected graph on the chosen device.
dataset = CVFConfigForAnalysisDataset(device, graph_name)

Total configs: 390,625.


In [None]:
# One entry per (configuration, perturbed configuration) pair.
# Values are accumulated in plain Python lists and converted to a DataFrame once
# after the loop. Concatenating onto a DataFrame for every batch re-copies all
# previously collected rows each time, which is quadratic in the number of batches.
nodes = []
rank_effects = []

with torch.no_grad():  # inference only; no gradients are needed
    test_dataloader = DataLoader(dataset, batch_size=1)

    for batch in test_dataloader:
        # batch[0] holds the model inputs; batch[1] holds the index that is passed
        # to `possible_perturbed_state_frm`.
        for i in range(len(batch[0])):
            frm_idx = batch[1][i].item()
            frm_rank = model(batch[0][i].unsqueeze(0))
            for (
                position,
                to_indx,
            ) in dataset.cvf_analysis.possible_perturbed_state_frm(frm_idx):
                to = dataset[to_indx]
                to_rank = model(to[0].unsqueeze(0))
                # Raw (unrounded) difference between the predicted ranks; the
                # rounding is applied once, on the whole column, after the loop.
                rank_effects.append((frm_rank - to_rank).item())
                nodes.append(position)

# Build the frame in one step and pin the column dtypes so that an empty result
# has the same schema as a populated one.
result_df = pd.DataFrame({'node': nodes, 'rank effect': rank_effects}).astype(
    {'node': 'int', 'rank effect': 'float'}
)

# Round to the nearest integer, with exact .5 values rounded up.
result_df['rank effect'] = np.floor(result_df['rank effect'] + 0.5)
# result_df.to_csv(f"ml_predictions/{model_name}__{graph_name}__cvf.csv")

print("Done!")


In [None]:
# Display the per-perturbation results: one row per entry, with the node and
# the rounded rank effect.
result_df

Unnamed: 0,node,rank effect
0,0,-2.0
1,0,-1.0
2,0,-2.0
3,0,-2.0
4,1,-2.0
...,...,...
2187495,5,0.0
2187496,6,0.0
2187497,6,0.0
2187498,6,0.0


In [None]:
# Number of predictions for every (node, rank effect) combination, stored in
# an `ml_count` column, then written to the per-node predictions CSV.
node_re_sizes = result_df.groupby(['node', 'rank effect']).size()
ml_grp_by_node_re = node_re_sizes.reset_index(name='ml_count')

by_node_csv_path = f"ml_predictions/{model_name}__{graph_name}__cvf_by_node.csv"
ml_grp_by_node_re.to_csv(by_node_csv_path)

In [None]:
# Number of predictions for every rank effect value across all nodes, stored in
# an `ml_count` column, then written to the overall predictions CSV.
re_sizes = result_df.groupby(['rank effect']).size()
ml_grp_by_re = re_sizes.reset_index(name='ml_count')

overall_csv_path = f"ml_predictions/{model_name}__{graph_name}__cvf.csv"
ml_grp_by_re.to_csv(overall_csv_path)

# Full analysis data fetch

In [None]:
import os

# Directory that contains the result CSVs read by the following cells.
# The root is taken from the CVF_PROJECT_DIR environment variable; when that is
# not set, the empty default makes the path relative to the working directory.
project_root = os.environ.get("CVF_PROJECT_DIR", "")
results_dir = os.path.join(project_root, "cvf-analysis", "v2", "results", "coloring")

In [None]:
# Load the rank-effect counts for this graph from the results directory.
results_file = f"rank_effects_avg__{graph_name}.csv"
file_path = os.path.join(results_dir, results_file)

f_grp_by_re = (
    pd.read_csv(file_path)
    # Discard the first column of the file.
    .pipe(lambda frame: frame.drop(frame.columns[0], axis=1))
    # Rename so the column does not clash with `ml_count` after merging.
    .rename(columns={'count': 'fa_count'})
)

f_grp_by_re

Unnamed: 0,rank effect,fa_count
0,-2,84000
1,-1,583380
2,0,852740
3,1,583380
4,2,84000


In [None]:
# Outer-join the loaded counts with the model's counts on the rank effect value.
# A rank effect present on only one side gets 0 for the missing count.
df_grp_by_re = f_grp_by_re.merge(
    ml_grp_by_re,
    how='outer',
    on='rank effect',
).fillna(0)
df_grp_by_re

Unnamed: 0,rank effect,fa_count,ml_count
0,-4.0,0.0,1
1,-3.0,0.0,2110
2,-2.0,84000.0,108045
3,-1.0,583380.0,573701
4,0.0,852740.0,819786
5,1.0,583380.0,573701
6,2.0,84000.0,108045
7,3.0,0.0,2110
8,4.0,0.0,1


In [None]:
# Save the merged comparison table (fa_count vs ml_count per rank effect).
# NOTE: this is the same path the ML-only counts were written to earlier, so
# that file is overwritten.
df_grp_by_re.to_csv(f"ml_predictions/{model_name}__{graph_name}__cvf.csv")

In [None]:
# Load the per-node rank-effect counts for this graph.
results_file = f"rank_effects_by_node_avg__{graph_name}.csv"

file_path = os.path.join(results_dir, results_file)
f_grp_by_node_re = pd.read_csv(file_path)

# Reshape from wide (one column per rank effect) to long (one row per
# node / rank effect pair). `value_vars` is left out on purpose: melt then uses
# every column other than `node`, in the order they appear in the file. Passing
# a `set` made the row order depend on hash ordering and vary between runs.
f_grp_by_node_re = f_grp_by_node_re.melt(
    id_vars='node',
    var_name='rank effect',
    value_name='fa_count',
)
# The rank effects come from column headers (strings); convert them to float so
# they match the dtype of the model's `rank effect` column when merging.
f_grp_by_node_re['rank effect'] = f_grp_by_node_re['rank effect'].astype(float)
f_grp_by_node_re

Unnamed: 0,node,rank effect,fa_count
0,0,1.0,83340
1,1,1.0,83340
2,2,1.0,83340
3,3,1.0,83340
4,4,1.0,83340
5,5,1.0,83340
6,6,1.0,83340
7,0,-2.0,12000
8,1,-2.0,12000
9,2,-2.0,12000


In [None]:
# Outer-join the loaded per-node counts with the model's per-node counts.
# A (node, rank effect) pair present on only one side gets 0 for the missing count.
df_grp_by_node_re = f_grp_by_node_re.merge(
    ml_grp_by_node_re,
    how='outer',
    on=['node', 'rank effect'],
).fillna(0)
df_grp_by_node_re

Unnamed: 0,node,rank effect,fa_count,ml_count
0,0,-3.0,0.0,209
1,0,-2.0,12000.0,14606
2,0,-1.0,83340.0,81955
3,0,0.0,121820.0,118960
4,0,1.0,83340.0,81955
5,0,2.0,12000.0,14606
6,0,3.0,0.0,209
7,1,-3.0,0.0,84
8,1,-2.0,12000.0,14819
9,1,-1.0,83340.0,82464


In [None]:
# Save the merged per-node comparison table (fa_count vs ml_count).
# NOTE: this is the same path the ML-only per-node counts were written to
# earlier, so that file is overwritten.
df_grp_by_node_re.to_csv(f"ml_predictions/{model_name}__{graph_name}__cvf_by_node.csv")