In [1]:
import pandas as pd
from numpy import sqrt

In [2]:
def read_data(path, filename, nmin, nmax, m_min, m_max):
    """Load a results CSV and keep only the rows inside the requested N and M windows.

    Parameters
    ----------
    path : str
        Directory holding the file; joined to ``filename`` with a "/".
    filename : str
        Name of the CSV file. It must provide 'N' and 'M' columns.
    nmin, nmax
        Inclusive lower and upper bounds on the 'N' column.
    m_min, m_max
        Lookups handed to ``Series.map`` on the 'N' column; they give, for each
        row, the inclusive lower and upper bound allowed for 'M'.

    Returns
    -------
    pandas.DataFrame
        The rows that pass both filters, with their original index labels.
    """
    table = pd.read_csv(path + "/" + filename)

    # First cut: restrict to the requested range of N (both ends included).
    table = table.loc[table['N'].between(nmin, nmax)]

    # Second cut: each row's M is compared against the bounds looked up from
    # that row's own N. An N with no entry in the lookup maps to NaN, and
    # comparisons against NaN are False, so such rows are dropped.
    lower_bound = table['N'].map(m_min)
    upper_bound = table['N'].map(m_max)
    inside_m_window = (table['M'] >= lower_bound) & (table['M'] <= upper_bound)

    return table.loc[inside_m_window]


In [3]:
def compute_average_energy(df):
    """Summarise E / M over the rows whose E is strictly positive.

    Rows with ``E <= 0`` are ignored. For the remaining rows the ratio E / M is
    formed (E is converted to float64 first) and three statistics are returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns 'E' and 'M'. The frame is not modified.

    Returns
    -------
    tuple
        ``(mean, std_manual, std_pandas)`` where

        * ``mean`` is the mean of E / M,
        * ``std_manual`` is ``sqrt(mean(x**2) - mean(x)**2)``,
        * ``std_pandas`` is ``Series.std(ddof=0)`` of the same values.

        The two standard deviations are the same population quantity computed
        in two ways. If no row has ``E > 0`` all three values are NaN.
    """
    unsolved = df[df['E'] > 0]
    normalized = unsolved['E'].astype('float64') / unsolved['M']

    mean = normalized.mean()
    variance = normalized.pow(2).mean() - mean * mean
    # mean(x**2) - mean(x)**2 suffers from cancellation: for (nearly) constant
    # data it can land a few ulps below zero, which would make sqrt return NaN.
    # Clamp to zero in that case. NaN (empty input) fails the comparison and is
    # passed through unchanged.
    if variance < 0:
        variance = 0.0

    return mean, sqrt(variance), normalized.std(ddof=0)

In [4]:
def check_missing(df, nsamples):
    """Print every sample id in 1..nsamples that is absent for some (N, M) pair.

    For each distinct N in ``df`` and each distinct M occurring together with
    that N, the 'id' values recorded for exactly that (N, M) pair are compared
    against ``1, 2, ..., nsamples``. One line is printed per missing id; nothing
    is printed when every pair is complete.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns 'N', 'M' and 'id'.
    nsamples : int
        Largest id expected for every (N, M) pair.

    Returns
    -------
    None
    """
    expected_ids = range(1, nsamples + 1)
    for N in df['N'].unique():
        rows_for_n = df[df['N'] == N]
        for M in rows_for_n['M'].unique():
            # Look only at this exact (N, M) pair. Selecting on M alone would
            # let ids recorded under a different N with the same M hide a gap.
            present = set(rows_for_n.loc[rows_for_n['M'] == M, 'id'].tolist())
            for i in expected_ids:
                if i not in present:
                    print(f"Missing sample: N={N}, M={M}, c={M/N*2}, i={i}")

# 3-COL

Reading data

In [6]:
# Directory holding the q=3 result files (machine-specific absolute path).
path = "/media/david/Data/UH/Grupo_de_investigacion/Hard_benchmarks/Coloring/PI-GNN/Results/Recurrent/random_graphs/Mixed/q_3/Stats"

# N runs over the powers of two 2**exp_min, ..., 2**exp_max.
exp_min, exp_max = 4, 8
nmin, nmax = 2 ** exp_min, 2 ** exp_max

# Window on c, where c = 2*M/N; translated below into integer bounds on M.
cmin, cmax = 3.32, 4.96

# For every N, the smallest and largest M kept: N*c/2 rounded to an integer.
m_min, m_max = {}, {}
for exp in range(exp_min, exp_max + 1):
    size = 2 ** exp
    m_min[size] = round(size * cmin / 2)
    m_max[size] = round(size * cmax / 2)

Getting the number of solutions found between $N_{min}$ and $N_{max}$

In [7]:
# Show (rows with E == 0, total rows, their ratio) for this file.
filename = "3COL_rPI-GNN_ntrials=5.csv"
df3 = read_data(path, filename, nmin, nmax, m_min, m_max)
n_zero_energy = (df3['E'] == 0).sum()
n_rows = df3.shape[0]
n_zero_energy, n_rows, n_zero_energy / n_rows

(11236, 20000, 0.5618)

In [8]:
# Show (rows with E == 0, total rows, their ratio) for this file.
filename = "3COL_rPI-GNN_ntrials=5_new.csv"
df3 = read_data(path, filename, nmin, nmax, m_min, m_max)
n_zero_energy = (df3['E'] == 0).sum()
n_rows = df3.shape[0]
n_zero_energy, n_rows, n_zero_energy / n_rows

(10868, 20000, 0.5434)

In [9]:
# Show (mean, std from E[x^2] - E[x]^2, std from pandas) of E / M over rows with E > 0.
filename = "3COL_rPI-GNN_ntrials=5.csv"
df3 = read_data(
    path=path,
    filename=filename,
    nmin=nmin,
    nmax=nmax,
    m_min=m_min,
    m_max=m_max,
)
compute_average_energy(df3)

(0.019204459936798965, 0.015796964701764074, 0.015796964701764074)

In [10]:
# Show (mean, std from E[x^2] - E[x]^2, std from pandas) of E / M over rows with E > 0.
filename = "3COL_rPI-GNN_ntrials=5_new.csv"
df3 = read_data(
    path=path,
    filename=filename,
    nmin=nmin,
    nmax=nmax,
    m_min=m_min,
    m_max=m_max,
)
compute_average_energy(df3)

(0.019037831537714354, 0.015873303995541446, 0.015873303995541446)

In [10]:
# Report any id in 1..400 that check_missing finds absent.
# NOTE(review): df3 is whichever file the most recently executed cell loaded.
check_missing(df3, nsamples=400)

# 5-COL

Reading data

In [11]:
# Directory holding the q=5 result files (machine-specific absolute path).
path = "/media/david/Data/UH/Grupo_de_investigacion/Hard_benchmarks/Coloring/PI-GNN/Results/Recurrent/random_graphs/Mixed/q_5/Stats"

# N runs over the powers of two 2**exp_min, ..., 2**exp_max.
exp_min, exp_max = 4, 8
nmin, nmax = 2 ** exp_min, 2 ** exp_max

# Window on c, where c = 2*M/N; translated below into integer bounds on M.
cmin, cmax = 9.9, 13.5

# For every N, the smallest and largest M kept: N*c/2 rounded to an integer.
m_min, m_max = {}, {}
for exp in range(exp_min, exp_max + 1):
    size = 2 ** exp
    m_min[size] = round(size * cmin / 2)
    m_max[size] = round(size * cmax / 2)

Getting the number of solutions found between $N_{min}$ and $N_{max}$

In [12]:
# Show (rows with E == 0, total rows, their ratio) for this file.
filename = "5COL_rPI-GNN_ntrials=5.csv"
df5 = read_data(path, filename, nmin, nmax, m_min, m_max)
n_zero_energy = (df5['E'] == 0).sum()
n_rows = df5.shape[0]
n_zero_energy, n_rows, n_zero_energy / n_rows

(7697, 20000, 0.38485)

In [13]:
# Show (rows with E == 0, total rows, their ratio) for this file.
filename = "5COL_rPI-GNN_ntrials=5_new.csv"
df5 = read_data(path, filename, nmin, nmax, m_min, m_max)
n_zero_energy = (df5['E'] == 0).sum()
n_rows = df5.shape[0]
n_zero_energy, n_rows, n_zero_energy / n_rows

(6724, 20000, 0.3362)

In [14]:
# Show (mean, std from E[x^2] - E[x]^2, std from pandas) of E / M over rows with E > 0.
filename = "5COL_rPI-GNN_ntrials=5_new.csv"
df5 = read_data(
    path=path,
    filename=filename,
    nmin=nmin,
    nmax=nmax,
    m_min=m_min,
    m_max=m_max,
)
compute_average_energy(df5)

(0.02312624542240631, 0.0248316442217488, 0.0248316442217488)

In [15]:
# Show (mean, std from E[x^2] - E[x]^2, std from pandas) of E / M over rows with E > 0.
filename = "5COL_rPI-GNN_ntrials=5.csv"
df5 = read_data(
    path=path,
    filename=filename,
    nmin=nmin,
    nmax=nmax,
    m_min=m_min,
    m_max=m_max,
)
compute_average_energy(df5)

(0.023751781780938234, 0.025344023142034572, 0.025344023142034572)

In [15]:
# Report any id in 1..400 that check_missing finds absent.
# NOTE(review): df5 is whichever file the most recently executed cell loaded.
check_missing(df5, nsamples=400)