In [4]:
%load_ext autoreload
# Always reload modules so that as you change code in src, it gets loaded
%autoreload 2
%matplotlib inline

import networkx as nx
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import axes3d
import pandas as pd
import numpy as np
import random
import math
import EoN
import seaborn as sns
import time

# New Imports
#from ctrace.contact_tracing import *
#from ctrace.constraint import *
#from ctrace.solve import *
#from ctrace.simulation import *
#from ctrace.restricted import *
from ctrace.simulation import *
from ctrace.recommender import *
from ctrace.utils import *
from ctrace.exec.param import GraphParam, SIRParam, FileParam, ParamBase, LambdaParam

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Setup

In [5]:
# Contact graphs used throughout the notebook, keyed by their GraphParam name.
graphs = {
    graph_name: GraphParam(graph_name).data
    for graph_name in ("montgomery", "cville")
}

In [7]:
# Show every column when a DataFrame is displayed (no truncation).
pd.options.display.max_columns = None

In [8]:
# Run ids; each one names a directory under output/ read by segmented_run.
OLD = "run_3jWGp"
RANDOM = "opt_ratio_NQzcz"
STATS = "network_stats_bWPFr"

# NOTE(review): NEW is not referenced by any cell visible here - confirm it is still needed.
NEW = "opt_ratio_b3LY8"

In [9]:
def segmented_run(run_id):
    """Load one run's inputs and results as a single DataFrame.

    Reads ``output/<run_id>/input.csv`` and ``output/<run_id>/main.csv``
    under PROJECT_ROOT and left-joins the results onto the inputs by ``id``,
    so every input row is kept even when it has no matching result row.
    The ``graph`` column is relabelled with display names.
    """
    inputs = pd.read_csv(PROJECT_ROOT / f'output/{run_id}/input.csv')
    results = pd.read_csv(PROJECT_ROOT / f'output/{run_id}/main.csv')
    merged = inputs.merge(results, how="left", on=["id"])

    # Replace the internal graph identifiers with the names used in the tables.
    display_names = {'montgomery': "Montgomery", 'cville': "Albemarle"}
    merged['graph'] = merged['graph'].replace(display_names)

    return merged

# Stack the OLD and RANDOM runs, then discard the columns this analysis does not use
# from these runs (total_cross_edges and D are merged back in from the STATS run later).
df_opt = (
    pd.concat([segmented_run(OLD), segmented_run(RANDOM)])
    .drop(columns=['is_optimal', 'total_cross_edges', 'D'])
)

In [10]:
# Keep only instances whose MILP row is complete (no missing fields);
# an instance is identified by its from_cache value.
milp_rows = df_opt.loc[df_opt['agent'] == 'MILP_fair']
valid_caches = milp_rows.dropna()['from_cache']
valid_df = df_opt.loc[df_opt['from_cache'].isin(valid_caches)]

# Order by v2_size, then discard instances whose v1_size is below 1000.
df = (
    valid_df
    .copy()
    .sort_values(by=['v2_size'])
    .loc[lambda frame: frame['v1_size'] >= 1000]
)

In [11]:
# Look up the MILP objective of each instance (keyed by from_cache) so every
# algorithm's objective can be expressed relative to it.
milp_only = df[df['agent'] == 'MILP_fair']
cache_2_milp = dict(zip(milp_only['from_cache'], milp_only['milp_obj']))


def normalize_milp(row):
    """Return the row's expected_obj divided by its instance's MILP objective.

    Yields NaN when the MILP objective is exactly zero, since the ratio is
    undefined there.
    """
    baseline = cache_2_milp[row['from_cache']]
    if baseline == 0:
        return np.nan
    return row['expected_obj'] / baseline


df['milp_ratio'] = df.apply(normalize_milp, axis=1)

# Create ids to split the SIRs based on |V2|

# Create a column that splits the dataset into 4 groups

In [12]:
def splits(target, splits):
    """Return ``target`` bucket labels spread over ``splits`` consecutive groups.

    Groups ``0 .. splits - 2`` each receive ``int(target / splits)`` labels;
    the last group (``splits - 1``) absorbs whatever is left, so the result
    always has length ``target`` (for non-negative ``target``).

    Example: ``splits(10, 4)`` -> ``[0, 0, 1, 1, 2, 2, 3, 3, 3, 3]``.
    """
    # The per-group size is evaluated inside the loop on purpose: when there is
    # no leading group (splits <= 1) the division is never performed.
    labels = [
        group
        for group in range(splits - 1)
        for _ in range(int(target / splits))
    ]
    remainder = target - len(labels)
    labels.extend([splits - 1] * remainder)
    return labels

# Give every MILP instance a bucket id in 0..3, following the current row order
# of df, then propagate that id to all rows sharing the same from_cache.
caches = df.loc[df['agent'] == 'MILP_fair', 'from_cache']
bucket_labels = splits(len(caches), 4)
cache_2_id = dict(zip(caches, bucket_labels))
df['v2_id'] = df['from_cache'].map(lambda cache: cache_2_id[cache])

In [13]:
# Swap the internal agent identifiers for the LaTeX macro names used in the
# tables. Raw strings keep the leading backslash literal without relying on
# Python passing unknown escapes such as "\D" through unchanged (which emits
# a warning on current Python versions).
df['agent'] = df['agent'].replace({
    'DegGreedy_fair': r"\DegGreedy",
    'DepRound_fair': r"\DepRound",
    "binary_segmented_greedy": r"\SegDegree",
    'MILP_fair': r"\MILP",
    'Random': r"\Random",
})

# Human-readable column headers for the output tables.
df = df.rename(columns={
    "agent": "Algorithms",
    "v2_id": "bucket",
    "milp_ratio": 'Approx. Factor',
    'expected_obj': 'Expected Objective',
    'time': 'Time Elapsed'
})

df = df.round({'Time Elapsed': 3, 'Expected Objective': 3, 'Approx. Factor': 3})

# Attach the per-instance network statistics from the STATS run. The join key
# is spelled out so the merge cannot silently pick up extra shared columns.
df_stats = segmented_run(STATS)
df_stats = df_stats[['from_cache', 'total_cross_edges', 'D']]
df = pd.merge(df, df_stats, on='from_cache')

# Filters: keep rows whose ratio to the MILP objective is at least 1
# (this also removes rows where the ratio is NaN).
df = df[df['Approx. Factor'] >= 1]

In [14]:
# Debug dump: writes the bucket-0 rows as CSV to a file named 'test'
# in the current working directory.
bucket0_rows = df.loc[df['bucket'] == 0]
bucket0_rows.to_csv('test')

In [15]:
# Preview the first five rows of the prepared frame.
df.head(5)

Unnamed: 0,id,graph,budget,Algorithms,policy,transmission_rate,transmission_known,compliance_rate,compliance_known,discovery_rate,snitch_rate,from_cache,trial_id,seed,milp_obj,Expected Objective,I_size,v1_size,v2_size,Time Elapsed,Approx. Factor,bucket,total_cross_edges,D
0,13,Montgomery,750,\Random,A,0.05,True,-1.0,True,1.0,1.0,mi25.json,0,64820,2.695012,2.818,228.0,1020.0,2031.0,0.001,1.618,0,2357,8
1,1483,Montgomery,750,\MILP,A,0.05,False,-1.0,False,1.0,1.0,mi25.json,0,60,1.741967,1.842,228.0,1020.0,2031.0,0.135,1.058,0,2357,8
2,13,Montgomery,750,\SegDegree,A,0.05,False,-1.0,False,1.0,1.0,mi25.json,0,64820,2.55638,2.668,228.0,1020.0,2031.0,0.003,1.531,0,2357,8
3,503,Montgomery,750,\DegGreedy,A,0.05,False,-1.0,False,1.0,1.0,mi25.json,0,60403,1.658124,1.754,228.0,1020.0,2031.0,0.06,1.007,0,2357,8
4,993,Montgomery,750,\DepRound,A,0.05,False,-1.0,False,1.0,1.0,mi25.json,0,23275,1.757369,1.863,228.0,1020.0,2031.0,0.168,1.069,0,2357,8


In [16]:
# Number of MILP rows per graph. The raw string keeps the backslash of the
# LaTeX macro name literal instead of relying on the invalid escape "\M".
df.loc[df['Algorithms'] == r'\MILP', 'graph'].value_counts()

Montgomery    302
Albemarle     270
Name: graph, dtype: int64

### Tables

In [17]:
# Mean instance statistics for every (graph, bucket) pair.
stat_cols = ['I_size', 'v1_size', 'v2_size', 'total_cross_edges', 'D']
net_stats = df.groupby(['graph', 'bucket'])[stat_cols].mean()

# Report the size columns in thousands; D is left unscaled. Dividing by 1000
# avoids the floating-point error in 0.1 ** 3 (which is not exactly 0.001).
size_cols = ['I_size', 'v1_size', 'v2_size', 'total_cross_edges']
net_stats[size_cols] = net_stats[size_cols] / 1000

net_stats = net_stats.round(2)

# LaTeX row labels. These must be raw strings: in a normal string literal
# "\t" (as in "\times") is a TAB character and "\c" is an invalid escape.
net_stats = net_stats.rename(columns={
    "I_size": r"$I~(\times 10^3)$",
    "v1_size": r"$|V_1|~(\times 10^3)$",
    "v2_size": r'$|V_2|(\times 10^3)$',
    'total_cross_edges': r'$|(V_1\times V_2)\cap E|~(\times 10^3)$',
    'D': r'$D$'
})

# One row per statistic, one column per (graph, bucket).
net_stats = net_stats.T

print(net_stats.to_latex(escape=False))
# net_stats

\begin{tabular}{lrrrrrrrr}
\toprule
graph & \multicolumn{4}{l}{Albemarle} & \multicolumn{4}{l}{Montgomery} \\
bucket &         0 &      1 &      2 &       3 &          0 &      1 &      2 &       3 \\
\midrule
$I~(\times 10^3)$                       &      0.36 &   2.89 &   7.46 &    3.84 &       1.60 &   4.03 &   1.77 &    0.99 \\
$|V_1|~(\times 10^3)$                   &      2.06 &  13.72 &  35.01 &   32.79 &       6.30 &  20.13 &  17.21 &   13.97 \\
$|V_2|(\times 10^3)$                    &      8.97 &  20.40 &  25.82 &   57.52 &       8.19 &  17.24 &  28.88 &   37.93 \\
$|(V_1\times V_2)\cap E|~(\times 10^3)$ &     11.82 &  45.68 &  90.73 &  298.70 &      12.76 &  44.52 &  91.63 &  123.52 \\
$D$                                     &      7.37 &  17.20 &  32.27 &   72.79 &      12.85 &  27.69 &  37.96 &   41.06 \\
\bottomrule
\end{tabular}



In [18]:
# Row count for each budget value.
df.loc[:, 'budget'].value_counts()

750     1442
1350    1298
Name: budget, dtype: int64

In [19]:
# Per-bucket mean and max of each metric, for every algorithm on Montgomery.
dp = pd.pivot_table(
    df[df['graph'] == 'Montgomery'].dropna(),
    values=["Approx. Factor", 'Expected Objective', 'Time Elapsed'],
    index=["Algorithms"],
    columns=["bucket"],
    aggfunc=["mean", "max"],
)

# Move the aggregate and metric levels from the columns into the row index,
# leaving only the buckets as columns.
dp = dp.stack(0).stack(0)
# The axis is passed by keyword: positional use is rejected by pandas 2.x.
dp = dp.sort_index(axis=1)
# Row index order becomes (algorithm, metric, aggregate).
dp = dp.reorder_levels([0, 2, 1], axis=0)
dp = dp.sort_index(axis=0)
dp = dp.round(3)
# Reorder and omit \MILP (raw strings keep the macro backslashes literal).
dp = dp.reindex(
    axis='index',
    level=0,
    labels=[r"\DegGreedy", r"\DepRound", r"\SegDegree", r"\Random"],
)
# dp
print(dp.to_latex(escape=False))


\begin{tabular}{lllrrrr}
\toprule
        &              & bucket &        0 &        1 &        2 &        3 \\
Algorithms & {} & {} &          &          &          &          \\
\midrule
\DegGreedy & Approx. Factor & max &    1.229 &    1.670 &    1.771 &    1.724 \\
        &              & mean &    1.102 &    1.380 &    1.435 &    1.470 \\
        & Expected Objective & max &   88.277 &  359.140 &  468.610 &  463.584 \\
        &              & mean &   43.646 &  179.045 &  279.208 &  320.838 \\
        & Time Elapsed & max &    1.887 &    6.654 &    4.172 &    1.768 \\
        &              & mean &    0.865 &    4.270 &    1.525 &    0.666 \\
\DepRound & Approx. Factor & max &    1.362 &    1.796 &    1.915 &    1.871 \\
        &              & mean &    1.169 &    1.479 &    1.631 &    1.663 \\
        & Expected Objective & max &   97.847 &  385.880 &  510.295 &  503.374 \\
        &              & mean &   28.896 &  176.463 &  308.454 &  360.564 \\
        & Time Elapsed &

In [20]:
# Per-bucket mean and max of each metric, for every algorithm on Albemarle.
dp = pd.pivot_table(
    df[df['graph'] == 'Albemarle'].dropna(),
    values=["Approx. Factor", 'Expected Objective', 'Time Elapsed'],
    index=["Algorithms"],
    columns=["bucket"],
    aggfunc=["mean", "max"],
)

# Move the aggregate and metric levels from the columns into the row index,
# leaving only the buckets as columns.
dp = dp.stack(0).stack(0)
# The axis is passed by keyword: positional use is rejected by pandas 2.x.
dp = dp.sort_index(axis=1)
# Row index order becomes (algorithm, metric, aggregate).
dp = dp.reorder_levels([0, 2, 1], axis=0)
dp = dp.sort_index(axis=0)
dp = dp.round(3)
# Reorder and omit \MILP (raw strings keep the macro backslashes literal).
dp = dp.reindex(
    axis='index',
    level=0,
    labels=[r"\DegGreedy", r"\DepRound", r"\SegDegree", r"\Random"],
)
# dp
print(dp.to_latex(escape=False))

\begin{tabular}{lllrrrr}
\toprule
        &              & bucket &       0 &        1 &        2 &         3 \\
Algorithms & {} & {} &         &          &          &           \\
\midrule
\DegGreedy & Approx. Factor & max &   1.086 &    2.173 &    2.061 &     2.550 \\
        &              & mean &   1.068 &    1.271 &    1.513 &     2.033 \\
        & Expected Objective & max &   3.376 &  228.197 &  737.917 &  1595.941 \\
        &              & mean &   2.573 &   98.606 &  278.710 &  1011.633 \\
        & Time Elapsed & max &   0.106 &   11.560 &   19.914 &    18.356 \\
        &              & mean &   0.101 &    3.861 &    9.996 &     6.607 \\
\DepRound & Approx. Factor & max &   1.129 &    2.173 &    3.091 &     2.803 \\
        &              & mean &   1.091 &    1.321 &    1.638 &     2.182 \\
        & Expected Objective & max &  13.494 &  238.762 &  750.141 &  1639.357 \\
        &              & mean &   6.564 &   72.362 &  291.681 &  1058.894 \\
        & Time Elapsed &