En los archivos de diff de Echidna, los datos se guardan con este formato:

```
    obj = {
        "Contract": contract,
        "Test Limit": test_limit,
        "Mode": mode,
        "Node Difference": node_difference,
        "Edge Difference": edge_difference,
        "Reduce Combinations": option
    } 
```

Y los tiempos de ejecución se guardan con este otro formato:
```
    obj = {
        "contract": contract,
        "test_limit": test_limit,
        "mode": mode,
        "reduce combinations": reducing,
        "time_in_s": time_taken_in_seconds,
    }
```

Hay que unir (join) ambos JSON usando como clave contract, test_limit, mode y reduce combinations, de modo que cada fila del resultado tenga además node_difference, edge_difference y time_in_s.

----

#### Función para mergear tiempos y diffs

In [1]:
import json
import pandas as pd

def merge_jsons(diff_json_file, times_json_file):
    """Join Echidna diff records with their timing records.

    Both files are loaded as JSON and turned into DataFrames. The timing
    table is recoded so its key columns use the same values as the diff
    table, then both are inner-joined on contract, test limit, mode and
    the reduce-combinations option.

    Args:
        diff_json_file: Path to the JSON file with the diff records
            (keys ``Contract``, ``Test Limit``, ``Mode``,
            ``Reduce Combinations`` plus the difference columns).
        times_json_file: Path to the JSON file with the timing records
            (keys ``contract``, ``test_limit``, ``mode``,
            ``reduce combinations`` plus the timing columns).

    Returns:
        A DataFrame with every column of the diff table followed by the
        columns of the timing table that are not join keys. Rows without
        a match in the other table are discarded (inner join).
    """
    # JSON text is UTF-8; opening with an explicit encoding keeps the
    # result independent of the platform's default locale encoding.
    with open(diff_json_file, encoding='utf-8') as diff_json:
        diff = json.load(diff_json)

    with open(times_json_file, encoding='utf-8') as times_json:
        times = json.load(times_json)

    diff_df = pd.DataFrame(diff)
    times_df = pd.DataFrame(times)

    # Make the key values use the same representation in both tables so
    # the join can match them: 'e' becomes 'epa' and anything else becomes
    # 'states'; truthy flags become 'reduce_combinations'.
    times_df['mode'] = times_df['mode'].apply(lambda x: 'epa' if x == 'e' else 'states')
    times_df['reduce combinations'] = times_df['reduce combinations'].apply(lambda x: 'reduce_combinations' if x else 'not_reduce_combinations')

    # Join both tables on the four key columns.
    result = pd.merge(diff_df, times_df, how='inner', left_on=['Contract', 'Test Limit', 'Mode', 'Reduce Combinations'], right_on=['contract', 'test_limit', 'mode', 'reduce combinations'])

    # Drop the duplicated key columns coming from the timing table
    # (contract, test_limit, mode and reduce combinations).
    result = result.drop(columns=['contract', 'test_limit', 'mode', 'reduce combinations'])

    return result


In [4]:
# Build one merged diff/time table per Echidna benchmark.
benchmark1 = merge_jsons(
    diff_json_file='../results/diffs/diff_echidna_benchmark_1.json',
    times_json_file='../results/times/echidna_benchmark1_times',
)
benchmark2 = merge_jsons(
    diff_json_file='../results/diffs/diff_echidna_benchmark_2.json',
    times_json_file='../results/times/echidna_benchmark2_times',
)

In [5]:
# Preview up to the first 15 rows of the merged benchmark 1 table.
benchmark1.head(15)

Unnamed: 0,Contract,Test Limit,Mode,Node Difference,Edge Difference,Reduce Combinations,time_in_s
0,BasicProvenance,1000,epa,0,0,not_reduce_combinations,3.62
1,BasicProvenance,50000,epa,0,0,not_reduce_combinations,17.92
2,BasicProvenance,500000,epa,0,0,not_reduce_combinations,146.37
3,BasicProvenance,1000,epa,-1,-1,reduce_combinations,6.91
4,BasicProvenance,50000,epa,0,0,reduce_combinations,21.96
5,BasicProvenance,500000,epa,0,0,reduce_combinations,149.17
6,DefectiveComponentCounter,1000,epa,0,0,not_reduce_combinations,3.94
7,DefectiveComponentCounter,50000,epa,0,1,not_reduce_combinations,30.0
8,DefectiveComponentCounter,500000,epa,0,1,not_reduce_combinations,260.11
9,DefectiveComponentCounter,1000,epa,0,0,reduce_combinations,5.69


In [6]:
# Preview up to the first 15 rows of the merged benchmark 2 table.
benchmark2.head(15)

Unnamed: 0,Contract,Test Limit,Mode,Node Difference,Edge Difference,Reduce Combinations,time_in_s
0,Auction,1000,epa,-1,-10,not_reduce_combinations,14.71
1,Auction,50000,epa,0,-4,not_reduce_combinations,41.96
2,Auction,500000,epa,0,-2,not_reduce_combinations,291.16
3,Auction,1000,epa,-1,-7,reduce_combinations,7.52
4,Auction,50000,epa,-1,-7,reduce_combinations,17.41
5,Auction,500000,epa,0,-2,reduce_combinations,126.89
6,Crowdfunding,1000,epa,-1,-7,not_reduce_combinations,8.61
7,Crowdfunding,50000,epa,-1,-7,not_reduce_combinations,37.02
8,Crowdfunding,500000,epa,0,-2,not_reduce_combinations,296.77
9,Crowdfunding,1000,epa,-1,-7,reduce_combinations,10.94


In [7]:
# Recode the option label as the text 'True' / 'False': only the exact value
# 'reduce_combinations' becomes 'True', every other value becomes 'False'.
benchmark1['Reduce Combinations'] = (
    benchmark1['Reduce Combinations']
    .eq('reduce_combinations')
    .map({True: 'True', False: 'False'})
)
benchmark2['Reduce Combinations'] = (
    benchmark2['Reduce Combinations']
    .eq('reduce_combinations')
    .map({True: 'True', False: 'False'})
)

In [8]:
# Preview up to the first 15 rows of benchmark 1 to check the current column values.
benchmark1.head(15)

Unnamed: 0,Contract,Test Limit,Mode,Node Difference,Edge Difference,Reduce Combinations,time_in_s
0,BasicProvenance,1000,epa,0,0,False,3.62
1,BasicProvenance,50000,epa,0,0,False,17.92
2,BasicProvenance,500000,epa,0,0,False,146.37
3,BasicProvenance,1000,epa,-1,-1,True,6.91
4,BasicProvenance,50000,epa,0,0,True,21.96
5,BasicProvenance,500000,epa,0,0,True,149.17
6,DefectiveComponentCounter,1000,epa,0,0,False,3.94
7,DefectiveComponentCounter,50000,epa,0,1,False,30.0
8,DefectiveComponentCounter,500000,epa,0,1,False,260.11
9,DefectiveComponentCounter,1000,epa,0,0,True,5.69


In [9]:
# Preview up to the first 15 rows of benchmark 2 to check the current column values.
benchmark2.head(15)

Unnamed: 0,Contract,Test Limit,Mode,Node Difference,Edge Difference,Reduce Combinations,time_in_s
0,Auction,1000,epa,-1,-10,False,14.71
1,Auction,50000,epa,0,-4,False,41.96
2,Auction,500000,epa,0,-2,False,291.16
3,Auction,1000,epa,-1,-7,True,7.52
4,Auction,50000,epa,-1,-7,True,17.41
5,Auction,500000,epa,0,-2,True,126.89
6,Crowdfunding,1000,epa,-1,-7,False,8.61
7,Crowdfunding,50000,epa,-1,-7,False,37.02
8,Crowdfunding,500000,epa,0,-2,False,296.77
9,Crowdfunding,1000,epa,-1,-7,True,10.94


In [17]:
# Order both tables by mode, then test limit, then contract name.
benchmark1, benchmark2 = (
    table.sort_values(by=['Mode', 'Test Limit', 'Contract'])
    for table in (benchmark1, benchmark2)
)

In [18]:
# Preview up to the first 15 rows of benchmark 1 to check the current row order.
benchmark1.head(15)

Unnamed: 0,Contract,Test Limit,Mode,Node Difference,Edge Difference,Reduce Combinations,time_in_s
0,BasicProvenance,1000,epa,0,0,False,3.62
3,BasicProvenance,1000,epa,-1,-1,True,6.91
42,BasicProvenanceFixed,1000,epa,0,0,False,4.11
45,BasicProvenanceFixed,1000,epa,-1,-1,True,6.65
6,DefectiveComponentCounter,1000,epa,0,0,False,3.94
9,DefectiveComponentCounter,1000,epa,0,0,True,5.69
48,DefectiveComponentCounterFixed,1000,epa,0,0,False,3.87
51,DefectiveComponentCounterFixed,1000,epa,0,0,True,9.08
12,FrequentFlyerRewardsCalculator,1000,epa,0,-1,False,4.69
15,FrequentFlyerRewardsCalculator,1000,epa,0,-1,True,6.47


In [19]:
# Preview up to the first 15 rows of benchmark 2 to check the current row order.
benchmark2.head(15)

Unnamed: 0,Contract,Test Limit,Mode,Node Difference,Edge Difference,Reduce Combinations,time_in_s
0,Auction,1000,epa,-1,-10,False,14.71
3,Auction,1000,epa,-1,-7,True,7.52
87,AuctionEnded,1000,epa,-1,-7,True,6.9
84,AuctionEnded,1000,epa,-1,-11,False,43.46
90,AuctionWithdraw,1000,epa,-3,-23,False,50.93
93,AuctionWithdraw,1000,epa,-3,-22,True,6.79
6,Crowdfunding,1000,epa,-1,-7,False,8.61
9,Crowdfunding,1000,epa,-1,-7,True,10.94
54,CrowdfundingTime_Base,1000,epa,-2,-15,False,16.58
57,CrowdfundingTime_Base,1000,epa,0,-7,True,10.86


In [20]:
# transform df to csv and export
def export_as_csv(df, filename):
    """Write *df* to *filename* as CSV, leaving out the index column.

    Args:
        df: DataFrame to export.
        filename: Destination path handed to ``DataFrame.to_csv``.
    """
    df.to_csv(path_or_buf=filename, index=False)

# Persist each merged benchmark table next to the other results.
export_as_csv(df=benchmark1, filename='../results/echidna_benchmark1_merged.csv')
export_as_csv(df=benchmark2, filename='../results/echidna_benchmark2_merged.csv')

