In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import os
import json


In [2]:
## Global Params
# Text settings shared by every figure, applied in one rcParams update.
plt.rcParams.update({
    "font.family": "Nimbus Roman",
    "font.size": 18,  # Adjust the font size
})
# Hex colour palette, light green through dark blue.
colors = ['#a8ddb5', '#7bccc4', '#43a2ca', '#0868ac']
## Global Params


def read_text_files(directory, component, number, label, throughput_divisor=30):
    """Parse every ``.txt`` result file in ``directory`` into one record per file.

    Each record starts with zeroed metrics and is filled in from the lines that
    start with ``In-Flight Requests``, ``Ops/s`` or ``Average Latency``. A metric
    whose line never appears in the file keeps its initial value of 0.

    Args:
        directory: Folder to scan (non-recursively) for ``.txt`` files.
        component: Stored unchanged under the ``'ScaleFactor'`` key.
        number: Stored unchanged under the ``'# of Components'`` key.
        label: Stored unchanged under the ``'Label'`` key.
        throughput_divisor: The integer found on the ``Ops/s`` line is divided by
            this value before being stored as ``'Throughput'``. Defaults to 30,
            the constant that used to be hard-coded here (presumably the run
            length in seconds -- TODO confirm).

    Returns:
        list[dict]: One dict per ``.txt`` file, ordered by filename.
    """
    text_data = []
    # os.listdir returns entries in arbitrary order; sorting makes the row order
    # reproducible across machines and re-runs.
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith('.txt'):
            continue
        record = {
            'In-Flight': 0,
            'Throughput': 0,
            'Latency(ms)': 0,
            'ScaleFactor': component,
            "# of Components": number,
            "Label": label,
        }
        with open(os.path.join(directory, filename), 'r') as file:
            for line in file:
                # The three prefixes are mutually exclusive, so an elif chain
                # is equivalent to testing each one independently.
                if line.startswith("In-Flight Requests"):
                    # Value is the third space-separated token.
                    record['In-Flight'] = int(line.split(' ')[2].strip())
                elif line.startswith("Ops/s"):
                    # Value is the second comma-separated field.
                    record['Throughput'] = int(line.split(',')[1]) / throughput_divisor
                elif line.startswith("Average Latency"):
                    # Value is the third space-separated token.
                    record['Latency(ms)'] = int(line.split(' ')[2])
        text_data.append(record)
    return text_data



def read_json_files_to_df(directory, label):
    """Collect throughput and average latency from every ``.json`` file in ``directory``.

    For each file the function reads ``'Goodput (requests/second)'`` and
    ``'Latency Distribution' -> 'Average Latency (microseconds)'``. Files that
    lack either value are skipped. Latency is converted from microseconds to
    milliseconds.

    Args:
        directory: Folder to scan (non-recursively) for ``.json`` files.
        label: Value written to the ``'Label'`` column of every row.

    Returns:
        pandas.DataFrame: One row per usable file with the columns
        ``'Experiment'`` (``'Experiment 1'``, ``'Experiment 2'``, ... numbered
        over the kept files in sorted filename order), ``'Throughput'``,
        ``'Latency(ms)'`` and ``'Label'``. Empty when no file qualifies.
    """
    throughput_data = []
    # Sorted so that the 'Experiment N' numbering is reproducible; os.listdir
    # order is arbitrary.
    for filename in sorted(os.listdir(directory)):
        if not filename.endswith('.json'):
            continue
        with open(os.path.join(directory, filename), 'r') as file:
            data = json.load(file)
        throughput = data.get('Goodput (requests/second)')
        # A missing (or null) 'Latency Distribution' section used to raise
        # AttributeError on None.get(...); treat it as "no latency" instead.
        latency_section = data.get('Latency Distribution') or {}
        latency_us = latency_section.get('Average Latency (microseconds)')
        if throughput is None or latency_us is None:
            continue
        throughput_data.append({
            # Numbering counts only the files that are actually kept.
            'Experiment': f'Experiment {len(throughput_data) + 1}',
            'Throughput': throughput,
            "Latency(ms)": latency_us / 1000,  # microseconds -> milliseconds
            "Label": label,
        })
    return pd.DataFrame(throughput_data)

### Baseline (Plaintext)

In [3]:
# --- Input locations ---
# CockroachDB results (JSON files)
directory = "/home/haseeb/Desktop/Plotting/OverleafGraphs/WaffleData/Baseline/CockroachDB"
# Waffle encrypted baseline
baselineScaleOne = '/home/haseeb/Desktop/Plotting/OverleafGraphs/WaffleData/Baseline/WaffleOne'
# ORAM encrypted baseline
# NOTE(review): this is the same directory the oblivious-baseline cell reads as
# ORAMBaselineBLOOM -- confirm that is intended for this comparison.
oramScaleOne = '/home/haseeb/Desktop/Plotting/OverleafGraphs/OramData/Baselines/BDB_rankings_table'
# Plaintext baseline
plaintextDefault = '/home/haseeb/Desktop/Plotting/OverleafGraphs/WaffleData/Baseline/Plaintext/default'

# --- Load raw records ---
# Despite its name, mySQLDF holds the rows labelled "CockroachDB".
mySQLDF = read_json_files_to_df(directory, "CockroachDB")
readPTBaseLine = read_text_files(plaintextDefault, '1', 3, "Plaintext-KV")
readWaffleBaseLine = read_text_files(baselineScaleOne, '1', 3, "Waffle-KV")
readORAMBaseLine = read_text_files(oramScaleOne, '1', 3, "PathOram-KV")

# --- Record lists -> DataFrames ---
baseLinePTDF, baseLineWaffleDF, baseLineORAMDF = (
    pd.DataFrame(records)
    for records in (readPTBaseLine, readWaffleBaseLine, readORAMBaseLine)
)

# All four systems stacked into one frame.
newDf = pd.concat([mySQLDF, baseLineWaffleDF, baseLinePTDF, baseLineORAMDF])

# Mean latency and throughput per system label; the last line is the cell output.
average_metrics = (
    newDf
    .groupby('Label')[['Latency(ms)', 'Throughput']]
    .mean()
    .reset_index()
)
average_metrics


Unnamed: 0,Label,Latency(ms),Throughput
0,CockroachDB,137.999,8425.470509
1,PathOram-KV,362.666667,135.0
2,Plaintext-KV,118.0,8247.233333
3,Waffle-KV,931.666667,946.444444


### Baseline (Oblivious)

In [4]:
## ObliDB Records for BDB
# Tag the CSV rows with a label and derive latency in ms from the seconds column.
obliDB = pd.read_csv(
    '/home/haseeb/Desktop/Plotting/OverleafGraphs/WaffleData/Baseline/ObliDB/ObliDBStats.csv'
).assign(**{
    'Label': 'ObliDB',
    'Latency(ms)': lambda frame: frame['AvgLatency(s)'] * 1000,
})

# Waffle Baseline for BDB
waffleBaselineBLOOM = '/home/haseeb/Desktop/Plotting/OverleafGraphs/WaffleData/Baseline/BDBRanking'
readWaffleBaseLine = pd.DataFrame(
    read_text_files(waffleBaselineBLOOM, '1', 3, "Waffle-KV")
)

# Oram Baseline for BDB
ORAMBaselineBLOOM = '/home/haseeb/Desktop/Plotting/OverleafGraphs/OramData/Baselines/BDB_rankings_table'
readORAMBaseLine = pd.DataFrame(
    read_text_files(ORAMBaselineBLOOM, '1', 3, "PathOram-KV")
)

# Stack the three sources on their shared columns with a fresh 0..n-1 index.
combined_df = pd.concat(
    [
        frame[['Label', 'Throughput', 'Latency(ms)']]
        for frame in (obliDB, readWaffleBaseLine, readORAMBaseLine)
    ],
    ignore_index=True,
)

# Mean latency and throughput per system label; the last line is the cell output.
average_metrics = (
    combined_df
    .groupby('Label')[['Latency(ms)', 'Throughput']]
    .mean()
    .reset_index()
)
average_metrics


Unnamed: 0,Label,Latency(ms),Throughput
0,ObliDB,10.336667,96.753333
1,PathOram-KV,362.666667,135.0
2,Waffle-KV,380.333333,2591.433333


### Scaling (Waffle)

In [5]:

# Result directories for the Waffle scaling runs, one per scale factor.
(scaleOne, scaleTwo, ScaleThree, ScaleFour, ScaleFive) = (
    '/home/haseeb/Desktop/Plotting/OverleafGraphs/WaffleData/ScalingTo5/Scale1',
    '/home/haseeb/Desktop/Plotting/OverleafGraphs/WaffleData/ScalingTo5/Scale2',
    '/home/haseeb/Desktop/Plotting/OverleafGraphs/WaffleData/ScalingTo5/Scale3',
    '/home/haseeb/Desktop/Plotting/OverleafGraphs/WaffleData/ScalingTo5/Scale4',
    '/home/haseeb/Desktop/Plotting/OverleafGraphs/WaffleData/ScalingTo5/Scale5',
)

# Per-file records for each scale factor (the factor is passed as a string).
readScaleOne, readScaleTwo, readScaleThree, readScaleFour, readScaleFive = (
    read_text_files(path, str(factor), 3, "10ms-Latency")
    for factor, path in enumerate(
        (scaleOne, scaleTwo, ScaleThree, ScaleFour, ScaleFive), start=1
    )
)

oneDF, twoDF, threeDF, fourDF, fiveDF = map(
    pd.DataFrame,
    (readScaleOne, readScaleTwo, readScaleThree, readScaleFour, readScaleFive),
)

# Stack all runs and expose the scale factor / label under their plotting names.
proxyDF = pd.concat([oneDF, twoDF, threeDF, fourDF, fiveDF]).rename(
    columns={'ScaleFactor': 'Machines', 'Label': "Latency"}
)

# Mean latency and throughput per machine count.
average_metrics = (
    proxyDF
    .groupby('Machines')[['Latency(ms)', 'Throughput']]
    .mean()
    .reset_index()
)
# Displayed with the grouping column renamed; average_metrics itself keeps 'Machines'.
average_metrics.rename(columns={"Machines": "Scalefactor"})

Unnamed: 0,Scalefactor,Latency(ms),Throughput
0,1,948.666667,930.922222
1,2,890.666667,2632.355556
2,3,959.666667,4064.333333
3,4,997.0,5608.088889
4,5,954.333333,6589.677778


### Scaling (ORAM)

In [6]:

# Result directories for the ORAM scaling runs, one per scale factor.
(scaleOne, scaleTwo, ScaleThree, ScaleFour, ScaleFive) = (
    '/home/haseeb/Desktop/Plotting/OverleafGraphs/OramData/ScalingTo5/Scale1',
    '/home/haseeb/Desktop/Plotting/OverleafGraphs/OramData/ScalingTo5/Scale2',
    '/home/haseeb/Desktop/Plotting/OverleafGraphs/OramData/ScalingTo5/Scale3',
    '/home/haseeb/Desktop/Plotting/OverleafGraphs/OramData/ScalingTo5/Scale4',
    '/home/haseeb/Desktop/Plotting/OverleafGraphs/OramData/ScalingTo5/Scale5',
)

# Per-file records for each scale factor (the factor is passed as a string).
readScaleOne, readScaleTwo, readScaleThree, readScaleFour, readScaleFive = (
    read_text_files(path, str(factor), 3, "10ms-Latency")
    for factor, path in enumerate(
        (scaleOne, scaleTwo, ScaleThree, ScaleFour, ScaleFive), start=1
    )
)

oneDF, twoDF, threeDF, fourDF, fiveDF = map(
    pd.DataFrame,
    (readScaleOne, readScaleTwo, readScaleThree, readScaleFour, readScaleFive),
)

# Stack all runs and expose the scale factor / label under their plotting names.
proxyDF = pd.concat([oneDF, twoDF, threeDF, fourDF, fiveDF]).rename(
    columns={'ScaleFactor': 'Machines', 'Label': "Latency"}
)

# Mean latency and throughput per machine count.
average_metrics = (
    proxyDF
    .groupby('Machines')[['Latency(ms)', 'Throughput']]
    .mean()
    .reset_index()
)
# Displayed with the grouping column renamed; average_metrics itself keeps 'Machines'.
average_metrics.rename(columns={"Machines": "Scalefactor"})

Unnamed: 0,Scalefactor,Latency(ms),Throughput
0,1,832.333333,41.722222
1,2,918.0,74.655556
2,3,841.0,127.933333
3,4,909.333333,171.7
4,5,871.0,213.044444


#### Per Layer Scaling (Waffle)

In [7]:
# Inflight - 3000 -->Waffle
# Per-layer scaling: each layer is read from three directories (suffixes 1/2/3 in
# that layer's position of the directory name) and tagged with scale factor '1'..'3'.

# Resolver
resolverScalingOne = '/home/haseeb/Desktop/Plotting/OverleafGraphs/WaffleData/PerLayerScaling/Resolver/1_3_3'
resolverScalingTwo = '/home/haseeb/Desktop/Plotting/OverleafGraphs/WaffleData/PerLayerScaling/Resolver/2_3_3'
resolverScalingThree = '/home/haseeb/Desktop/Plotting/OverleafGraphs/WaffleData/PerLayerScaling/Resolver/3_3_3'

oneResolver = read_text_files(resolverScalingOne, '1', 3, "Resolver")
twoResolver = read_text_files(resolverScalingTwo, '2', 3, "Resolver")
threeResolver = read_text_files(resolverScalingThree, '3', 3, "Resolver")

oneResolverDF = pd.DataFrame(oneResolver)
twoResolverDF = pd.DataFrame(twoResolver)
threeResolverDF = pd.DataFrame(threeResolver)

resolverDF = pd.concat([oneResolverDF, twoResolverDF, threeResolverDF])

# Batcher
BatcherScalingOne = '/home/haseeb/Desktop/Plotting/OverleafGraphs/WaffleData/PerLayerScaling/Batcher/3_1_3'
BatcherScalingTwo = '/home/haseeb/Desktop/Plotting/OverleafGraphs/WaffleData/PerLayerScaling/Batcher/3_2_3'
BatcherScalingThree = '/home/haseeb/Desktop/Plotting/OverleafGraphs/WaffleData/PerLayerScaling/Batcher/3_3_3'

oneBatcher = read_text_files(BatcherScalingOne, '1', 3, "Batcher")
twoBatcher = read_text_files(BatcherScalingTwo, '2', 3, "Batcher")
threeBatcher = read_text_files(BatcherScalingThree, '3', 3, "Batcher")

oneBatcherDF = pd.DataFrame(oneBatcher)
twoBatcherDF = pd.DataFrame(twoBatcher)
threeBatcherDF = pd.DataFrame(threeBatcher)

batcherDF = pd.concat([oneBatcherDF, twoBatcherDF, threeBatcherDF])

# Proxy (rows are labelled "Executor")
ProxyScalingOne = '/home/haseeb/Desktop/Plotting/OverleafGraphs/WaffleData/PerLayerScaling/Proxy/3_3_1'
ProxyScalingTwo = '/home/haseeb/Desktop/Plotting/OverleafGraphs/WaffleData/PerLayerScaling/Proxy/3_3_2'
ProxyScalingThree = '/home/haseeb/Desktop/Plotting/OverleafGraphs/WaffleData/PerLayerScaling/Proxy/3_3_3'

oneProxy = read_text_files(ProxyScalingOne, '1', 3, "Executor")
twoProxy = read_text_files(ProxyScalingTwo, '2', 3, "Executor")
threeProxy = read_text_files(ProxyScalingThree, '3', 3, "Executor")

oneProxyDF = pd.DataFrame(oneProxy)
twoProxyDF = pd.DataFrame(twoProxy)
threeProxyDF = pd.DataFrame(threeProxy)

# Despite the name, proxyDF holds all three layers: Executor, Batcher, Resolver.
proxyDF = pd.concat([
    oneProxyDF, twoProxyDF, threeProxyDF,
    oneBatcherDF, twoBatcherDF, threeBatcherDF,
    oneResolverDF, twoResolverDF, threeResolverDF,
])

# Mean latency and throughput per (layer, scale factor).
average_metrics = (
    proxyDF
    .groupby(['Label', 'ScaleFactor'])
    .agg({'Latency(ms)': 'mean', 'Throughput': 'mean'})
    .reset_index()
)
# The former trailing rename of a "Machines" column was a no-op (this frame has
# no such column), so the frame is displayed directly.
average_metrics

Unnamed: 0,Label,ScaleFactor,Latency(ms),Throughput
0,Batcher,1,927.666667,3117.022222
1,Batcher,2,913.0,3201.9
2,Batcher,3,805.333333,3641.588889
3,Executor,1,3366.333333,887.022222
4,Executor,2,1225.333333,2380.722222
5,Executor,3,805.333333,3641.588889
6,Resolver,1,819.666667,3593.4
7,Resolver,2,817.666667,3591.133333
8,Resolver,3,805.333333,3641.588889


#### Per Layer Scaling (ORAM)

In [8]:
# Inflight - 100 -->ORAM
# Per-layer scaling: each layer is read from three directories (suffixes 1/2/3 in
# that layer's position of the directory name) and tagged with scale factor '1'..'3'.

# Resolver
resolverScalingOne = '/home/haseeb/Desktop/Plotting/OverleafGraphs/OramData/PerLayer/Resolver/1_3_3'
resolverScalingTwo = '/home/haseeb/Desktop/Plotting/OverleafGraphs/OramData/PerLayer/Resolver/2_3_3'
resolverScalingThree = '/home/haseeb/Desktop/Plotting/OverleafGraphs/OramData/PerLayer/Resolver/3_3_3'

oneResolver = read_text_files(resolverScalingOne, '1', 3, "Resolver")
twoResolver = read_text_files(resolverScalingTwo, '2', 3, "Resolver")
threeResolver = read_text_files(resolverScalingThree, '3', 3, "Resolver")

oneResolverDF = pd.DataFrame(oneResolver)
twoResolverDF = pd.DataFrame(twoResolver)
threeResolverDF = pd.DataFrame(threeResolver)

resolverDF = pd.concat([oneResolverDF, twoResolverDF, threeResolverDF])

# Batcher
BatcherScalingOne = '/home/haseeb/Desktop/Plotting/OverleafGraphs/OramData/PerLayer/Batcher/3_1_3'
BatcherScalingTwo = '/home/haseeb/Desktop/Plotting/OverleafGraphs/OramData/PerLayer/Batcher/3_2_3'
BatcherScalingThree = '/home/haseeb/Desktop/Plotting/OverleafGraphs/OramData/PerLayer/Batcher/3_3_3'

oneBatcher = read_text_files(BatcherScalingOne, '1', 3, "Batcher")
twoBatcher = read_text_files(BatcherScalingTwo, '2', 3, "Batcher")
threeBatcher = read_text_files(BatcherScalingThree, '3', 3, "Batcher")

oneBatcherDF = pd.DataFrame(oneBatcher)
twoBatcherDF = pd.DataFrame(twoBatcher)
threeBatcherDF = pd.DataFrame(threeBatcher)

batcherDF = pd.concat([oneBatcherDF, twoBatcherDF, threeBatcherDF])

# Proxy (rows are labelled "Executor")
ProxyScalingOne = '/home/haseeb/Desktop/Plotting/OverleafGraphs/OramData/PerLayer/Proxy/3_3_1'
ProxyScalingTwo = '/home/haseeb/Desktop/Plotting/OverleafGraphs/OramData/PerLayer/Proxy/3_3_2'
ProxyScalingThree = '/home/haseeb/Desktop/Plotting/OverleafGraphs/OramData/PerLayer/Proxy/3_3_3'

oneProxy = read_text_files(ProxyScalingOne, '1', 3, "Executor")
twoProxy = read_text_files(ProxyScalingTwo, '2', 3, "Executor")
threeProxy = read_text_files(ProxyScalingThree, '3', 3, "Executor")

oneProxyDF = pd.DataFrame(oneProxy)
twoProxyDF = pd.DataFrame(twoProxy)
threeProxyDF = pd.DataFrame(threeProxy)

# Despite the name, proxyDF holds all three layers: Executor, Batcher, Resolver.
proxyDF = pd.concat([
    oneProxyDF, twoProxyDF, threeProxyDF,
    oneBatcherDF, twoBatcherDF, threeBatcherDF,
    oneResolverDF, twoResolverDF, threeResolverDF,
])

# Mean latency and throughput per (layer, scale factor).
average_metrics = (
    proxyDF
    .groupby(['Label', 'ScaleFactor'])
    .agg({'Latency(ms)': 'mean', 'Throughput': 'mean'})
    .reset_index()
)
# The former trailing rename of a "Machines" column was a no-op (this frame has
# no such column), so the frame is displayed directly.
average_metrics


Unnamed: 0,Label,ScaleFactor,Latency(ms),Throughput
0,Batcher,1,992.0,100.044444
1,Batcher,2,804.333333,121.233333
2,Batcher,3,784.333333,125.155556
3,Executor,1,3429.666667,28.033333
4,Executor,2,1313.333333,74.188889
5,Executor,3,784.333333,125.155556
6,Resolver,1,791.333333,124.177778
7,Resolver,2,782.0,125.844444
8,Resolver,3,784.333333,125.155556


### Join Bloom (Waffle)

In [9]:
# Waffle join runs for the three filter configurations.
completeBloomNew = "/home/haseeb/Desktop/Plotting/OverleafGraphs/WaffleData/JoinBloom/Bloom"
completeDefaultNew = "/home/haseeb/Desktop/Plotting/OverleafGraphs/WaffleData/JoinBloom/Default"
completeHybridNew = "/home/haseeb/Desktop/Plotting/OverleafGraphs/WaffleData/JoinBloom/Hybrid"

# NOTE(review): the component count is passed as the string "3" here, while the
# other cells pass the integer 3 -- confirm which one is intended.
completeBloomNewDF = pd.DataFrame(read_text_files(completeBloomNew, "3", "3", "Bloom"))
completeDefaultNewDF = pd.DataFrame(read_text_files(completeDefaultNew, "3", "3", "No Bloom"))
completeHybridNewDF = pd.DataFrame(read_text_files(completeHybridNew, "3", "3", "Hybrid"))

combinedDF = pd.concat([completeBloomNewDF, completeDefaultNewDF, completeHybridNewDF])

# Mean latency and throughput per filter configuration.
average_metrics = (
    combinedDF
    .groupby(['Label'])
    .agg({'Latency(ms)': 'mean', 'Throughput': 'mean'})
    .reset_index()
)
# The former trailing rename of a "Machines" column was a no-op (this frame has
# no such column), so the frame is displayed directly.
average_metrics

Unnamed: 0,Label,Latency(ms),Throughput
0,Bloom,523.333333,1879.366667
1,Hybrid,546.0,1800.466667
2,No Bloom,1396.666667,694.4


### Join Bloom (ORAM)

In [10]:
# ORAM join runs for the three filter configurations.
completeBloomNew = "/home/haseeb/Desktop/Plotting/OverleafGraphs/OramData/JoinBloom/Bloom"
completeDefaultNew = "/home/haseeb/Desktop/Plotting/OverleafGraphs/OramData/JoinBloom/Default"
completeHybridNew = "/home/haseeb/Desktop/Plotting/OverleafGraphs/OramData/JoinBloom/Hybrid"

# NOTE(review): the component count is passed as the string "3" here, while the
# other cells pass the integer 3 -- confirm which one is intended.
completeBloomNewDF = pd.DataFrame(read_text_files(completeBloomNew, "3", "3", "Bloom"))
completeDefaultNewDF = pd.DataFrame(read_text_files(completeDefaultNew, "3", "3", "No Bloom"))
completeHybridNewDF = pd.DataFrame(read_text_files(completeHybridNew, "3", "3", "Hybrid"))

combinedDF = pd.concat([completeBloomNewDF, completeDefaultNewDF, completeHybridNewDF])

# Mean latency and throughput per filter configuration.
average_metrics = (
    combinedDF
    .groupby(['Label'])
    .agg({'Latency(ms)': 'mean', 'Throughput': 'mean'})
    .reset_index()
)
# The former trailing rename of a "Machines" column was a no-op (this frame has
# no such column), so the frame is displayed directly.
average_metrics

Unnamed: 0,Label,Latency(ms),Throughput
0,Bloom,691.333333,142.222222
1,Hybrid,855.333333,114.633333
2,No Bloom,4751.666667,19.655556


In [11]:
## Values Present in Raw Data (Txt file)
# Nine runs, three per filter type; the three lists line up positionally.
data = {
    'Total Keys': [
        311472, 311732, 285123,      # Bloom
        427416, 413860, 457824,      # Hybrid
        1320152, 1320742, 1119386,   # No Bloom
    ],
    'Total Requests': [
        75859, 75923, 69272,         # Bloom
        69225, 66970, 74296,         # Hybrid
        28782, 28801, 24088,         # No Bloom
    ],
    'Type': ["Bloom"] * 3 + ["Hybrid"] * 3 + ["No Bloom"] * 3,
}
df_keys_requests = pd.DataFrame(data)

# Per-run ratio of total keys to total requests, kept under the 'Req/S' name.
df_keys_requests['Req/S'] = df_keys_requests['Total Keys'].div(
    df_keys_requests['Total Requests']
)

# Average of that ratio per filter type; the last line is the cell output.
avgReqs = df_keys_requests.groupby("Type")['Req/S'].mean().reset_index()
avgReqs

Unnamed: 0,Type,Req/S
0,Bloom,4.109274
1,Hybrid,6.172082
2,No Bloom,46.065158


### Range Bloom (Waffle)

In [12]:
# Waffle range-query runs: one Bloom and one "Default" (no Bloom) directory per selectivity.
selectivity5Bloom = '/home/haseeb/Desktop/Plotting/OverleafGraphs/WaffleData/RangeBloom/DefaultRange/Selectivity5/Bloom'
selectivity5NoBloom = '/home/haseeb/Desktop/Plotting/OverleafGraphs/WaffleData/RangeBloom/DefaultRange/Selectivity5/Default'

selectivity10Bloom = '/home/haseeb/Desktop/Plotting/OverleafGraphs/WaffleData/RangeBloom/DefaultRange/Selectivity10/Bloom'
selectivity10NoBloom = '/home/haseeb/Desktop/Plotting/OverleafGraphs/WaffleData/RangeBloom/DefaultRange/Selectivity10/Default'

selectivity20Bloom = '/home/haseeb/Desktop/Plotting/OverleafGraphs/WaffleData/RangeBloom/DefaultRange/Selectivity20/Bloom'
selectivity20NoBloom = '/home/haseeb/Desktop/Plotting/OverleafGraphs/WaffleData/RangeBloom/DefaultRange/Selectivity20/Default'

# The selectivity is passed as a string and ends up in the 'ScaleFactor' field.
selectivity5BloomData = read_text_files(selectivity5Bloom, '5', 3, "Bloom")
selectivity5NoBloomData = read_text_files(selectivity5NoBloom, '5', 3, "noBloom")

selectivity10BloomData = read_text_files(selectivity10Bloom, '10', 3, "Bloom")
selectivity10NoBloomData = read_text_files(selectivity10NoBloom, '10', 3, "noBloom")

selectivity20BloomData = read_text_files(selectivity20Bloom, '20', 3, "Bloom")
selectivity20NoBloomData = read_text_files(selectivity20NoBloom, '20', 3, "noBloom")

selectivity5BloomDF = pd.DataFrame(selectivity5BloomData)
selectivity5NoBloomDF = pd.DataFrame(selectivity5NoBloomData)
selectivity10BloomDF = pd.DataFrame(selectivity10BloomData)
selectivity10NoBloomDF = pd.DataFrame(selectivity10NoBloomData)
selectivity20BloomDF = pd.DataFrame(selectivity20BloomData)
selectivity20NoBloomDF = pd.DataFrame(selectivity20NoBloomData)

proxyDF = pd.concat([
    selectivity5BloomDF, selectivity5NoBloomDF,
    selectivity10BloomDF, selectivity10NoBloomDF,
    selectivity20BloomDF, selectivity20NoBloomDF,
]).rename(columns={'ScaleFactor': 'Selectivity', 'Label': "Filter"})

# Selectivities are strings; an ordered Categorical makes them sort 5 < 10 < 20
# instead of lexicographically.
order = ["5", "10", "20"]
proxyDF['Selectivity'] = pd.Categorical(proxyDF['Selectivity'], categories=order, ordered=True)
proxyDF = proxyDF.sort_values('Selectivity')

# 'Selectivity' is a Categorical group key, and leaving `observed` at its implicit
# default triggers a pandas FutureWarning. observed=False is spelled out so the
# result (every Filter x Selectivity combination kept) stays exactly as before.
average_metrics = (
    proxyDF
    .groupby(['Filter', 'Selectivity'], observed=False)
    .agg({'Latency(ms)': 'mean', 'Throughput': 'mean'})
    .reset_index()
)
average_metrics

  average_metrics = proxyDF.groupby(['Filter','Selectivity']).agg({'Latency(ms)': 'mean', 'Throughput': 'mean'}).reset_index()


Unnamed: 0,Filter,Selectivity,Latency(ms),Throughput
0,Bloom,5,474.666667,2051.044444
1,Bloom,10,660.0,1502.133333
2,Bloom,20,919.666667,1056.511111
3,noBloom,5,1357.0,715.177778
4,noBloom,10,2674.0,355.233333
5,noBloom,20,5354.666667,170.544444


### Range Bloom (ORAM)

In [13]:
# ORAM range-query runs: one Bloom and one NoBloom directory per selectivity.
selectivity5Bloom = '/home/haseeb/Desktop/Plotting/OverleafGraphs/OramData/RangeBloom/DefaultRange/Selectivity5/Bloom'
selectivity5NoBloom = '/home/haseeb/Desktop/Plotting/OverleafGraphs/OramData/RangeBloom/DefaultRange/Selectivity5/NoBloom'

selectivity10Bloom = '/home/haseeb/Desktop/Plotting/OverleafGraphs/OramData/RangeBloom/DefaultRange/Selectivity10/Bloom'
selectivity10NoBloom = '/home/haseeb/Desktop/Plotting/OverleafGraphs/OramData/RangeBloom/DefaultRange/Selectivity10/NoBloom'

selectivity20Bloom = '/home/haseeb/Desktop/Plotting/OverleafGraphs/OramData/RangeBloom/DefaultRange/Selectivity20/Bloom'
selectivity20NoBloom = '/home/haseeb/Desktop/Plotting/OverleafGraphs/OramData/RangeBloom/DefaultRange/Selectivity20/NoBloom'

# The selectivity is passed as a string and ends up in the 'ScaleFactor' field.
selectivity5BloomData = read_text_files(selectivity5Bloom, '5', 3, "Bloom")
selectivity5NoBloomData = read_text_files(selectivity5NoBloom, '5', 3, "noBloom")

selectivity10BloomData = read_text_files(selectivity10Bloom, '10', 3, "Bloom")
selectivity10NoBloomData = read_text_files(selectivity10NoBloom, '10', 3, "noBloom")

selectivity20BloomData = read_text_files(selectivity20Bloom, '20', 3, "Bloom")
selectivity20NoBloomData = read_text_files(selectivity20NoBloom, '20', 3, "noBloom")

selectivity5BloomDF = pd.DataFrame(selectivity5BloomData)
selectivity5NoBloomDF = pd.DataFrame(selectivity5NoBloomData)
selectivity10BloomDF = pd.DataFrame(selectivity10BloomData)
selectivity10NoBloomDF = pd.DataFrame(selectivity10NoBloomData)
selectivity20BloomDF = pd.DataFrame(selectivity20BloomData)
selectivity20NoBloomDF = pd.DataFrame(selectivity20NoBloomData)

proxyDF = pd.concat([
    selectivity5BloomDF, selectivity5NoBloomDF,
    selectivity10BloomDF, selectivity10NoBloomDF,
    selectivity20BloomDF, selectivity20NoBloomDF,
]).rename(columns={'ScaleFactor': 'Selectivity', 'Label': "Filter"})

# Selectivities are strings; an ordered Categorical makes them sort 5 < 10 < 20
# instead of lexicographically.
order = ["5", "10", "20"]
proxyDF['Selectivity'] = pd.Categorical(proxyDF['Selectivity'], categories=order, ordered=True)
proxyDF = proxyDF.sort_values('Selectivity')

# 'Selectivity' is a Categorical group key, and leaving `observed` at its implicit
# default triggers a pandas FutureWarning. observed=False is spelled out so the
# result (every Filter x Selectivity combination kept) stays exactly as before.
average_metrics = (
    proxyDF
    .groupby(['Filter', 'Selectivity'], observed=False)
    .agg({'Latency(ms)': 'mean', 'Throughput': 'mean'})
    .reset_index()
)
average_metrics


  average_metrics = proxyDF.groupby(['Filter','Selectivity']).agg({'Latency(ms)': 'mean', 'Throughput': 'mean'}).reset_index()


Unnamed: 0,Filter,Selectivity,Latency(ms),Throughput
0,Bloom,5,514.333333,191.533333
1,Bloom,10,1192.333333,79.833333
2,Bloom,20,2518.0,29.177778
3,noBloom,5,1718.666667,55.744444
4,noBloom,10,5661.0,15.988889
5,noBloom,20,11735.333333,7.0
