In [18]:
import os
import pandas as pd
from tabulate import tabulate
import matplotlib.pyplot as plt


In [2]:
current_dir = os.getcwd()
parent_dir = os.path.abspath(os.path.join(current_dir, os.pardir))

In [3]:
column_types = {
    "timestampReceive": "int64",
    "timeParseNanosecond": "int64",
    "u": "int64",
    "s": "str",
    "b": "float64",
    "B": "float64",
    "a": "float64",
    "A": "float64",
    "Spread": "float64"
}
log_file_path_gcc = os.path.join(parent_dir, 'Logs', 'log_gcc.log')
df_gcc = pd.read_csv(log_file_path_gcc, delimiter=';', dtype=column_types)
df_gcc.columns = df_gcc.columns.str.strip()

log_file_path_rust = os.path.join(parent_dir, 'Logs', 'log_rust.log')
df_rust = pd.read_csv(log_file_path_rust, delimiter=';', dtype=column_types)
df_rust.columns = df_rust.columns.str.strip()

log_file_path_csharp = os.path.join(parent_dir, 'Logs', 'log_csharp.log')
df_csharp = pd.read_csv(log_file_path_csharp, delimiter=';', dtype=column_types)
df_csharp.columns = df_csharp.columns.str.strip()

In [4]:
df_gcc

Unnamed: 0,timestampReceive,timeParseNanosecond,u,s,b,B,a,A,Spread
0,1724537982409,14197,50779460213,BTCUSDT,63839.99,4.98518,63841.99,0.00009,2.00
1,1724537982409,1974,50779460214,BTCUSDT,63841.02,0.33014,63841.99,0.00009,0.97
2,1724537982409,1412,50779460217,BTCUSDT,63841.02,0.34572,63841.99,0.00009,0.97
3,1724537982409,1032,50779460228,BTCUSDT,63841.02,0.33014,63841.99,0.00009,0.97
4,1724537982409,932,50779460231,BTCUSDT,63841.98,0.01558,63841.99,0.00009,0.01
...,...,...,...,...,...,...,...,...,...
24214,1724538342179,2935,50779633317,BTCUSDT,63809.98,0.59561,63809.99,3.61297,0.01
24215,1724538342215,4138,50779633323,BTCUSDT,63809.98,0.59581,63809.99,3.61297,0.01
24216,1724538342221,1984,50779633324,BTCUSDT,63809.98,0.59581,63809.99,3.61257,0.01
24217,1724538342611,5150,50779633410,BTCUSDT,63809.98,0.57314,63809.99,3.61257,0.01


In [5]:
df_csharp

Unnamed: 0,timestampReceive,timeParseNanosecond,u,s,b,B,a,A,Spread
0,1724537984612,74567582,50779460761,BTCUSDT,63841.99,5.19904,63842.00,0.65545,0010000000002037268
1,1724537984689,24347,50779460770,BTCUSDT,63841.99,5.19904,63842.00,0.65533,0010000000002037268
2,1724537984703,18485,50779460776,BTCUSDT,63841.99,5.19904,63842.00,0.59163,0010000000002037268
3,1724537984799,48714,50779460786,BTCUSDT,63841.99,5.45726,63842.00,0.59163,0010000000002037268
4,1724537984836,953162,50779460792,BTCUSDT,63841.99,5.45712,63842.00,0.59163,0010000000002037268
...,...,...,...,...,...,...,...,...,...
24121,1724538342221,3176,50779633324,BTCUSDT,63809.98,0.59581,63809.99,3.61257,000999999999476131
24122,1724538342611,43502,50779633410,BTCUSDT,63809.98,0.57314,63809.99,3.61257,000999999999476131
24123,1724538342634,15099,50779633414,BTCUSDT,63809.98,0.57314,63809.99,3.61297,000999999999476131
24124,1724538342642,9227,50779633416,BTCUSDT,63809.98,0.59617,63809.99,3.61297,000999999999476131


In [6]:
df_rust

Unnamed: 0,timestampReceive,timeParseNanosecond,u,s,b,B,a,A,Spread
0,1724537985739,11011,50779460976,BTCUSDT,63841.99,6.39441,63842.00,0.23721,0.01
1,1724537985783,4539,50779460990,BTCUSDT,63841.99,6.39441,63842.00,0.22531,0.01
2,1724537985876,4278,50779461002,BTCUSDT,63841.99,6.65572,63842.00,0.22531,0.01
3,1724537985877,1342,50779461003,BTCUSDT,63841.99,6.65572,63842.00,0.27778,0.01
4,1724537985933,4168,50779461009,BTCUSDT,63841.99,6.64572,63842.00,0.27778,0.01
...,...,...,...,...,...,...,...,...,...
24019,1724538340540,6001,50779632990,BTCUSDT,63809.98,1.23502,63809.99,3.40236,0.01
24020,1724538340640,5891,50779633010,BTCUSDT,63809.98,1.23393,63809.99,3.40236,0.01
24021,1724538340644,3116,50779633013,BTCUSDT,63809.98,1.20610,63809.99,3.40236,0.01
24022,1724538340690,4529,50779633020,BTCUSDT,63809.98,1.20570,63809.99,3.40236,0.01


In [7]:
common_u = set(df_gcc['u']) & set(df_csharp['u']) & set(df_rust['u'])

In [8]:
df_gcc = df_gcc[df_gcc['u'].isin(common_u)]
df_csharp = df_csharp[df_csharp['u'].isin(common_u)]
df_rust = df_rust[df_rust['u'].isin(common_u)]

In [9]:

# Dados para a tabela
data = [
    ["Rust", df_rust['timeParseNanosecond'].mean(), df_rust['timeParseNanosecond'].median(), df_rust['timeParseNanosecond'].std()],
    ["C++", df_gcc['timeParseNanosecond'].mean(), df_gcc['timeParseNanosecond'].median(), df_gcc['timeParseNanosecond'].std()],
    ["C#", df_csharp['timeParseNanosecond'].mean(), df_csharp['timeParseNanosecond'].median(), df_csharp['timeParseNanosecond'].std()]
]

# Cabeçalhos da tabela
headers = ["Language", "Mean (ns)", "Median (ns)", "Std Dev (ns)"]

# Imprimir a tabela formatada
print(tabulate(data, headers, tablefmt="pretty", floatfmt=".2f"))

+----------+--------------------+-------------+--------------------+
| Language |     Mean (ns)      | Median (ns) |    Std Dev (ns)    |
+----------+--------------------+-------------+--------------------+
|   Rust   | 2301.522311022311  |   1753.0    | 2130.638758849336  |
|   C++    | 3056.8237595737596 |   2425.0    | 2398.8878779692154 |
|    C#    | 8232.229187479188  |   5475.5    | 7919.596080867382  |
+----------+--------------------+-------------+--------------------+


In [10]:
merged_df = df_rust[['u', 'timestampReceive']].merge(
    df_gcc[['u', 'timestampReceive']], on='u', suffixes=('_rust', '_gcc')).merge(
    df_csharp[['u', 'timestampReceive']], on='u')

merged_df.rename(columns={'timestampReceive': 'timestampReceive_csharp'}, inplace=True)

merged_df['diff_rust_gcc'] = merged_df['timestampReceive_rust'] - merged_df['timestampReceive_gcc']
merged_df['diff_rust_csharp'] = merged_df['timestampReceive_rust'] - merged_df['timestampReceive_csharp']

merged_df['diff_gcc_csharp'] = merged_df['timestampReceive_gcc'] - merged_df['timestampReceive_csharp']
merged_df['diff_gcc_rust'] = merged_df['timestampReceive_gcc'] - merged_df['timestampReceive_rust']

merged_df['diff_csharp_rust'] = merged_df['timestampReceive_csharp'] - merged_df['timestampReceive_rust']
merged_df['diff_csharp_gcc'] = merged_df['timestampReceive_csharp'] - merged_df['timestampReceive_gcc']

In [11]:
print(tabulate(merged_df.head(40000), headers='keys', tablefmt='pretty'))

+-------+-------------+-----------------------+----------------------+-------------------------+---------------+------------------+-----------------+---------------+------------------+-----------------+
|       |      u      | timestampReceive_rust | timestampReceive_gcc | timestampReceive_csharp | diff_rust_gcc | diff_rust_csharp | diff_gcc_csharp | diff_gcc_rust | diff_csharp_rust | diff_csharp_gcc |
+-------+-------------+-----------------------+----------------------+-------------------------+---------------+------------------+-----------------+---------------+------------------+-----------------+
|   0   | 50779460976 |     1724537985739     |    1724537985740     |      1724537985739      |      -1       |        0         |        1        |       1       |        0         |       -1        |
|   1   | 50779460990 |     1724537985783     |    1724537985784     |      1724537985783      |      -1       |        0         |        1        |       1       |        0         |    

In [12]:
print("gcc timetotal ", df_gcc['timestampReceive'].sum())
print("rust timetotal ", df_rust['timestampReceive'].sum())
print("c# timetotal ", df_csharp['timestampReceive'].sum())
print("merged_df diff_rust_csharp ", merged_df['diff_rust_csharp'].sum())

print("gcc - rust: ", df_gcc['timestampReceive'].sum() - df_rust['timestampReceive'].sum())
print("gcc - c#: ", df_gcc['timestampReceive'].sum() - df_csharp['timestampReceive'].sum())

print("rust - c#: ", df_rust['timestampReceive'].sum() - df_csharp['timestampReceive'].sum())

gcc timetotal  41430305073311365
rust timetotal  41430305073311139
c# timetotal  41430305073312492
merged_df diff_rust_csharp  -1353
gcc - rust:  226
gcc - c#:  -1127
rust - c#:  -1353


In [13]:
comparisons = ['diff_rust_gcc', 'diff_rust_csharp', 'diff_gcc_csharp', 'diff_gcc_rust', 'diff_csharp_rust', 'diff_csharp_gcc']
for comparison in comparisons:
    print(f"Comparison: {comparison}")
    print(" Mean: ", merged_df[comparison].mean())
    print(" Median: ", merged_df[comparison].median())
    print(" Std Dev: ", merged_df[comparison].std())
    print() 

Comparison: diff_rust_gcc
 Mean:  -0.009407259407259408
 Median:  0.0
 Std Dev:  4.985028887245876

Comparison: diff_rust_csharp
 Mean:  -0.05631868131868132
 Median:  0.0
 Std Dev:  3.160149721269657

Comparison: diff_gcc_csharp
 Mean:  -0.04691142191142191
 Median:  0.0
 Std Dev:  3.0395972207708013

Comparison: diff_gcc_rust
 Mean:  0.009407259407259408
 Median:  0.0
 Std Dev:  4.985028887245876

Comparison: diff_csharp_rust
 Mean:  0.05631868131868132
 Median:  0.0
 Std Dev:  3.160149721269657

Comparison: diff_csharp_gcc
 Mean:  0.04691142191142191
 Median:  0.0
 Std Dev:  3.0395972207708013



In [14]:
statistics = []
for comparison in comparisons:
    mean = merged_df[comparison].mean()
    median = merged_df[comparison].median()
    std_dev = merged_df[comparison].std()
    statistics.append([comparison, mean, median, std_dev])

print(tabulate(statistics, headers=['Comparison', 'Mean', 'Median', 'Std Dev'], tablefmt='pretty'))

+------------------+-----------------------+--------+--------------------+
|    Comparison    |         Mean          | Median |      Std Dev       |
+------------------+-----------------------+--------+--------------------+
|  diff_rust_gcc   | -0.009407259407259408 |  0.0   | 4.985028887245876  |
| diff_rust_csharp | -0.05631868131868132  |  0.0   | 3.160149721269657  |
| diff_gcc_csharp  | -0.04691142191142191  |  0.0   | 3.0395972207708013 |
|  diff_gcc_rust   | 0.009407259407259408  |  0.0   | 4.985028887245876  |
| diff_csharp_rust |  0.05631868131868132  |  0.0   | 3.160149721269657  |
| diff_csharp_gcc  |  0.04691142191142191  |  0.0   | 3.0395972207708013 |
+------------------+-----------------------+--------+--------------------+


In [15]:
# Filtrar e ordenar pela maior diferença onde C# é mais rápido que Rust
filtered_stats = merged_df[merged_df['diff_csharp_rust'] < 0].copy()
filtered_stats['abs_diff_csharp_rust'] = filtered_stats['diff_csharp_rust'].abs()
filtered_stats = filtered_stats.sort_values(by='abs_diff_csharp_rust', ascending=False)

# Preparar dados para exibição
filtered_statistics = []
for _, row in filtered_stats.iterrows():
    filtered_statistics.append([
        row['u'], 
        row['timestampReceive_csharp'], 
        row['timestampReceive_rust'], 
        row['timestampReceive_gcc'], 
        row['diff_csharp_rust']
    ])

# Exibir os resultados em formato tabular
print(tabulate(filtered_statistics, headers=['u','timestampReceive_gcc', 'timestampReceive_csharp', 'timestampReceive_rust', 'diff_csharp_rust'], tablefmt='pretty'))

+-------------+----------------------+-------------------------+-----------------------+------------------+
|      u      | timestampReceive_gcc | timestampReceive_csharp | timestampReceive_rust | diff_csharp_rust |
+-------------+----------------------+-------------------------+-----------------------+------------------+
| 50779461571 |    1724537987220     |      1724537987307      |     1724537987211     |       -87        |
| 50779461568 |    1724537987220     |      1724537987307      |     1724537987211     |       -87        |
| 50779461566 |    1724537987220     |      1724537987307      |     1724537987210     |       -87        |
| 50779461565 |    1724537987220     |      1724537987307      |     1724537987210     |       -87        |
| 50779461563 |    1724537987220     |      1724537987307      |     1724537987210     |       -87        |
| 50779461560 |    1724537987220     |      1724537987307      |     1724537987210     |       -87        |
| 50779461574 |    172453798

In [16]:
# Filtrar e ordenar pela maior diferença onde Rust é mais rápido que C#
filtered_stats = merged_df[merged_df['diff_rust_csharp'] < 0].copy()
filtered_stats['abs_diff_rust_csharp'] = filtered_stats['diff_rust_csharp'].abs()
filtered_stats = filtered_stats.sort_values(by='abs_diff_rust_csharp', ascending=False)

# Preparar dados para exibição
filtered_statistics = []
for _, row in filtered_stats.iterrows():
    filtered_statistics.append([
        row['u'], 
        row['timestampReceive_rust'], 
        row['timestampReceive_csharp'], 
        row['diff_rust_csharp']
    ])

# Exibir os resultados em formato tabular
print(tabulate(filtered_statistics, headers=['u', 'timestampReceive_rust', 'timestampReceive_csharp', 'diff_rust_csharp'], tablefmt='pretty'))

+-------------+-----------------------+-------------------------+------------------+
|      u      | timestampReceive_rust | timestampReceive_csharp | diff_rust_csharp |
+-------------+-----------------------+-------------------------+------------------+
| 50779607065 |     1724538304033     |      1724538304037      |        -4        |
| 50779462023 |     1724537987400     |      1724537987403      |        -3        |
| 50779462031 |     1724537987400     |      1724537987403      |        -3        |
| 50779462032 |     1724537987400     |      1724537987403      |        -3        |
| 50779462034 |     1724537987400     |      1724537987403      |        -3        |
| 50779462036 |     1724537987400     |      1724537987403      |        -3        |
| 50779462046 |     1724537987400     |      1724537987403      |        -3        |
| 50779462022 |     1724537987400     |      1724537987403      |        -3        |
| 50779462050 |     1724537987400     |      1724537987403      |

In [1]:
# Criar o gráfico
plt.figure(figsize=(30, 30))

# Plotar a diferença entre Rust e C# em relação ao campo 'u'
plt.scatter(merged_df['u'], merged_df['diff_rust_csharp'], alpha=0.6, c='blue', label='diff_rust_csharp')

# Adicionar títulos e rótulos
plt.title('Comparação entre diff_rust_csharp e u')
plt.xlabel('u')
plt.ylabel('diff_rust_csharp')
plt.axhline(0, color='black', linestyle='--', linewidth=0.7)  # Linha horizontal para referência
plt.legend()
plt.grid(True)

# Exibir o gráfico
plt.show()

NameError: name 'plt' is not defined