In [1]:
import pandas as pd
import numpy as np
import ast
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go
from scipy.stats import pearsonr, ttest_rel


In [2]:
df = pd.read_csv("data/output.csv", engine="pyarrow")
df['votes'] = df['votes'].apply(ast.literal_eval)
df = df[df["base_vote"]!=0].reset_index()

df["median"] = [np.median(v) for v in df["votes"]]
df["mean"] = [np.mean(v) for v in df["votes"]]

df.head()

Unnamed: 0,index,project_name,votes,number_of_votes,base_vote,base_vote_round,median,mean
0,11,AW House/Komorebi's Hacker House,"[77000.0, 77000.0, 50000.0, 4000000.0, 50000.0...",46,76923.0,77000.0,77000.0,239371.73913
1,14,Across Protocol,"[50000.0, 25000.0, 39000.0, 200000.0, 32000.0,...",42,38462.0,39000.0,39000.0,105635.714286
2,16,Aerodrome Finance,"[100000.0, 100000.0, 25000.0, 25000.0, 50000.0...",51,615385.0,620000.0,110000.0,428627.45098
3,19,Airstack,"[77000.0, 150000.0, 77000.0, 25000.0, 10000.0,...",34,76923.0,77000.0,48500.0,47794.117647
4,20,Alchemy,"[100.0, 77000.0, 1900.0, 50000.0, 75000.0, 770...",59,76923.0,77000.0,75000.0,102255.949153


In [3]:
# get all data and check result without zero votes
median_without_0 = []
mean_without_0 = []
n_0 = []
for i, row in df.iterrows():
    votes = [v for v in row["votes"] if v!=0]
    n_0.append(len([v for v in row["votes"] if v==0]))
    median_without_0.append(np.median(votes))
    mean_without_0.append(np.mean(votes))
    
    
df["median_without_0"] = median_without_0
df["mean_without_0"] = mean_without_0
df["n_0"] = n_0


In [4]:
df.sort_values("n_0", ascending=0)[["project_name", 'n_0', 'number_of_votes']]

Unnamed: 0,project_name,n_0,number_of_votes
4,Alchemy,12,59
35,Messari,8,39
55,Tenderly,7,74
19,Flipside Crypto,5,47
10,Covalent,5,39
...,...,...,...
52,Superscan,0,45
53,Surreal,0,28
56,Test in Prod,0,70
59,Vectorized,0,72


In [11]:
colors = ['gold', 'mediumturquoise', 'darkorange', 'lightgreen']
n = 20
fig = go.Figure(data=[go.Pie(labels=df.sort_values("n_0", ascending=0)["project_name"][:n],
                             values=df.sort_values("n_0", ascending=0)["n_0"][:n])])
fig.update_traces(hoverinfo='label+percent', textinfo='label+percent', textfont_size=20,
                  marker=dict(colors=colors, line=dict(color='#000000', width=2)))
fig.update(
    # layout_title_text='Percent of 0 votes for ten Project',
    layout_showlegend=False
)

# fig.update_layout(
#     # xaxis=dict(title='Project Name'),
#     # yaxis=dict(title='Difference in Median'),
#     height=600,
#     template='plotly_dark',
#     margin={'t':0,'b':0,},
# )
fig.update_layout(
    title={
        'text': '% 0 Vote for Dapps',
        'y': 0.95,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top',
        'font': {'size': 32}
    },
    # xaxis=dict(title='Dapps'),
    # yaxis=dict(title='Median - Median 0 votes'),
    height=780, width=1020,
    template='plotly_dark',
    margin={'t':80,'b':10},
    showlegend=False,
)
fig.write_image("data/perc_0_votes.png", height=780, width=1020)
fig.show()

In [5]:
df['difference_0'] = df['median'] - df['median_without_0']
n = 20

fig = go.Figure()

fig.add_trace(go.Bar(
    x=df.sort_values("difference_0", ascending=1)['project_name'][:n],
    y=df.sort_values("difference_0", ascending=1)['difference_0'][:n],
    name='',
    marker_color='#ff0420'
))

# 
fig.update_traces(textfont_size=20)
# fig.update(
#     layout_showlegend=False
# )

fig.update_layout(
    title={
        'text': 'Median vs Median without 0 Vote',
        'y': 0.95,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top',
        'font': {'size': 32}
    },
    xaxis=dict(title='Dapps', title_font={'size': 20},
        tickfont={'size': 20}),
    yaxis=dict(title='Median - Median 0 votes', title_font={'size': 20},
        tickfont={'size': 20}),
    height=780, width=1020,
    template='plotly_dark',
    margin={'t':80,'b':10},
    showlegend=False,
)
fig.write_image("data/median_0_votes.png", height=780, width=1020)
fig.show()

In [4]:
df['difference_0'] = df['median_without_0'] - df['median']
print(f"Mean difference: {df['difference_0'].mean()}")
print(f"Standard deviation of difference: {df['difference_0'].std()}")

print(f"Mean distance from median {(df["difference_0"]/df['median']).abs().mean():.2%}")

fig = go.Figure()

fig.add_trace(go.Bar(
    x=df['project_name'],
    y=df['difference_0'],
    name='Difference in Median',
    marker_color='#6d6df1'
))

fig.add_hline(
    # x=df['project_name'],
    y=df['difference_0'].mean(),
    line_color='red'
)

fig.update_layout(
    title='Difference in Median with and without 0 Vote',
    xaxis=dict(title='Project Name'),
    yaxis=dict(title='Difference in Median'),
    height=600,
    template='plotly_dark',
    margin={'t':0,'b':0,},
)


fig.show()
# fig.write_image("data/median_0.svg")

Mean difference: 2540.9253499222395
Standard deviation of difference: 6685.710697468462
Mean distance from median inf%


In [6]:
median_without_base = []
mean_without_base = []
for i, row in df.iterrows():
    if row["base_vote"] != 0:
        row["votes"].remove(row["base_vote_round"])
        median_without_base.append(np.median(row["votes"]))
        mean_without_base.append(np.mean(row["votes"]))
    else:
        median_without_base.append(np.median(row["votes"]))
        mean_without_base.append(np.mean(row["votes"]))
    
    
df["median_without_base"] = median_without_base
df["mean_without_base"] = mean_without_base


In [7]:
df['difference'] = df['median_without_base'] - df['median']
print(f"Mean difference: {df['difference'].mean()}")
print(f"Standard deviation of difference: {df['difference'].std()}")


correlation, p_value_corr = pearsonr(df['median'], df['median_without_base'])
print(f"Correlation between median and median_without_base: {correlation}, P-value: {p_value_corr}")

print(f"Perc of exact the same vote: {(df["difference"]==0).sum()/len(df):.2%}")
print(f"Mean distance from median {(df["difference"]/df['median']).abs().mean():.2%}")

Mean difference: 521.7391304347826
Standard deviation of difference: 6850.523156605018
Correlation between median and median_without_base: 0.9982469082137659, P-value: 5.336107571129258e-84
Perc of exact the same vote: 62.32%
Mean distance from median 2.57%


In [8]:
(df["difference"]==0).sum()

43

In [9]:
# fig = px.bar(
#     df, x='project_name', y='difference',
#     labels={'project_name': 'Project Name', 'difference': 'Difference in Median'},
#     title='Difference in Median with and without Pollak Vote (higher mean Pollak voted higher)',
#     height=600
# )

# fig.show()

fig = go.Figure()

fig.add_trace(go.Bar(
    x=df.sort_values("difference")['project_name'],
    y=df.sort_values("difference")['difference'],
    # name='Difference in Median',
    marker_color='#6d6df1'
))

# fig.add_hline(
#     # x=df['project_name'],
#     y=df['difference'].mean(),
#     line_color='red'
# )

# fig.update_layout(
#     title='Difference in Median with and without Pollak Vote',
#     xaxis=dict(title='Project Name'),
#     yaxis=dict(title='Difference in Median'),
#     height=600,
#     template='plotly_dark',
#     margin={'t':0,'b':0,},
# )
fig.update_layout(
    title={
        'text': 'Median vs Median without Pollak votes',
        'y': 0.95,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top',
        'font': {'size': 32}
    },
    xaxis=dict(title='Dapps', title_font={'size': 20},
        tickfont={'size': 20}),
    yaxis=dict(title='Median - Median Pollak votes', title_font={'size': 20},
        tickfont={'size': 20}),
    height=780, width=1020,
    template='plotly_dark',
    margin={'t':80,'b':10},
    showlegend=False,
)

fig.show()
fig.write_image("data/median_pollak.png", height=780, width=1020)

In [10]:
df['difference_mean'] = df['mean_without_base'] - df['mean']
print(f"Mean difference: {df['difference_mean'].mean()}")
print(f"Standard deviation of difference: {df['difference_mean'].std()}")


correlation, p_value_corr = pearsonr(df['mean'], df['mean_without_base'])
print(f"Correlation between median and median_without_base: {correlation}, P-value: {p_value_corr}")

print(f"Perc of exact the same vote: {(df["difference_mean"]==0).sum()/len(df):.2%}")

Mean difference: 1189.8013224580534
Standard deviation of difference: 2098.9586459769384
Correlation between median and median_without_base: 0.9999747151434192, P-value: 1.1679933525766535e-145
Perc of exact the same vote: 0.00%


In [11]:
# fig = px.bar(
#     df, x='project_name', y='difference_mean',
#     labels={'project_name': 'Project Name', 'difference': 'Difference in Median'},
#     title='Difference in Mean with and without Pollak Vote (higher mean Pollak vote higher)',
#     height=600
# )

fig = go.Figure()

fig.add_trace(go.Bar(
    x=df.sort_values("difference_mean")['project_name'],
    y=df.sort_values("difference_mean")['difference_mean'],
    name='Difference in Mean',
    marker_color='#6d6df1'
))

# fig.add_hline(
#     # x=df['project_name'],
#     y=df['difference_mean'].mean(),
#     line_color='red'
# )

fig.update_layout(
    title={
        'text': 'Mean vs Mean without Pollak votes',
        'y': 0.95,
        'x': 0.5,
        'xanchor': 'center',
        'yanchor': 'top',
        'font': {'size': 32}
    },
    xaxis=dict(title='Dapps', title_font={'size': 20},
        tickfont={'size': 20}),
    yaxis=dict(title='Mean - Mean Pollak votes', title_font={'size': 20},
        tickfont={'size': 20}),
    height=780, width=1020,
    template='plotly_dark',
    margin={'t':80,'b':10},
    showlegend=False,
)

fig.show()
fig.write_image("data/mean_pollak.png", height=780, width=1020)

In [8]:
df['difference_median_0'] = [np.median(np.asarray(v)[np.nonzero(v)]) for v in df["votes"]] - df['median']

print(f"Mean difference: {df['difference_median_0'].abs().mean()}")
print(f"Standard deviation of difference: {df['difference_median_0'].std()}")

Mean difference: 5717.391304347826
Standard deviation of difference: 12835.58482702142


In [23]:
# fig = px.bar(
#     df, x='project_name', y='difference_median_0',
#     labels={'project_name': 'Project Name', 'difference_median_0': 'Difference in Median'},
#     title='Difference in Median with and without 0 Vote',
#     height=600,
#     template="plotly_dark",
    
# )
fig = go.Figure()

fig.add_trace(go.Bar(
    x=df['project_name'],
    y=df['difference_median_0'],
    name='Difference in Median',
    marker_color='#6d6df1'
))

fig.add_hline(
    # x=df['project_name'],
    y=df['difference_median_0'].mean(),
    line_color='red'
)

fig.update_layout(
    title='Difference in Median with and without Base Vote',
    xaxis=dict(title='Project Name'),
    yaxis=dict(title='Difference in Median'),
    height=600,
    template='plotly_dark',
    margin={'t':0,'b':0},
)


fig.show()
fig.write_image("data/median_without_0.svg")

In [10]:
df_expanded = df.explode('votes')

# Grafico della distribuzione dei voti per ciascun progetto
fig = px.histogram(df_expanded, x='votes', color='project_name', 
                   labels={'votes': 'Voti', 'project_name': 'Nome del Progetto'},
                   title='Votes distribution per project', 
                   barmode='overlay', nbins=50)

fig.show()

In [11]:
fig = px.box(df_expanded, x='project_name', y='votes',
             labels={'project_name': 'Nome del Progetto', 'votes': 'Voti'},
             title='Distribuzione dei Voti per Progetto',
             height=800)

fig.show()


In [12]:
df['mean_vote'] = df['votes'].apply(np.mean)
df['median_vote'] = df['votes'].apply(np.median)

fig = px.scatter(df, x='mean_vote', y='median_vote', color='project_name',
                 labels={'mean_vote': 'Media dei Voti', 'median_vote': 'Mediana dei Voti', 'project_name': 'Nome del Progetto'},
                 title='Confronto tra Progetti')

fig.show()


In [13]:
df[df["project_name"]=="Superscan"]

Unnamed: 0,index,project_name,votes,number_of_votes,base_vote,base_vote_round,median,mean,median_without_base,mean_without_base,difference,difference_mean,difference_median_0,mean_vote,median_vote
52,480,Superscan,"[77000.0, 50000.0, 75000.0, 78000.0, 12000.0, ...",45,76923.0,77000.0,65000.0,71977.777778,57500.0,71863.636364,-7500.0,-114.141414,-7500.0,71863.636364,57500.0


In [14]:
len(df)

69