In [1]:
import pandas as pd
import numpy as np
import ast
import matplotlib.pyplot as plt
import plotly.express as px
from scipy.stats import pearsonr, ttest_rel

In [2]:
# Load the per-project vote table produced upstream.
df = pd.read_csv("data/output.csv", engine="pyarrow")

# `votes` is stored in the CSV as the text form of a Python list;
# turn every cell back into a real list object.
df["votes"] = df["votes"].map(ast.literal_eval)

# Keep only the projects whose base vote is non-zero. reset_index() keeps
# the previous row labels in a new "index" column.
has_base_vote = df["base_vote"] != 0
df = df.loc[has_base_vote].reset_index()
df.head()

Unnamed: 0,index,project_name,votes,number_of_votes,base_vote,base_vote_round
0,11,AW House/Komorebi's Hacker House,"[77000.0, 77000.0, 50000.0, 4000000.0, 50000.0...",46,76923.0,77000.0
1,14,Across Protocol,"[50000.0, 25000.0, 39000.0, 200000.0, 32000.0,...",42,38462.0,39000.0
2,16,Aerodrome Finance,"[100000.0, 100000.0, 25000.0, 25000.0, 50000.0...",51,615385.0,620000.0
3,19,Airstack,"[77000.0, 150000.0, 77000.0, 25000.0, 10000.0,...",34,76923.0,77000.0
4,20,Alchemy,"[100.0, 77000.0, 1900.0, 50000.0, 75000.0, 770...",59,76923.0,77000.0


In [3]:
# Per-project median and mean over every vote (base vote included).
df["median"] = [np.median(v) for v in df["votes"]]
df["mean"] = [np.mean(v) for v in df["votes"]]

# Same statistics after dropping ONE occurrence of the rounded base vote.
#
# The removal is done on a copy of each list: list.remove() mutates in
# place, and the lists handed out while iterating are the very objects
# stored in df["votes"]. Mutating them would silently change the input of
# every later cell and make this cell give different results (or raise)
# when it is run a second time.
median_without_base = []
mean_without_base = []
for votes, base_vote, base_vote_round in zip(
    df["votes"], df["base_vote"], df["base_vote_round"]
):
    votes_without_base = list(votes)
    if base_vote != 0:
        # Raises ValueError when the rounded base vote is not among the
        # votes, which points at inconsistent input data.
        votes_without_base.remove(base_vote_round)
    median_without_base.append(np.median(votes_without_base))
    mean_without_base.append(np.mean(votes_without_base))

df["median_without_base"] = median_without_base
df["mean_without_base"] = mean_without_base


In [4]:
# Shift of each project's median once the base vote is removed
# (positive value: the median is higher without the base vote).
df['difference'] = df['median_without_base'] - df['median']
print(f"Mean difference: {df['difference'].mean()}")
print(f"Standard deviation of difference: {df['difference'].std()}")

# Pearson correlation between the two per-project medians.
correlation, p_value_corr = pearsonr(df['median'], df['median_without_base'])
print(f"Correlation between median and median_without_base: {correlation}, P-value: {p_value_corr}")

# Share of projects whose median is exactly unchanged.
# Single quotes are used inside the f-string because reusing the outer
# quote character is a SyntaxError on Python versions before 3.12.
print(f"Perc of exact the same vote: {(df['difference']==0).sum()/len(df):.2%}")

Mean difference: 521.7391304347826
Standard deviation of difference: 6850.523156605018
Correlation between median and median_without_base: 0.9982469082137659, P-value: 5.336107571129258e-84
Perc of exact the same vote: 62.32%


In [5]:
# One bar per project showing `difference`
# (median without the base vote minus median with it).
fig = px.bar(
    data_frame=df,
    x='project_name',
    y='difference',
    title='Difference in Median with and without Base Vote (higher mean base vote higher)',
    labels={
        'project_name': 'Project Name',
        'difference': 'Difference in Median',
    },
    height=600,
)
fig.show()

In [13]:
# Shift of each project's mean once the base vote is removed
# (positive value: the mean is higher without the base vote).
df['difference_mean'] = df['mean_without_base'] - df['mean']
print(f"Mean difference: {df['difference_mean'].mean()}")
print(f"Standard deviation of difference: {df['difference_mean'].std()}")

# Pearson correlation between the two per-project means. The printed label
# names the mean columns, since those are the ones being correlated here.
correlation, p_value_corr = pearsonr(df['mean'], df['mean_without_base'])
print(f"Correlation between mean and mean_without_base: {correlation}, P-value: {p_value_corr}")

# Share of projects whose mean is exactly unchanged.
# Single quotes are used inside the f-string because reusing the outer
# quote character is a SyntaxError on Python versions before 3.12.
print(f"Perc of exact the same vote: {(df['difference_mean']==0).sum()/len(df):.2%}")

Mean difference: 1189.8013224580534
Standard deviation of difference: 2098.9586459769384
Correlation between median and median_without_base: 0.9999747151434192, P-value: 1.1679933525766535e-145
Perc of exact the same vote: 0.00%


In [12]:
# One bar per project showing `difference_mean`
# (mean without the base vote minus mean with it).
fig = px.bar(
    df, x='project_name', y='difference_mean',
    # The labels dict is keyed by column name, so the y-axis entry must use
    # 'difference_mean' (the plotted column) for the label to be applied.
    labels={'project_name': 'Project Name', 'difference_mean': 'Difference in Mean'},
    title='Difference in Mean with and without Base Vote (higher mean base vote higher)',
    height=600
)

fig.show()

In [6]:
# Long format: one row per individual vote, project columns repeated.
df_expanded = df.explode('votes')

# Histogram of the vote distribution for each project, drawn as
# overlapping series coloured by project.
fig = px.histogram(
    data_frame=df_expanded,
    x='votes',
    color='project_name',
    nbins=50,
    barmode='overlay',
    title='Votes distribution per project',
    labels={'votes': 'Voti', 'project_name': 'Nome del Progetto'},
)
fig.show()

In [7]:
# Box plot of the individual votes, one box per project, built from the
# exploded one-row-per-vote frame.
fig = px.box(
    data_frame=df_expanded,
    x='project_name',
    y='votes',
    title='Distribuzione dei Voti per Progetto',
    labels={'project_name': 'Nome del Progetto', 'votes': 'Voti'},
    height=800,
)
fig.show()


In [8]:
# Mean and median of each project's vote list as currently stored in df.
df['mean_vote'] = df['votes'].map(np.mean)
df['median_vote'] = df['votes'].map(np.median)

# Scatter of mean against median, one point per project.
fig = px.scatter(
    data_frame=df,
    x='mean_vote',
    y='median_vote',
    color='project_name',
    title='Confronto tra Progetti',
    labels={
        'mean_vote': 'Media dei Voti',
        'median_vote': 'Mediana dei Voti',
        'project_name': 'Nome del Progetto',
    },
)
fig.show()
