In [None]:
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
import numpy as np

In [None]:
#raw database from "HOPR Community Survey (Responses)"

In [None]:
# Load the raw survey export; every later cell works on (and rebinds) `df`.
df = pd.read_csv(filepath_or_buffer='hopr_com_surv.csv')

In [None]:
#merging the 3 NFT address variables into 1

In [None]:
# Normalise the three alternative NFT-address answers before they are merged:
#  * strip surrounding whitespace and lower-case, so the same address typed with
#    different capitalisation or a stray space compares equal in the later
#    de-duplication and merges;
#  * turn answers that are empty after stripping into NaN, so the fallback to the
#    next address column still applies to them.
for address_col in (
    'What is your xDAI address for receiving an NFT?',
    'What is your xDAI address for receiving an NFT?.1',
    'What is your xDAI address for receiving an NFT?.2',
):
    df[address_col] = df[address_col].str.strip().str.lower().replace('', np.nan)

In [None]:
# Start the unified `address` column: take the first address question and, where
# it is missing, fall back to the '.1' variant of the same question.
df = df.assign(
    address=lambda frame: frame['What is your xDAI address for receiving an NFT?'].fillna(
        frame['What is your xDAI address for receiving an NFT?.1']
    )
)

In [None]:
# Finish the unified `address` column: rows still missing an address take the
# answer from the '.2' variant of the question.
df = df.assign(
    address=lambda frame: frame['address'].fillna(
        frame['What is your xDAI address for receiving an NFT?.2']
    )
)

In [None]:
#removing the duplicates from the dataframe based on the addresses

In [None]:
# Keep the first response per address. Missing addresses are NOT treated as
# duplicates of each other: drop_duplicates() considers all NaN values equal and
# would keep only a single address-less respondent, silently discarding the rest.
# NOTE(review): assumes address-less responses should stay in the survey analysis;
# confirm if only respondents with an address are wanted.
has_no_address = df['address'].isna()
is_repeated_address = df['address'].duplicated(keep='first')
df = df[has_no_address | ~is_repeated_address]

In [None]:
#dropping the "technical" and open questions

In [None]:
# Remove the quality-control, "stop" and free-text columns listed below; they are
# not used by the cross-tabulations.
df = df.drop(
    [
        'For quality control, please select 4 as the answer to this question',
        'For quality control, please select 3 as the answer to this question',
        'Would you like to stop?',
        'Would you like to stop?.1',
        'Would you like to say anything to the team?',
        'How could we improve the setup process?.1',
    ],
    axis='columns',
)

In [None]:
#recoding the community membership to create a "linear" order

In [None]:
# Prefix every membership answer with a letter (a = longest membership ... f = not
# a member) so that alphabetical sorting of the categories gives a sensible order.
df['How long have you been a HOPR community member?'] = df[
    'How long have you been a HOPR community member?'
].replace(
    to_replace={
        'Since launch (Dec 2020 - February 2021)': 'a) Since launch (Dec 2020 - February 2021)',
        'More than a year': 'b) More than a year',
        '3 - 9 months': 'c) 3 - 9 months',
        '1 - 3 months': 'd) 1 - 3 months',
        'Less than 1 month': 'e) Less than 1 month',
        "I don't consider myself a HOPR community member": "f) I don't consider myself a HOPR community member",
    }
)

In [None]:
#creating an alternative variable for community membership (merging the less than 1 month into 1-3 months)

In [None]:
# Coarser copy of the membership question: "less than 1 month" is folded into the
# "1 - 3 months" category, every other answer is kept as is.
df['Community_member'] = df['How long have you been a HOPR community member?'].replace(
    'e) Less than 1 month',
    'd) 1 - 3 months',
)

In [None]:
#function for crosstabulation

In [None]:
def crostab(df, col1, col2):
    """Cross-tabulate two survey columns and show the result as a stacked bar chart.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding both columns.
    col1 : str
        "Dependent variable": its categories become the bars (x axis). Any
        closed-answer column can be used.
    col2 : str
        "Explanatory variable": its categories become the stacked segments. Any
        closed-answer column can be used.

    Returns
    -------
    pandas.DataFrame
        The row-normalised contingency table (each ``col1`` row sums to 1).

    Side effects
    ------------
    Displays the plotly figure via ``figure.show()``.
    """
    # normalize="index" turns counts into within-row shares, which is what makes
    # the stacked bars 100% bars instead of absolute numbers.
    df_cross = pd.crosstab(df[col1], df[col2], normalize="index")

    # Rows with a missing answer in either column do not enter the crosstab, so
    # the sample size shown in the title counts only rows answered in both
    # columns (len(df) would overstate it).
    sample_size = int((df[col1].notna() & df[col2].notna()).sum())

    # One bar trace per col2 category; barmode='stack' piles them up per col1 value.
    data = [
        go.Bar(name=str(category), x=df_cross.index, y=df_cross[category])
        for category in df_cross.columns
    ]

    title = col1 + " vs. <br>" + col2 + ' (sample size:' + str(sample_size) + ')'

    figure = go.Figure(data)
    figure.update_layout(barmode='stack', title_text=title)
    figure.show()

    return df_cross

In [None]:
# crostab can also be called on its own for a single pair of questions; the
# returned table is the cell's last expression, so it is displayed under the chart.
crostab(
    df,
    col1='How long have you been a HOPR community member?',
    col2='Do you read the HOPR Basics series? (https://medium.com/hoprnet/basics/home)',
)

In [None]:
#with the help of "summary" a whole set of questions can be analysed

In [None]:
# Dependent (dep) variables: each one is cross-tabulated against every
# explanatory (exp) variable by `summary`.
dep = [
    "How long have you been a HOPR community member?",
    "I consider myself a technical person",
    "I understand what cover traffic is",
]

In [None]:
# Explanatory (exp) variables used as the stacked segments in the cross-tabulations.
exp = [
    "Have you seen any of the HOPR Community Update videos?",
    "I care more about earning money from node running than providing data privacy",
    "I consider myself a technical person",
    "I know a lot about crypto",
    'I know what web3 means',
    'I understand how HOPR works',
    "I understand what HOPR is trying to achieve",
    "I understand what metadata is",
    "I know what a mixnet is",
    'I understand what "layer 0 protocol" means',
    'I understand what MEV is',
    "I understand how HOPR provides privacy",
    "I understand how HOPR incentivizes node runners",
    "I understand what cover traffic is",
    "I understand HOPR's proof-of relay mechanism",
    "I'm interested in HOPR governance / the HOPR DAO",
    "Do you read the HOPR Basics series? (https://medium.com/hoprnet/basics/home)",
    "Have you run a HOPR node before?",
    "Did you participate in either of the HOPR Governance experiments? (Either on the forum or by voting)",
    "Do you participate in the HOPR staking program?",
]

In [None]:
#"summary" gives you the list of crosstabulations, included in the included lists
def summary (dependent, explanatory):
    """Draw a crosstab chart for every (dependent, explanatory) column pair.

    Works on the notebook-level ``df``. The tables returned by ``crostab`` are
    gathered in a local list that is not returned, so the call yields ``None``.

    NOTE(review): a later cell defines ``summary(df, dependent, explanatory)``
    under the same name, which replaces this version once that cell has run.
    """
    analytics = []
    for dependent_col in dependent:
        for explanatory_col in explanatory:
            analytics.append(crostab(df, dependent_col, explanatory_col))

In [None]:
#"summary" gives you the list of crosstabulations, included in the included lists
def summary (df, dependent, explanatory):
    """Draw a crosstab chart for every (dependent, explanatory) column pair.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame passed through to ``crostab``.
    dependent : sequence of str
        Column names used as ``col1`` (outer loop).
    explanatory : sequence of str
        Column names used as ``col2`` (inner loop).

    Returns
    -------
    list
        The tables returned by ``crostab``, ordered dependent-major (all
        explanatory columns for the first dependent column, then the next).
        Previously the list was built but never returned. In a notebook, end
        the calling line with ``;`` if the list repr is not wanted as output.
    """
    return [
        crostab(df, dependent_col, explanatory_col)
        for dependent_col in dependent
        for explanatory_col in explanatory
    ]

In [None]:
# Every dependent question against every explanatory question on the full survey.
# The trailing semicolon limits the cell output to the figures themselves.
summary(df, dependent=dep, explanatory=exp);

In [None]:
# for numeric variables it's easier to use/understand correlation

In [None]:
def heatmap (df_only_numeric):
    """Show the pairwise correlation matrix of a frame as a plotly heatmap.

    Parameters
    ----------
    df_only_numeric : pandas.DataFrame
        Frame whose columns are meant to be numeric (Likert scales included).
        Columns that are neither numeric nor boolean are left out of the
        correlation instead of making ``DataFrame.corr`` raise, which it does
        for non-numeric columns in pandas >= 2.0.

    Returns
    -------
    None
        The figure is displayed via ``fig.show()``.
    """
    # Restrict to columns corr() can handle on every pandas version.
    numeric_part = df_only_numeric.select_dtypes(include=['number', 'bool'])
    df_corr = numeric_part.corr()

    heat = go.Heatmap(
        z = df_corr,
        x = df_corr.columns.values,
        y = df_corr.columns.values,
        zmin = - 1,  # fix the colour domain to the full correlation range [-1, 1]
        zmax = 1,
        colorscale = 'RdBu'
    )

    title = 'Correlation Matrix'

    layout = go.Layout(
        title_text=title,
        title_x=0.5,
        xaxis_showgrid=False,
        yaxis_showgrid=False,
        # reversed y axis: first column at the top, so the diagonal runs
        # top-left to bottom-right
        yaxis_autorange='reversed'
    )

    fig = go.Figure(data=[heat], layout=layout)
    fig.show()

In [None]:
# The relationships between these questions are checked with correlation rather
# than with cross-tabulation.
df_numeric_list = df.loc[
    :,
    [
        "Did you participate in either of the HOPR Governance experiments? (Either on the forum or by voting)",
        "I care more about earning money from node running than providing data privacy",
        "I consider myself a technical person",
        "I know a lot about crypto",
        'I know what web3 means',
        'I understand how HOPR works',
        "I understand what HOPR is trying to achieve",
        "I understand what metadata is",
        "I know what a mixnet is",
        'I understand what "layer 0 protocol" means',
        'I understand what MEV is',
        "I understand how HOPR provides privacy",
        "I understand how HOPR incentivizes node runners",
        "I understand what cover traffic is",
        "I understand HOPR's proof-of relay mechanism",
        "I'm interested in HOPR governance / the HOPR DAO",
    ],
]

In [None]:
# Correlation heatmap of the questions selected above.
heatmap(df_only_numeric=df_numeric_list)

In [None]:
# voting on Dai, HOPR and participation (df_d, df_h, df_p)

In [None]:
# Three inputs for the voting analysis, one frame each:
# df_d <- snapshot-report-d.csv, df_h <- snapshot-report-h.csv, df_p <- dao_alloc_full.csv
df_d, df_h, df_p = (
    pd.read_csv(csv_name)
    for csv_name in ('snapshot-report-d.csv', 'snapshot-report-h.csv', 'dao_alloc_full.csv')
)

In [None]:
#dropping columns which are not needed

In [None]:
# The same three bookkeeping columns are removed from both snapshot reports.
df_h, df_d = (
    report.drop(columns=['timestamp', 'dateUtc', 'authorIpfsHash'])
    for report in (df_h, df_d)
)

In [None]:
#renaming the "choice" variable to the proposal number

In [None]:
# Column labels are assigned by position, so each list must have exactly one name
# per remaining column, in the order the columns appear in the frame.
# NOTE(review): the mapping of positions to proposal numbers relies on the CSV
# column order, which is not visible here - confirm against the files.
df_h.columns = [
    'address',
    'h5',
    'balance_h',
    'h4',
    'h3',
    'h2',
    'h7',
]
df_d.columns = [
    'address',
    'd12',
    'balance_d',
    'd3',
    'd2',
    'd6',
    'd7',
    'd14',
    'd10',
]

In [None]:
#changing NAN to 0

In [None]:
# Missing entries in both vote tables become 0 (applies to every column).
df_h, df_d = df_h.fillna(value=0), df_d.fillna(value=0)

In [None]:
#recoding weight to 1

In [None]:
# Recode each HOPR proposal column to a 0/1 flag: 1 where the stored value is
# anything other than 0, else 0. One vectorised comparison per column replaces
# five copy-pasted apply(lambda ...) lines.
for choice_col in ['h5', 'h4', 'h3', 'h2', 'h7']:
    df_h[choice_col] = df_h[choice_col].ne(0).astype('int64')

In [None]:
# Recode each DAI proposal column to a 0/1 flag: 1 where the stored value is
# anything other than 0, else 0. One vectorised comparison per column replaces
# seven copy-pasted apply(lambda ...) lines.
for choice_col in ['d12', 'd3', 'd2', 'd6', 'd7', 'd14', 'd10']:
    df_d[choice_col] = df_d[choice_col].ne(0).astype('int64')

In [None]:
#changing addresses to lower case

In [None]:
# Lower-case the address key of all three tables so that it matches the
# lower-cased survey addresses in the merges further down.
df_d = df_d.assign(address=lambda votes: votes['address'].str.lower())
df_h = df_h.assign(address=lambda votes: votes['address'].str.lower())
df_p = df_p.assign(add=lambda alloc: alloc['add'].str.lower())

In [None]:
#removing duplicates and selecting the accounts with higher than 0 scores

In [None]:
# One row per address: the highest `score` and the first `NFT_status` found for it.
# The list-valued aggregation spec yields two-level column labels; they are
# replaced by flat names in the next cell.
df_p2 = (
    df_p
    .groupby('add')
    .agg({'score': ['max'], 'NFT_status': ['first']})
    .reset_index()
)
df_p2.head(3)

In [None]:
# Flatten the column labels: address key, highest score, NFT status.
df_p2 = df_p2.set_axis(['address', 'H_score', 'NFT_status'], axis='columns')

In [None]:
# Drop every row with a missing value in any column, then renumber the rows.
# NOTE(review): an earlier comment mentions keeping only accounts with scores
# above 0, but no such filter is applied here - only missing values are removed.
df_p2 = df_p2.dropna(axis=0, how='any').reset_index(drop=True)

In [None]:
#merging the dataframes

In [None]:
# Left join: every survey row is kept; H_score / NFT_status are added where the
# address is found in df_p2.
df_m = df.merge(df_p2, on='address', how='left')

In [None]:
# Left join of the HOPR vote table (h* flags and balance_h) on the address.
df_m = df_m.merge(df_h, on='address', how='left')

In [None]:
# Left join of the DAI vote table (d* flags and balance_d) on the address.
df_m = df_m.merge(df_d, on='address', how='left')

In [None]:
#maximizing the number of rows with an available balance (the HOPR balance is used where the DAI one is missing)

In [None]:
# Single balance column: balance_d where present, otherwise balance_h.
df_m = df_m.assign(
    balance=lambda merged: merged['balance_d'].fillna(merged['balance_h'])
)

In [None]:
#subdataframe for voters

In [None]:
# Sub-frames of the merged table, each keeping only the rows where the named
# column is present, with a fresh 0..n-1 index:
#   dai_voters  - balance_d is not missing
#   hopr_voters - balance_h is not missing
#   forum       - NFT_status is not missing
dai_voters = df_m.loc[df_m['balance_d'].notna()].reset_index(drop=True)
hopr_voters = df_m.loc[df_m['balance_h'].notna()].reset_index(drop=True)
forum = df_m.loc[df_m['NFT_status'].notna()].reset_index(drop=True)

In [None]:
#creating a variable for voters who have chosen proposal 12|7 in dai or 4|5 in hopr

In [None]:
# Boolean flags: True when at least one of the two named proposal columns is 1.
dai_voters['dai_proposal_12|7'] = dai_voters[['d12', 'd7']].eq(1).any(axis=1)
hopr_voters['hopr_proposal_4|5'] = hopr_voters[['h4', 'h5']].eq(1).any(axis=1)

In [None]:
#size of the d12|d7 voters group

In [None]:
# How many DAI voters fall on each side of the 12|7 flag.
dai_voters.loc[:, 'dai_proposal_12|7'].value_counts()

In [None]:
# How many HOPR voters fall on each side of the 4|5 flag.
hopr_voters.loc[:, 'hopr_proposal_4|5'].value_counts()

In [None]:
# One-element explanatory lists, one per voter sub-frame, in the form `summary` expects.
exp_dai, exp_hopr, exp_forum = (
    ['dai_proposal_12|7'],
    ['hopr_proposal_4|5'],
    ['NFT_status'],
)

In [None]:
# Dependent questions vs. the 12|7 flag, DAI voters only.
# The trailing semicolon limits the cell output to the figures themselves.
summary(dai_voters, dependent=dep, explanatory=exp_dai);

In [None]:
# Dependent questions vs. the 4|5 flag, HOPR voters only.
# The trailing semicolon limits the cell output to the figures themselves.
summary(hopr_voters, dependent=dep, explanatory=exp_hopr);

In [None]:
# Dependent questions vs. NFT_status, for rows that have an NFT_status.
# The trailing semicolon limits the cell output to the figures themselves.
summary(forum, dependent=dep, explanatory=exp_forum);