In [1]:
import networkx as nx
import math
import pandas as pd
import sys

In [2]:
df = pd.read_csv('soc-sign-bitcoinotc-labeled.csv')
df

Unnamed: 0,Source,Target,Rating,Time
0,6,2,4,1289241912
1,6,5,2,1289241942
2,1,15,1,1289243140
3,4,3,7,1289245277
4,13,16,8,1289254254
...,...,...,...,...
35587,4499,1810,1,1453612482
35588,2731,3901,5,1453679428
35589,2731,4897,5,1453679434
35590,13,1128,1,1453679633


In [3]:
rated_10_nodes = df[df['Rating'] == 10]['Target'].unique()
most_common_ratings_given_10 = {node: df[df['Source'] == node]['Rating'].mode().tolist() for node in rated_10_nodes}

# Nodes most commonly rated -10
rated_minus_10_nodes = df[df['Rating'] == -10]['Target'].unique()
most_common_ratings_given_minus_10 = {node: df[df['Source'] == node]['Rating'].mode().tolist() for node in rated_minus_10_nodes}

print("Nodes most commonly rated 10 and their most common ratings given out:")
for node, ratings in most_common_ratings_given_10.items():
    print(f"Node {node} gives out ratings {ratings}")

print("\nNodes most commonly rated -10 and their most common ratings given out:")
for node, ratings in most_common_ratings_given_minus_10.items():
    print(f"Node {node} gives out ratings {ratings}")

Nodes most commonly rated 10 and their most common ratings given out:
Node 25 gives out ratings []
Node 1 gives out ratings [1]
Node 127 gives out ratings [1, 10]
Node 119 gives out ratings [10]
Node 202 gives out ratings [1]
Node 257 gives out ratings [5]
Node 284 gives out ratings [1]
Node 283 gives out ratings [1, 2]
Node 45 gives out ratings [1, 2]
Node 7 gives out ratings [1]
Node 395 gives out ratings [-10]
Node 201 gives out ratings [1]
Node 451 gives out ratings [1, 10]
Node 51 gives out ratings [1]
Node 104 gives out ratings [1]
Node 499 gives out ratings [1]
Node 111 gives out ratings [1]
Node 484 gives out ratings [1]
Node 529 gives out ratings [10]
Node 300 gives out ratings [1, 3, 10]
Node 570 gives out ratings [1]
Node 571 gives out ratings [1]
Node 159 gives out ratings [1]
Node 587 gives out ratings [1]
Node 623 gives out ratings [-10, 1, 10]
Node 304 gives out ratings [1]
Node 200 gives out ratings [1]
Node 342 gives out ratings [2, 4]
Node 635 gives out ratings [1]
No

In [5]:
rated_10_nodes = df[df['Rating'] == 10]['Target'].unique()
ratings_given_by_10_nodes = df[df['Source'].isin(rated_10_nodes)]['Rating']
mode_rating_given_by_10_nodes = ratings_given_by_10_nodes.mode().iloc[0]

# Nodes most commonly rated -10
rated_minus_10_nodes = df[df['Rating'] == -10]['Target'].unique()
ratings_given_by_minus_10_nodes = df[df['Source'].isin(rated_minus_10_nodes)]['Rating']
mode_rating_given_by_minus_10_nodes = ratings_given_by_minus_10_nodes.mode().iloc[0]

print(f"The mode rating given by nodes commonly rated 10 is {mode_rating_given_by_10_nodes}")
print(f"The mode rating given by nodes commonly rated -10 is {mode_rating_given_by_minus_10_nodes}")

The mode rating given by nodes commonly rated 10 is 1
The mode rating given by nodes commonly rated -10 is 1


In [6]:
rated_10_nodes = df[df['Rating'] == 10]['Target'].unique()
ratings_given_by_10_nodes = df[df['Source'].isin(rated_10_nodes)]['Rating']
average_rating_given_by_10_nodes = ratings_given_by_10_nodes.mean()

# Nodes most commonly rated -10
rated_minus_10_nodes = df[df['Rating'] == -10]['Target'].unique()
ratings_given_by_minus_10_nodes = df[df['Source'].isin(rated_minus_10_nodes)]['Rating']
average_rating_given_by_minus_10_nodes = ratings_given_by_minus_10_nodes.mean()

print(f"The average rating given by nodes commonly rated 10 is {average_rating_given_by_10_nodes}")
print(f"The average rating given by nodes commonly rated -10 is {average_rating_given_by_minus_10_nodes}")

The average rating given by nodes commonly rated 10 is 0.8602346988967746
The average rating given by nodes commonly rated -10 is -0.0001885369532428356


In [7]:
# Nodes rated between -10 and -3
rated_between_minus_10_and_minus_3_nodes = df[(df['Rating'] >= -10) & (df['Rating'] <= -3)]['Target'].unique()
ratings_given_by_minus_10_and_minus_3_nodes = df[df['Source'].isin(rated_between_minus_10_and_minus_3_nodes)]['Rating']
mode_rating_given_by_minus_10_and_minus_3_nodes = ratings_given_by_minus_10_and_minus_3_nodes.mode().iloc[0]

# Nodes rated between 3 and 10
rated_between_3_and_10_nodes = df[(df['Rating'] >= 3) & (df['Rating'] <= 10)]['Target'].unique()
ratings_given_by_3_and_10_nodes = df[df['Source'].isin(rated_between_3_and_10_nodes)]['Rating']
mode_rating_given_by_3_and_10_nodes = ratings_given_by_3_and_10_nodes.mode().iloc[0]

# Nodes rated between -3 and 3
rated_between_minus_3_and_3_nodes = df[(df['Rating'] >= -3) & (df['Rating'] <= 3)]['Target'].unique()
ratings_given_by_minus_3_and_3_nodes = df[df['Source'].isin(rated_between_minus_3_and_3_nodes)]['Rating']
mode_rating_given_by_minus_3_and_3_nodes = ratings_given_by_minus_3_and_3_nodes.mode().iloc[0]

print(f"The mode rating given by nodes rated between -10 and -3 is {mode_rating_given_by_minus_10_and_minus_3_nodes}")
print(f"The mode rating given by nodes rated between 3 and 10 is {mode_rating_given_by_3_and_10_nodes}")
print(f"The mode rating given by nodes rated between -3 and 3 is {mode_rating_given_by_minus_3_and_3_nodes}")

The mode rating given by nodes rated between -10 and -3 is 1
The mode rating given by nodes rated between 3 and 10 is 1
The mode rating given by nodes rated between -3 and 3 is 1


In [9]:
from statistics import mode

target_mode_ratings = df.groupby('Target')['Rating'].agg(lambda x: mode(x))

# Define the sub-range rating ranges
sub_rating_ranges = {
    'minus_3_to_0': (-3, 0),
    '0_to_3': (0, 3),
}

# Initialize a dictionary to hold the mode ratings for nodes in each sub-range
sub_range_mode_given = {}

# For each sub-range, find the nodes whose mode rating falls within that sub-range
for sub_range_name, (low, high) in sub_rating_ranges.items():
    # Nodes whose mode rating falls within the current sub-range
    nodes_in_sub_range = target_mode_ratings[(target_mode_ratings > low) & (target_mode_ratings <= high)].index.tolist()
    # All ratings given by these nodes
    ratings_given_by_nodes_in_sub_range = df[df['Source'].isin(nodes_in_sub_range)]['Rating']
    # Mode of the ratings given by these nodes
    mode_rating_given_by_nodes_in_sub_range = ratings_given_by_nodes_in_sub_range.mode().iloc[0] if not ratings_given_by_nodes_in_sub_range.empty else 'No mode rating'
    # Store the mode rating in the dictionary
    sub_range_mode_given[sub_range_name] = mode_rating_given_by_nodes_in_sub_range

# Print the mode rating for each sub-range
for sub_range_name, mode_rating in sub_range_mode_given.items():
    print(f"The mode rating given by nodes whose mode rating falls within {sub_range_name} is: {mode_rating}")

The mode rating given by nodes whose mode rating falls within minus_3_to_0 is: 1
The mode rating given by nodes whose mode rating falls within 0_to_3 is: 1


In [8]:
# Nodes rated between -10 and -3
rated_between_minus_10_and_minus_3_nodes = df[(df['Rating'] >= -10) & (df['Rating'] <= -3)]['Target'].unique()
ratings_given_by_minus_10_and_minus_3_nodes = df[df['Source'].isin(rated_between_minus_10_and_minus_3_nodes)]['Rating']
average_rating_given_by_minus_10_and_minus_3_nodes = ratings_given_by_minus_10_and_minus_3_nodes.mean()

# Nodes rated between 3 and 10
rated_between_3_and_10_nodes = df[(df['Rating'] >= 3) & (df['Rating'] <= 10)]['Target'].unique()
ratings_given_by_3_and_10_nodes = df[df['Source'].isin(rated_between_3_and_10_nodes)]['Rating']
average_rating_given_by_3_and_10_nodes = ratings_given_by_3_and_10_nodes.mean()

# Nodes rated between -3 and 3
rated_between_minus_3_and_3_nodes = df[(df['Rating'] >= -3) & (df['Rating'] <= 3)]['Target'].unique()
ratings_given_by_minus_3_and_3_nodes = df[df['Source'].isin(rated_between_minus_3_and_3_nodes)]['Rating']
average_rating_given_by_minus_3_and_3_nodes = ratings_given_by_minus_3_and_3_nodes.mean()

print(f"The average rating given by nodes rated between -10 and -3 is {average_rating_given_by_minus_10_and_minus_3_nodes}")
print(f"The average rating given by nodes rated between 3 and 10 is {average_rating_given_by_3_and_10_nodes}")
print(f"The average rating given by nodes rated between -3 and 3 is {average_rating_given_by_minus_3_and_3_nodes}")

The average rating given by nodes rated between -10 and -3 is 0.1336097478422745
The average rating given by nodes rated between 3 and 10 is 1.1562705426922797
The average rating given by nodes rated between -3 and 3 is 1.1025980378053342


In [10]:
# Nodes rated between -10 and -3
rated_between_minus_10_and_minus_3_nodes = df[(df['Rating'] >= -10) & (df['Rating'] <= -3)]['Target'].unique()
ratings_given_by_minus_10_and_minus_3_nodes = df[df['Source'].isin(rated_between_minus_10_and_minus_3_nodes)]['Rating']
average_rating_given_by_minus_10_and_minus_3_nodes = ratings_given_by_minus_10_and_minus_3_nodes.mean()

# Nodes rated between -3 and 0
rated_between_minus_3_and_0_nodes = df[(df['Rating'] >= -3) & (df['Rating'] <= 0)]['Target'].unique()
ratings_given_by_minus_3_and_0_nodes = df[df['Source'].isin(rated_between_minus_3_and_0_nodes)]['Rating']
average_rating_given_by_minus_3_and_0_nodes = ratings_given_by_minus_3_and_0_nodes.mean()

# Nodes rated between 0 and 3
rated_between_0_and_3_nodes = df[(df['Rating'] > 0) & (df['Rating'] <= 3)]['Target'].unique()
ratings_given_by_0_and_3_nodes = df[df['Source'].isin(rated_between_0_and_3_nodes)]['Rating']
average_rating_given_by_0_and_3_nodes = ratings_given_by_0_and_3_nodes.mean()

# Nodes rated between 3 and 10
rated_between_3_and_10_nodes = df[(df['Rating'] >= 3) & (df['Rating'] <= 10)]['Target'].unique()
ratings_given_by_3_and_10_nodes = df[df['Source'].isin(rated_between_3_and_10_nodes)]['Rating']
average_rating_given_by_3_and_10_nodes = ratings_given_by_3_and_10_nodes.mean()

print(f"The average rating given by nodes rated between -10 and -3 is {average_rating_given_by_minus_10_and_minus_3_nodes}")
print(f"The average rating given by nodes rated between -3 and 0 is {average_rating_given_by_minus_3_and_0_nodes}")
print(f"The average rating given by nodes rated between 0 and 3 is {average_rating_given_by_0_and_3_nodes}")
print(f"The average rating given by nodes rated between 3 and 10 is {average_rating_given_by_3_and_10_nodes}")

The average rating given by nodes rated between -10 and -3 is 0.1336097478422745
The average rating given by nodes rated between -3 and 0 is 0.8366826481689279
The average rating given by nodes rated between 0 and 3 is 1.1038130097702972
The average rating given by nodes rated between 3 and 10 is 1.1562705426922797


In [20]:
from statistics import multimode

modes = df.groupby('Target')['Rating'].agg(lambda x: multimode(x)[0] if multimode(x) else None)

# Define the ranges
range_min_10_min_3 = modes.between(-10, -4, inclusive='both')
range_3_10 = modes.between(4, 10, inclusive='both')
range_min_3_3 = modes.between(-3, 3, inclusive='both')

# Calculate the percentage of nodes whose mode falls within each range
percent_min_10_min_3 = range_min_10_min_3.sum() / len(modes) * 100
percent_3_10 = range_3_10.sum() / len(modes) * 100
percent_min_3_3 = range_min_3_3.sum() / len(modes) * 100

print(f"The percentage of nodes whose mode rating is between -10 and -4 is {percent_min_10_min_3:.2f}%")
print(f"The percentage of nodes whose mode rating is between 4 and 10 is {percent_3_10:.2f}%")
print(f"The percentage of nodes whose mode rating is between -3 and 3 is {percent_min_3_3:.2f}%")

# Sum the percentages to verify they add up to 100%.
total_percent = percent_min_10_min_3 + percent_3_10 + percent_min_3_3
print(f"The total percentage covered by these mode ranges is {total_percent:.2f}%")

The percentage of nodes whose mode rating is between -10 and -4 is 6.98%
The percentage of nodes whose mode rating is between 4 and 10 is 5.21%
The percentage of nodes whose mode rating is between -3 and 3 is 87.81%
The total percentage covered by these mode ranges is 100.00%


In [12]:
from statistics import multimode

modes = df.groupby('Target')['Rating'].agg(lambda x: multimode(x)[0] if multimode(x) else None)

# Define the ranges
range_min_10_min_3 = modes.between(-10, -3, inclusive='both')
range_min_3_0 = modes.between(-3, 0, inclusive='left') & ~range_min_10_min_3
range_0_3 = modes.between(0, 3, inclusive='right') & ~range_min_10_min_3 & ~range_min_3_0
range_3_10 = modes.between(3, 10, inclusive='both') & ~range_min_10_min_3 & ~range_min_3_0 & ~range_0_3

# Calculate the percentage of nodes whose mode falls within each range
percent_min_10_min_3 = range_min_10_min_3.sum() / len(modes) * 100
percent_min_3_0 = range_min_3_0.sum() / len(modes) * 100
percent_0_3 = range_0_3.sum() / len(modes) * 100
percent_3_10 = range_3_10.sum() / len(modes) * 100

print(f"The percentage of nodes whose mode rating is between -10 and -3 is {percent_min_10_min_3:.2f}%")
print(f"The percentage of nodes whose mode rating is between -3 and 0 is {percent_min_3_0:.2f}%")
print(f"The percentage of nodes whose mode rating is between 0 and 3 is {percent_0_3:.2f}%")
print(f"The percentage of nodes whose mode rating is between 3 and 10 is {percent_3_10:.2f}%")

# Sum the percentages to verify they add up to 100%.
total_percent = percent_min_10_min_3 + percent_min_3_0 + percent_0_3 + percent_3_10
print(f"The total percentage covered by these mode ranges is {total_percent:.2f}%")


The percentage of nodes whose mode rating is between -10 and -3 is 7.08%
The percentage of nodes whose mode rating is between -3 and 0 is 2.54%
The percentage of nodes whose mode rating is between 0 and 3 is 85.17%
The percentage of nodes whose mode rating is between 3 and 10 is 5.21%
The total percentage covered by these mode ranges is 100.00%


In [21]:
# Function to initialize the fairness and goodness scores
def initialize_scores(G):
    fairness = {}
    goodness = {}

    # Initialize fairness to 1 and goodness based on in-degree and weights
    nodes = G.nodes()
    for node in nodes:
        fairness[node] = 1
        in_edges = G.in_edges(node, data='weight')
        if in_edges:  # Ensure there are incoming edges to avoid ZeroDivisionError
            goodness[node] = sum(weight for _, _, weight in in_edges) / len(in_edges)
        else:
            goodness[node] = 0
    return fairness, goodness

def normalize_scores(scores):
    max_score = max(abs(score) for score in scores.values())
    if max_score > 0:
        for node in scores:
            scores[node] /= max_score
    return scores

# Function to compute fairness and goodness
def compute_fairness_goodness(G):
    fairness, goodness = initialize_scores(G)
    nodes = G.nodes()
    iter = 0
    while iter < 100:
        df = 0
        dg = 0

        # Update goodness scores
        for node in nodes:
            inedges = G.in_edges(node, data='weight')
            if inedges:
                g = sum(fairness[edge[0]] * edge[2] for edge in inedges)
                new_goodness = g / len(inedges) if len(inedges) > 0 else 0
                dg += abs(new_goodness - goodness[node])
                goodness[node] = new_goodness

        # Normalize goodness to prevent overflow
        goodness = normalize_scores(goodness)

        # Update fairness scores
        for node in nodes:
            outedges = G.out_edges(node, data='weight')
            if outedges:
                f = sum(1.0 - abs(edge[2] - goodness[edge[1]]) / 2.0 for edge in outedges)
                new_fairness = f / len(outedges) if len(outedges) > 0 else 0
                df += abs(new_fairness - fairness[node])
                fairness[node] = new_fairness

        # Normalize fairness to prevent overflow
        fairness = normalize_scores(fairness)

        # Check for convergence
        if df < math.pow(10, -6) and dg < math.pow(10, -6):
            break

        iter += 1
    
    return fairness, goodness

# Create a directed graph from a pandas DataFrame
G = nx.DiGraph()

# Assuming 'df' is your DataFrame containing the data.
# Ensure that the 'Rating' column is converted to a float
for index, row in df.iterrows():
    G.add_edge(row['Source'], row['Target'], weight=float(row['Rating']))  # Convert rating to float

# Compute the fairness and goodness scores
fairness, goodness = compute_fairness_goodness(G)


In [22]:
df['Fairness'] = df['Source'].map(fairness)
df['Goodness'] = df['Target'].map(goodness)
df

Unnamed: 0,Source,Target,Rating,Time,Fairness,Goodness
0,6,2,4,1289241912,-0.159913,-0.042973
1,6,5,2,1289241942,-0.159913,-0.022760
2,1,15,1,1289243140,-0.080051,-0.001037
3,4,3,7,1289245277,-0.186980,-0.019818
4,13,16,8,1289254254,-0.017018,-0.013614
...,...,...,...,...,...,...
35587,4499,1810,1,1453612482,-0.030608,0.050283
35588,2731,3901,5,1453679428,-0.039879,-0.013105
35589,2731,4897,5,1453679434,-0.039879,-0.001126
35590,13,1128,1,1453679633,-0.017018,0.003272


In [23]:
top_n = 10
top_fairness_nodes = sorted(fairness.items(), key=lambda item: item[1], reverse=True)[:top_n]

for node, fairness in top_fairness_nodes:
    print(f'Node {node} has a fairness of {fairness}')

top_n = 10
top_goodness_nodes = sorted(goodness.items(), key=lambda item: item[1], reverse=True)[:top_n]

for node, goodness in top_goodness_nodes:
    print(f'Node {node} has a goodness of {goodness}')

Node 4655.0 has a fairness of 0.1728309975598684
Node 4433.0 has a fairness of 0.15097747805010683
Node 2704.0 has a fairness of 0.14831916875179357
Node 2718.0 has a fairness of 0.1407719346673379
Node 5471.0 has a fairness of 0.13606198447874188
Node 5704.0 has a fairness of 0.1318826743806233
Node 4637.0 has a fairness of 0.1308094363068766
Node 4600.0 has a fairness of 0.12900624743333383
Node 2694.0 has a fairness of 0.12854806619627931
Node 5086.0 has a fairness of 0.12854806619627931
Node 3665.0 has a goodness of 1.0
Node 4182.0 has a goodness of 1.0
Node 4737.0 has a goodness of 0.8300908140971991
Node 3786.0 has a goodness of 0.8257301937003192
Node 3787.0 has a goodness of 0.8257301937003192
Node 3788.0 has a goodness of 0.8257301937003192
Node 3789.0 has a goodness of 0.8257301937003192
Node 3790.0 has a goodness of 0.8257301937003192
Node 3792.0 has a goodness of 0.8257301937003192
Node 3791.0 has a goodness of 0.8257301937003192
