# Defining Reputation in a Social Network
Efforts by:
1. Haikoo Khandor 20110071
2. Madhav Kanda   20110104
3. Dhruv Patel    20110129

In [None]:
# Necessary imports
import pandas as pd
import matplotlib.pyplot as plt
from correlation import c_clustering
from collections import defaultdict
import networkx as nx
from fairness_goodness_computation import *

In [None]:
# Load the dataset
# The CSV is read only through pandas; no separate file handle is opened (and left unclosed).
graph_type = nx.DiGraph()  # empty directed graph, passed as create_using when the graph is built
df = pd.read_csv("soc-sign-bitcoinotc.csv")  # edge list; later cells use Source, Target, Weight, Timestamp
df = df.sort_values(by="Timestamp")  # chronological order, so positional splits are time-based


In [None]:
# Divide the raw weights by 10, then subtract the mean of the scaled weights (mean-centring).
scaled_weight = df["Weight"] / 10
df["Weight"] = scaled_weight - scaled_weight.mean()
print(df.head())

# The first eighth (12.5%) of the rows seeds the initial graph; the rest is kept in `test`.
split = len(df) // 8
train, test = df.iloc[:split, :], df.iloc[split:, :]

# Initial directed graph from the training rows, carrying "Weight" as the edge attribute.
G = nx.from_pandas_edgelist(
    train,
    source="Source",
    target="Target",
    edge_attr="Weight",
    create_using=graph_type,
)
H = G.copy()  # working copy used by the following cells

In [None]:
# nx.info() was removed in NetworkX 3.0, so the summary is assembled from graph methods instead.
print(f"{type(H).__name__} with {H.number_of_nodes()} nodes and {H.number_of_edges()} edges")

In [None]:
# Fairness and goodness scores for H, as returned by the project helper.
fairness, goodness = compute_fairness_goodness(H)

# Store both score mappings on the graph as node attributes of the same name.
for attr_name, attr_values in (("fairness", fairness), ("goodness", goodness)):
    nx.set_node_attributes(H, attr_values, attr_name)

In [None]:
## Correlation clustering
# Build the clustering object on a copy of H, so the clusterer gets its own graph object.
# NOTE(review): the meaning of delta=1/180 and complete_graph=True is defined in the
# project-local `correlation` module, which is not shown here; confirm before tuning.
CC = c_clustering(H.copy(), delta=1 / 180, complete_graph=True)

In [None]:
clusters = CC.run()  # run the clustering; later cells index the result by cluster number
print(len(clusters))  # number of clusters returned

In [None]:
dic = dict(enumerate(clusters))  # cluster index -> cluster

In [None]:
# Display the cluster-index -> cluster mapping.
dic

### Metric Calculation:

In [None]:
complete_graph = True  # when True, non-adjacent pairs are scored with fairness[u] * goodness[v]
weights = nx.get_edge_attributes(H, "Weight")

# Per-cluster weight sums (in_*/out_*) and edge counts (N_*), keyed by cluster index.
# "in": the target node is in the same cluster as the source; "out": it is not.
# "p": value > 0; "n": value <= 0.
in_p, in_n = defaultdict(int), defaultdict(int)
out_p, out_n = defaultdict(int), defaultdict(int)
N_in_p, N_in_n = defaultdict(int), defaultdict(int)
N_out_p, N_out_n = defaultdict(int), defaultdict(int)
preds = {}  # (u, v) -> fairness[u] * goodness[v] for every non-neighbour pair visited below

# (target inside the cluster?, value > 0?) -> (sum accumulator, count accumulator)
accumulators = {
    (True, True): (in_p, N_in_p),
    (True, False): (in_n, N_in_n),
    (False, True): (out_p, N_out_p),
    (False, False): (out_n, N_out_n),
}

for i in range(len(clusters)):
    members = clusters[i]
    for u in members:
        # Existing edges leaving u, scored with their stored weight.
        for v in H.neighbors(u):
            value = weights[(u, v)]
            sums, counts = accumulators[(bool(v in members), bool(value > 0))]
            sums[i] += value
            counts[i] += 1

        if not complete_graph:
            continue

        # Pairs without an edge from u: use the fairness-goodness product as the predicted weight.
        for v in nx.non_neighbors(H, u):
            value = fairness[u] * goodness[v]
            preds[(u, v)] = value
            sums, counts = accumulators[(bool(v in members), bool(value > 0))]
            sums[i] += value
            counts[i] += 1

In [None]:
# Edge counts per cluster: inside/positive, inside/non-positive, outside/positive, outside/non-positive.
for edge_counts in (N_in_p, N_in_n, N_out_p, N_out_n):
    print(edge_counts)

In [None]:
# Cut-offs used to flag nodes: fairness above threshold_f, goodness above threshold_g.
threshold_f = 0.95
threshold_g = 0.1

In [None]:
# 0/1 flag per node of H: 1 when the score exceeds its threshold, otherwise 0.
fair_node = {node: int(fairness[node] > threshold_f) for node in H.nodes}
good_node = {node: int(goodness[node] > threshold_g) for node in H.nodes}

In [None]:
# Sanity check: report the size of each per-node mapping.
for label, mapping in (
    ("fair_node length: ", fair_node),
    ("good_node length: ", good_node),
    ("fairness length: ", fairness),
    ("goodness length: ", goodness),
):
    print(label, len(mapping))

In [None]:
# Trustworthiness per cluster:
#   (sum over edges u -> j entering the cluster from outside of weight * fair_node[u] * fairness[u]
#    + sum over members v of goodness[v] * good_node[v]) / cluster size
metrics_g = []
for i in range(len(clusters)):
    members = clusters[i]
    cluster_trust = 0

    # Edges from nodes outside the cluster to nodes inside it.
    for u in H.nodes - members:
        for j in H.neighbors(u):
            if j in members and (u, j) in weights:
                cluster_trust += weights[(u, j)] * fair_node[u] * fairness[u]

    # Goodness of the members themselves, counted only where good_node is 1.
    for v in members:
        cluster_trust += goodness[v] * good_node[v]

    metrics_g.append(cluster_trust / len(members))

In [None]:
# Display the per-cluster trustworthiness values (one entry per cluster index).
metrics_g

In [None]:
# Per-cluster summary with named columns, most trustworthy cluster first.
cluster_ids = range(len(clusters))
pd.DataFrame(
    {
        "cluster": list(cluster_ids),
        "size": [len(clusters[i]) for i in cluster_ids],
        "N_in_p": [N_in_p[i] for i in cluster_ids],
        "N_out_n": [N_out_n[i] for i in cluster_ids],
        "trustworthiness": metrics_g,
    }
).sort_values(by="trustworthiness", ascending=False)

In [None]:
H_add = H.copy()  # separate copy that receives the synthetic nodes/edges; H itself is not modified here
import numpy as np  # provides the random draws used when adding nodes

In [None]:
# Add synthetic nodes to H_add. Each new node gets one outgoing edge per cluster, aimed at a
# randomly chosen member of that cluster, with a weight drawn from {-1, 1}.
RANDOM_SEED = 0  # fixed seed so the synthetic edges are identical on every run
N_NEW_NODES = 200
rng = np.random.default_rng(RANDOM_SEED)

first_new_node = max(H.nodes) + 1  # new ids start just above the largest existing node id
cluster_members = [list(clusters[i]) for i in range(len(clusters))]  # built once, not per draw

for j in range(N_NEW_NODES):
    new_node = first_new_node + j
    for members in cluster_members:
        node = rng.choice(members, 1).item()  # .item() turns the size-1 array into a plain scalar
        weight = rng.choice([-1, 1])
        H_add.add_edge(new_node, node, Weight=weight)

In [None]:
# nx.info() was removed in NetworkX 3.0, so the summary is assembled from graph methods instead.
f"{type(H_add).__name__} with {H_add.number_of_nodes()} nodes and {H_add.number_of_edges()} edges"

In [None]:
# Recompute the scores on the augmented graph. This rebinds the global names `fairness`
# and `goodness`, so every later cell reads the H_add-based values.
fairness, goodness = compute_fairness_goodness(H_add)

# Store both score mappings on H_add as node attributes of the same name.
for attr_name, attr_values in (("fairness", fairness), ("goodness", goodness)):
    nx.set_node_attributes(H_add, attr_values, attr_name)

In [None]:
complete_graph = True  # when True, non-adjacent pairs are scored with fairness[u] * goodness[v]
# NOTE(review): edges and non-neighbours are still read from H, not H_add; only the
# fairness/goodness values in scope differ from the first pass. Confirm this is intended.
weights = nx.get_edge_attributes(H, "Weight")

# Per-cluster weight sums (in_*/out_*) and edge counts (N_*), reset and keyed by cluster index.
# "in": the target node is in the same cluster as the source; "out": it is not.
# "p": value > 0; "n": value <= 0.
in_p, in_n = defaultdict(int), defaultdict(int)
out_p, out_n = defaultdict(int), defaultdict(int)
N_in_p, N_in_n = defaultdict(int), defaultdict(int)
N_out_p, N_out_n = defaultdict(int), defaultdict(int)

preds_new = {}  # (u, v) -> fairness[u] * goodness[v] for every non-neighbour pair visited below

# (target inside the cluster?, value > 0?) -> (sum accumulator, count accumulator)
accumulators_new = {
    (True, True): (in_p, N_in_p),
    (True, False): (in_n, N_in_n),
    (False, True): (out_p, N_out_p),
    (False, False): (out_n, N_out_n),
}

for i in range(len(clusters)):
    members = clusters[i]
    for u in members:
        # Existing edges leaving u, scored with their stored weight.
        for v in H.neighbors(u):
            value = weights[(u, v)]
            sums, counts = accumulators_new[(bool(v in members), bool(value > 0))]
            sums[i] += value
            counts[i] += 1

        if not complete_graph:
            continue

        # Pairs without an edge from u: use the fairness-goodness product as the predicted weight.
        for v in nx.non_neighbors(H, u):
            value = fairness[u] * goodness[v]
            preds_new[(u, v)] = value
            sums, counts = accumulators_new[(bool(v in members), bool(value > 0))]
            sums[i] += value
            counts[i] += 1

In [None]:
# Trustworthiness per cluster, same formula as metrics_g, using the fairness/goodness
# values currently in scope.
# NOTE(review): fair_node / good_node are read as-is and are not recomputed in this cell,
# and the node/edge iteration is over H. Confirm that is the intended comparison.
metrics_f = []
for i in range(len(clusters)):
    members = clusters[i]
    cluster_trust = 0

    # Edges from nodes outside the cluster to nodes inside it.
    for u in H.nodes - members:
        for j in H.neighbors(u):
            if j in members and (u, j) in weights:
                cluster_trust += weights[(u, j)] * fair_node[u] * fairness[u]

    # Goodness of the members themselves, counted only where good_node is 1.
    for v in members:
        cluster_trust += goodness[v] * good_node[v]

    metrics_f.append(cluster_trust / len(members))

In [None]:
# Display the per-cluster trustworthiness values computed after the node addition.
metrics_f

preds: predicted weights of missing edges <br>
preds_new: predicted weights of missing edges after addition of new nodes/edges

In [None]:
# Preview only: preds has one entry per non-neighbour pair visited, too many to echo in full.
print(f"{len(preds)} predicted edges; first 10:")
list(preds.items())[:10]

In [None]:
# Preview only: preds_new has one entry per non-neighbour pair visited, too many to echo in full.
print(f"{len(preds_new)} predicted edges; first 10:")
list(preds_new.items())[:10]

`cluster_edges` is a dictionary that maps each cluster index to the list of predicted edges, each stored as `((u, v), predicted_weight)`, whose two endpoints both belong to that cluster.

In [None]:
# cluster index -> [((u, v), predicted weight), ...] for predicted edges with both ends in the cluster.
cluster_edges = {}
for cluster_id, members in dic.items():
    member_set = set(members)  # set membership instead of scanning a list for every edge
    for (u, v), predicted in preds.items():
        if u in member_set and v in member_set:
            cluster_edges.setdefault(cluster_id, []).append(((u, v), predicted))

In [None]:
# Preview: at most the first 5 predicted intra-cluster edges of each cluster.
{cluster_id: edges[:5] for cluster_id, edges in cluster_edges.items()}

In [None]:
# cluster index -> [((u, v), predicted weight), ...] built from preds_new, both ends in the cluster.
cluster_new_edges = {}
for cluster_id, members in dic.items():
    member_set = set(members)  # set membership instead of scanning a list for every edge
    for (u, v), predicted in preds_new.items():
        if u in member_set and v in member_set:
            cluster_new_edges.setdefault(cluster_id, []).append(((u, v), predicted))

In [None]:
# Preview: at most the first 5 predicted intra-cluster edges of each cluster.
{cluster_id: edges[:5] for cluster_id, edges in cluster_new_edges.items()}

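`diff_in_weights_cluster` is a dictionary that maps each cluster index to the mean absolute difference between the predicted edge weights before and after the new nodes were added (0 for clusters that have no predicted intra-cluster edges).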

In [None]:
# Mean absolute change in predicted weight per cluster (0 when a cluster has no predictions).
# Old and new predictions are matched by edge key (u, v) rather than by list position, so a
# different ordering or length of the two lists cannot mis-pair edges or raise IndexError.
diff_in_weights_cluster = {}
for i in range(len(clusters)):
    diff_in_weights_cluster[i] = 0
    if i in cluster_edges and i in cluster_new_edges:
        new_weight_by_edge = dict(cluster_new_edges[i])
        total_diff = 0
        matched = 0
        for edge, old_weight in cluster_edges[i]:
            if edge in new_weight_by_edge:
                total_diff += abs(old_weight - new_weight_by_edge[edge])
                matched += 1
        if matched:
            diff_in_weights_cluster[i] = total_diff / matched

In [None]:
# Display the per-cluster difference in predicted weights.
diff_in_weights_cluster

Before: cluster trustworthiness computed before the new nodes were added (`metrics_g`)

In [None]:
# Scatter: per-cluster difference in predicted weights (x) against metrics_g (y).
x = pd.Series(diff_in_weights_cluster)
y = pd.Series(metrics_g)
fig, ax = plt.subplots()
ax.scatter(x, y)
ax.set_xlabel("Difference in weights")
ax.set_ylabel("Trustworthiness of clusters")
ax.set_title("Trustworthiness of clusters vs Difference in weights")
plt.show()

After: cluster trustworthiness computed after the new nodes were added (`metrics_f`)

In [None]:
# Scatter: per-cluster difference in predicted weights (x) against metrics_f (y).
x = pd.Series(diff_in_weights_cluster)
y = pd.Series(metrics_f)
fig, ax = plt.subplots()
ax.scatter(x, y)
ax.set_xlabel("Difference in weights")
ax.set_ylabel("Trustworthiness of clusters")
ax.set_title("Trustworthiness of clusters vs Difference in weights")
plt.show()

In [None]:
m_sort = list(metrics_f)
m_sort.sort()  # ascending order; metrics_f itself is left untouched
m_sort

Correlation between the cluster trustworthiness levels before and after adding the new nodes

In [None]:
# One point per cluster: trustworthiness before (x, metrics_g) against after (y, metrics_f).
plt.plot(metrics_g, metrics_f, "o")
plt.xlabel("Old Trustworthiness of clusters")
plt.ylabel("New Trustworthiness of clusters")
plt.title("Correlation between old and new trustworthiness of clusters")
plt.show()  # render explicitly, as the other plotting cells do, instead of echoing the Text repr