# The Social Web Research Project

This project identifies experts in the Stack Overflow question-answering community by computing PageRank over a weighted, directed graph of user interactions.

In [None]:
!pip install networkx
!pip install matplotlib
!pip install scipy


## Sample of comments
Classic social network analysis studies a network's structure. In a social network, a person is considered a *node* or *vertex*, and a relationship between people is a *link* or *edge*.
Many network statistics can yield insights into the underlying social structure. At the same time, the intuitive visual representation of a network aids in developing working hypotheses that explain its dynamics.

In [40]:
import networkx as nx
from matplotlib import pyplot as plt
import csv
from datetime import datetime
from prettytable import PrettyTable

# Log the wall-clock start time so the later progress messages can be
# compared against it.
now = datetime.now()
current_time = format(now, "%H:%M:%S")
print("started at = ", current_time)

# Directed graph that will hold the aggregated, weighted edges.
G = nx.DiGraph()

# Placeholder; replaced by the rows loaded from the CSV file below.
edges = list()

def read_csv(path):
    """Read a comma-separated file and return its data rows as tuples.

    The first row is treated as the header: it is printed, and its length
    determines how many leading fields are kept from every following row.
    A progress line with the number of rows read and the current time is
    printed before returning.

    Args:
        path: Location of the CSV file to read.

    Returns:
        A list with one tuple of strings per non-blank data row, in file
        order. Each tuple has as many items as the header has columns.
        An empty file yields an empty list.

    Raises:
        IndexError: If a non-blank data row has fewer fields than the header.
    """
    # newline="" hands line-ending handling to the csv module, so line breaks
    # embedded in quoted fields are preserved exactly as written.
    with open(path, "r", newline="") as csvfile:
        graph_reader = csv.reader(csvfile, delimiter=',', quotechar='"')
        # A default avoids a bare StopIteration escaping on an empty file.
        header = next(graph_reader, None)
        if header is None:
            rows = []
        else:
            print(f'Column names are {", ".join(header)}')
            number_of_columns = len(header)
            rows = [
                tuple(row[index] for index in range(number_of_columns))
                for row in graph_reader
                if row  # csv.reader yields [] for blank lines; skip them
            ]
    current_time = datetime.now().strftime("%H:%M:%S")
    print(f'Processed {len(rows)} lines = ', current_time)
    return rows

# Load the raw edge rows; the aggregation step below reads the first three
# fields of each row as (from node, to node, score).
edges = read_csv(path='files/python-tag-data_2021-01-01_to_2021-12-03.csv')

# Alternative kept for reference: add the rows directly, without aggregation.
# G.add_edges_from(edges)

def aggregate(G, edges):
    """Add edges to ``G``, counting repeated (from, to) pairs as edge weight.

    Args:
        G: Graph object offering ``has_edge``, ``add_edge`` and
            ``G[u][v]['weight']`` access; it is modified in place.
        edges: Iterable of sequences whose first two items are the source
            and target node. Any further items are ignored.
    """
    for record in edges:
        source, target = record[0], record[1]
        if not G.has_edge(source, target):
            # First occurrence of this pair: start counting at one.
            G.add_edge(source, target, weight=1)
        else:
            # Pair already present: bump its occurrence count.
            G[source][target]['weight'] += 1

def aggregate_with_scores(G, edges):
    """Add edges to ``G``, summing the score of repeated (from, to) pairs.

    Args:
        G: Graph object offering ``has_edge``, ``add_edge`` and
            ``G[u][v]['weight']`` access; it is modified in place.
        edges: Iterable of sequences holding source node, target node and a
            score that ``float()`` can convert.

    NOTE(review): nothing here prevents a negative total weight when scores
    are negative -- confirm that the downstream ranking tolerates that.
    """
    for record in edges:
        source, target = record[0], record[1]
        score = float(record[2])
        if G.has_edge(source, target):
            # Pair already present: accumulate this row's score.
            G[source][target]['weight'] += score
            continue
        # First occurrence of this pair: its weight starts at the row's score.
        G.add_edge(source, target, weight=score)

# Build the weighted graph. The unweighted variant is kept for reference:
# aggregate(G, edges)
aggregate_with_scores(G, edges)

# TODO: remove_negative_edges() -- not implemented yet.
# NOTE(review): summed scores can be negative; confirm nx.pagerank handles
# negative edge weights the way this analysis expects.

now = datetime.now()
current_time = now.strftime("%H:%M:%S")
print('aggregated the edges = ', current_time)

# PageRank score per node, using the aggregated 'weight' edge attribute.
ppr1 = nx.pagerank(G)

now = datetime.now()
current_time = now.strftime("%H:%M:%S")
print('calculated pagerank = ', current_time)


# (node, score) pairs, highest PageRank first.
sen_rank = sorted(ppr1.items(), key=lambda x: x[1], reverse=True)

now = datetime.now()
current_time = now.strftime("%H:%M:%S")
print("done showing pagerank = ", current_time)

my_table = PrettyTable()

my_table.field_names = ["UserId", "User_Profile_URL", "PageRank_Score"]

# Number of top-ranked users to list. Slicing shows exactly this many rows;
# the previous counter-and-break loop stopped one row short (99 instead of 100).
TOP_USERS_SHOWN = 100
for user_id, pagerank_score in sen_rank[:TOP_USERS_SHOWN]:
    my_table.add_row([user_id, 'https://stackoverflow.com/users/' + user_id, pagerank_score])

print(my_table)


# Banner separating the evaluation output from the ranking table above.
print('###### evaluation ########')
# Reference list: 'files/2021-01-01 to 2021-12-03 - stackoverflow top users'
# Number of top-ranked users compared against the reference list.
top_user_count = 10
def evaluation(file_path_top_users, ranked_user_ids, user_count):
    """Measure how many of the top-ranked users appear in a reference list.

    The first ``user_count`` entries of ``ranked_user_ids`` are compared with
    the first column of the first ``user_count`` data rows of the reference
    CSV file (the file is used in its stored order, without re-sorting).

    Args:
        file_path_top_users: Path of the reference CSV file, read with
            ``read_csv``.
        ranked_user_ids: User ids ordered from best to worst rank.
        user_count: How many leading entries of each list to compare.

    Returns:
        The number of matching ranked ids divided by ``user_count``. When
        ``user_count`` is zero or negative there is nothing to compare and
        ``0.0`` is returned.
    """
    user_ids_for_evaluation = read_csv(file_path_top_users)
    if user_count <= 0:
        # Avoids the division by zero the ratio below would otherwise hit.
        return 0.0
    # A set gives constant-time membership checks for the reference ids.
    reference_ids = {row[0] for row in user_ids_for_evaluation[:user_count]}
    # Slicing tolerates a ranking shorter than user_count; missing entries
    # simply count as non-matches instead of raising IndexError.
    matched_user_id_count = sum(
        1 for user_id in ranked_user_ids[:user_count] if user_id in reference_ids
    )
    return matched_user_id_count / user_count

# Compare the PageRank ordering (ids only) with the reference top-user file
# and report the overlap as a percentage.
evalauation_result = evaluation(
    'files/2021-01-01_to_2021-12-03_stackoverflow_top_users.csv',
    [entry[0] for entry in sen_rank],
    top_user_count,
)
print(f"the evaluation top {top_user_count} result is = {evalauation_result*100}")
#labels = nx.get_edge_attributes(G,'weight')
#pos = nx.spring_layout(G)
#nx.draw(G, pos, with_labels = True, node_color="#f86e00", connectionstyle='arc3, rad = 0.3')
#nx.draw_networkx_edge_labels(G, pos, edge_labels=labels)


# plt.show()

#now = datetime.now()
#current_time = now.strftime("%H:%M:%S")
#print("done rendering = ", current_time)

started at =  12:59:34
Column names are OwnerUserId, OwnerUserId, score
Processed 109585 lines =  12:59:35
aggregated the edges =  12:59:36
calculated pagerank =  12:59:36
done showing pagerank =  12:59:36
+----------+------------------------------------------+-----------------------+
|  UserId  |             User_Profile_URL             |     PageRank_Score    |
+----------+------------------------------------------+-----------------------+
| 16343464 | https://stackoverflow.com/users/16343464 |   0.0131990276088745  |
| 2901002  | https://stackoverflow.com/users/2901002  | 0.0058741687367410925 |
| 2001654  | https://stackoverflow.com/users/2001654  |  0.005124537344678988 |
| 15497888 | https://stackoverflow.com/users/15497888 |  0.004996313772154613 |
| 10035985 | https://stackoverflow.com/users/10035985 | 0.0034974814589655333 |
| 6361531  | https://stackoverflow.com/users/6361531  |  0.003263004558636262 |
| 15239951 | https://stackoverflow.com/users/15239951 | 0.0026756710080465

## Test weighted

In [None]:
import networkx as nx
# Small check of how edge weights influence PageRank: node A links to B and
# C with equal weights.
D = nx.DiGraph()
D.add_edge('A', 'B', weight=0.5)
D.add_edge('A', 'C', weight=0.5)
print(nx.pagerank(D))

# Make the A->C edge twice as heavy as A->B and recompute for comparison.
D['A']['C']['weight'] = 1
print(nx.pagerank(D))