# The Social Web Research Project

This project identifies experts in the Stack Overflow question-answering community by ranking users with PageRank.

In [None]:
!pip install networkx
!pip install matplotlib
!pip install scipy


## Sample of comments
Classic social network analysis studies a network's structure. In a social network, a person is considered a *node* or *vertex*, and a relationship between people is a *link* or *edge*.
Many network statistics can yield insights into the underlying social structure. At the same time, their intuitive visual representation aids in developing working hypotheses that explain their dynamics.

In [23]:
import networkx as nx
from matplotlib import pyplot as plt
import csv
from datetime import datetime
from prettytable import PrettyTable

# Log the wall-clock start time so the later timestamps can be compared against it.
now = datetime.now()
current_time = now.strftime("%H:%M:%S")
print("started at = ", current_time)

# Directed graph that the aggregation step below fills in.
G = nx.DiGraph()

# Raw (first column, second column, score) string triples, one per data row of the CSV.
edges = []

# newline='' lets the csv module do its own newline handling, as its documentation recommends.
with open('files/python-tag-data.csv', "r", newline='') as csvfile:
    graph_reader = csv.reader(csvfile, delimiter=',')
    # Counts every row returned by the reader, including the header row.
    data_row_count = 0
    for row in graph_reader:
        data_row_count += 1
        if data_row_count == 1:
            # The first row is the header: report it instead of storing it.
            print(f'Column names are {", ".join(row)}')
        elif row:
            # csv.reader returns [] for a blank line; skipping it avoids an
            # IndexError on row[0]. Values stay strings; the score is parsed later.
            edges.append((row[0], row[1], row[2]))
    now = datetime.now()
    current_time = now.strftime("%H:%M:%S")
    print(f'Processed {data_row_count} lines = ', current_time)

# G.add_edges_from(edges)

def aggregate(G, edges):
    """Fold a list of edges into ``G``, counting repeated edges in ``weight``.

    Args:
        G: Graph object offering ``has_edge``, ``add_edge`` and ``G[u][v]``
            attribute access (a ``networkx.DiGraph`` in this notebook).
        edges: Iterable of indexable records; item 0 is the source node and
            item 1 the target node. Any further items are ignored.

    Returns:
        None. ``G`` is modified in place.
    """
    for record in edges:
        src, dst = record[0], record[1]
        if not G.has_edge(src, dst):
            # First occurrence of this pair: start its counter at 1.
            G.add_edge(src, dst, weight=1)
            continue
        # Pair already present: one more occurrence.
        G[src][dst]['weight'] += 1

def aggregate_with_scores(G, edges):
    """Fold a list of scored edges into ``G``, summing scores in ``weight``.

    Each record contributes ``float(record[2])`` to the weight of the edge
    ``record[0] -> record[1]``. A score equal to 0 is counted as 1; every other
    value, including a negative one, is added unchanged.

    Args:
        G: Graph object offering ``has_edge``, ``add_edge`` and ``G[u][v]``
            attribute access (a ``networkx.DiGraph`` in this notebook).
        edges: Iterable of indexable records ``(source, target, score)`` where
            ``score`` is anything ``float()`` accepts.

    Returns:
        None. ``G`` is modified in place.

    Raises:
        ValueError: If a score cannot be converted by ``float()``.
    """
    for record in edges:
        src, dst = record[0], record[1]
        parsed = float(record[2])
        # A zero score still counts as one unit of weight.
        # NOTE(review): negative scores reduce the accumulated weight — confirm
        # this is intended for the PageRank input.
        contribution = 1 if parsed == 0 else parsed
        if not G.has_edge(src, dst):
            # First occurrence of this pair: its weight starts at this contribution.
            G.add_edge(src, dst, weight=contribution)
            continue
        # Pair already present: accumulate the contribution.
        G[src][dst]['weight'] += contribution

# Number of highest-ranked users listed in the results table.
TOP_N = 100

# Build the weighted graph from the loaded rows.
# aggregate(G, edges)  # alternative: count occurrences instead of summing scores
aggregate_with_scores(G, edges)

now = datetime.now()
current_time = now.strftime("%H:%M:%S")
print('aggregated the edges = ', current_time)

# PageRank per node; networkx reads the 'weight' edge attribute by default.
ppr1 = nx.pagerank(G)

now = datetime.now()
current_time = now.strftime("%H:%M:%S")
print('calculated pagerank = ', current_time)


# (node, score) pairs ordered from highest to lowest PageRank score.
sen_rank = sorted(ppr1.items(), key=lambda x: x[1], reverse=True)

now = datetime.now()
current_time = now.strftime("%H:%M:%S")
print("done showing pagerank = ", current_time)

my_table = PrettyTable()

my_table.field_names = ["UserId", "User_Profile_URL", "PageRank_Score"]

# Slicing lists exactly TOP_N users (or fewer when the graph is smaller).
# The earlier counter-based loop stopped one row short, at TOP_N - 1.
for user_id, score in sen_rank[:TOP_N]:
    my_table.add_row([user_id, 'https://stackoverflow.com/users/' + user_id, score])

print(my_table)

# Graph drawing is currently disabled; re-enable the lines below to render the network.
#labels = nx.get_edge_attributes(G,'weight')
#pos = nx.spring_layout(G)
#nx.draw(G, pos, with_labels = True, node_color="#f86e00", connectionstyle='arc3, rad = 0.3')
#nx.draw_networkx_edge_labels(G, pos, edge_labels=labels)


# No figure is created while the drawing code above is commented out.
plt.show()

now = datetime.now()
current_time = now.strftime("%H:%M:%S")
print("done rendering = ", current_time)

started at =  10:54:04
Column names are OwnerUserId, OwnerUserId, score
Processed 109586 lines =  10:54:04
aggregated the edges =  10:54:04
calculated pagerank =  10:54:05
done showing pagerank =  10:54:05
+----------+------------------------------------------+-----------------------+
|  UserId  |             User_Profile_URL             |     PageRank_Score    |
+----------+------------------------------------------+-----------------------+
| 16343464 | https://stackoverflow.com/users/16343464 |  0.012860191800329919 |
| 15497888 | https://stackoverflow.com/users/15497888 |  0.006825716761438725 |
| 2901002  | https://stackoverflow.com/users/2901002  |  0.005697179772619251 |
| 2001654  | https://stackoverflow.com/users/2001654  |  0.004837529479404747 |
| 6361531  | https://stackoverflow.com/users/6361531  | 0.0037606379383553394 |
| 10035985 | https://stackoverflow.com/users/10035985 | 0.0037030609077163715 |
| 15239951 | https://stackoverflow.com/users/15239951 |  0.002823231273360

## Test weighted

In [None]:
import networkx as nx
# Small check of how edge weights influence PageRank:
# node A links to B and C, first with equal weights.
equal_weight_edges = [('A', 'B', 0.5), ('A', 'C', 0.5)]
D = nx.DiGraph()
D.add_weighted_edges_from(equal_weight_edges)
print(nx.pagerank(D))

# Raise the A -> C weight and recompute to compare against the scores printed above.
D['A']['C']['weight'] = 1
print(nx.pagerank(D))