In [1]:
import networkx as nx
import math
import pickle
from gplus_graph import GplusGraph

In [2]:
# Read a directed graph
# NOTE: pickle.load can execute arbitrary code, so only load pickle files from a trusted source.
# NOTE(review): the file is opened in text mode ('r') as before; binary mode ('rb') is required
# for pickle protocols >= 1 -- confirm how the sample file was written before changing it.
# The `with` block closes the file handle as soon as loading finishes.
with open('gplus_pickle_sample.p', 'r') as pickle_file:
    graph = pickle.load(pickle_file)

In [56]:
# Per-node feature values, all derived from a node's incoming edges:
#   followers           - number of incoming edges
#   in_circles          - total length of the 'circle' entries on those edges
#   sum_circle_fraction - sum, over those edges, of len(circle) divided by the
#                         source node's 'num_circles'
# An average of the circle fractions is not computed by this function.

def calculateFeatureValues(graph):
    """Annotate every node of `graph` with its feature values, in place.

    For each node, the incoming edges are scanned; edges without a 'circle'
    attribute still count as followers but contribute nothing to the circle
    totals. The results are written into the node's attribute dict under the
    keys 'followers', 'in_circles' and 'sum_circle_fraction'.

    Returns nothing. Raises KeyError if the source node of an edge carrying
    'circle' data has no 'num_circles' attribute, and ZeroDivisionError if
    that attribute is 0.
    """
    for node, attrs in graph.nodes_iter(data=True):
        incoming = graph.in_edges(node, data=True)

        circle_count = 0
        fraction_sum = 0
        for follower, _, edge_attrs in incoming:
            if 'circle' not in edge_attrs:
                continue
            shared = len(edge_attrs['circle'])
            circle_count += shared
            # Multiply by 1.0 to force true division under Python 2.
            fraction_sum += shared * 1.0 / graph.node[follower]['num_circles']

        attrs.update({
            'followers': len(incoming),
            'in_circles': circle_count,
            'sum_circle_fraction': fraction_sum,
        })

In [57]:
# Weight applied to each node feature when computing a score
# (every feature currently has weight 1, so they count equally)
feature_weights = {
    'followers': 1,
    'in_circles': 1,
    'sum_circle_fraction': 1,
}

In [60]:
# Calculating the score for every node based on our metric
def calculateScores(graph, weights=None):
    """Return a dict mapping each gendered node to its weighted feature score.

    Nodes whose attribute dict has no 'gender' key are skipped. For every
    other node the score is the sum of weight * node_attribute[feature] over
    all features in `weights`.

    Parameters:
        graph   -- graph object exposing nodes_iter(data=True).
        weights -- optional dict of feature name -> numeric weight. When
                   omitted, the notebook-level `feature_weights` dict is used,
                   which matches the previous behaviour.

    Raises KeyError if a gendered node lacks one of the weighted features
    (for example when the feature values were never computed for it).
    """
    if weights is None:
        # Fall back to the weights defined at notebook level.
        weights = feature_weights

    scores = {}
    for node, data in graph.nodes_iter(data=True):
        if 'gender' not in data:
            continue

        score = 0
        for feature, weight in weights.items():
            score += weight * data[feature]
        scores[node] = score

    return scores

In [62]:
calculateFeatureValues(graph.g)
scores = calculateScores(graph.g)
for k, v in scores.iteritems():
    print "Node:", k, "Score:", v

Node: 101600394231100833788 Score: 1
Node: 101171267267295051773 Score: 1
Node: 117926198238142332926 Score: 1
Node: 104747723716691230720 Score: 13.8333333333
Node: 115537850308642668545 Score: 1
Node: 106434838227311132675 Score: 1
Node: 100172275628404106038 Score: 1
Node: 109490683336261140486 Score: 1
Node: 117753274394525564935 Score: 1
Node: 106876394798726433078 Score: 1
Node: 101696794148772675595 Score: 1
Node: 111714769899525603342 Score: 4
Node: 115879247168566722575 Score: 1
Node: 103290168996685611025 Score: 1
Node: 116718369761749893139 Score: 1
Node: 105656572281780568085 Score: 1
Node: 112649031229724852246 Score: 1
Node: 115767881439123079191 Score: 1
Node: 117574554323772486921 Score: 1
Node: 103850564865290534940 Score: 1
Node: 110262116694098837533 Score: 1
Node: 110611453360582279169 Score: 1
Node: 105415132823244013603 Score: 2
Node: 101382403763798250839 Score: 1
Node: 111264903994108717222 Score: 1
Node: 118021675828732821545 Score: 1
Node: 10745655626893878890

In [65]:
gender_dict = {}
gender_dict['M'] = 0
gender_dict['F'] = 0
gender_dict['T'] = 0
gender_dict['O'] = 0
total_genders = {'M': 0, 'F': 0, 'T': 0, 'O': 0}
for node, score in scores.iteritems():
    gen = graph.g.node[node]['gender']
    gender_dict[gen] += score
    total_genders[gen] += 1
    
for gen, score in gender_dict.iteritems():
    print gen, total_genders[gen], score * 1.0 / total_genders[gen]
    


M 12363 6.88201998667
T 290 2.5174137931
O 2495 3.19250167001
F 3971 7.92761898766
