# Including edge sentiment values in polarity score

### Imports

In [50]:
import pandas as pd
import networkx as nx
import statistics

### Functions

In [None]:
G = nx.read_gexf('../data/19march/graphs/HuntShowdown_network.gexf')

node_polarity_dict = {}

for node in G.nodes(data=True):
    current_node = node[0]

    if current_node not in node_polarity_dict.keys():
        node_polarity_dict[current_node] = []

    for edge in G.edges(data=True):
        to_node = edge[1]

        if to_node == current_node:
            sentiment = edge[2]['attr']
            node_polarity_dict[current_node].append(sentiment)

print(node_polarity_dict)

#### Polatization method 1: Measuring everyone's agreement with each other (Basically standard deviation, I later realized)

In [120]:
high_neg = [0, 0, 0, 0.1, 0.1, 0, 0.2]
high_pos = [1, 1, 1, 0.9, 0.9, 1, 0.8]
high_mid = [0.5, 0.5, 0.5, 0.4, 0.4, 0.5, 0.6]
high_pol = [0.3, 0.3, 0.7, 0.7, 0.2, 0.2, 0.8, 0.8]
total_neg = [0, 0, 0, 0, 0, 0, 0]
total_pos = [1, 1, 1, 1, 1, 1, 1]
total_mid = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5]
total_pol = [0, 0, 0, 0, 1, 1, 1, 1]
mixed = [0, 0.2, 0.4, 0.6, 0.8, 1]
binary = [0, 1]

test_scores = [high_neg, high_pos, high_mid, high_pol, total_neg, total_pos, total_mid, total_pol, mixed, binary]
ideal_out = ['~0', '~0', '~0', '~1', '0', '0', '0', '1', '1', '1']

def pol_score(lis):
    '''Takes in a list of numerical values, outputs one single float representative of the polarization of the whole list'''
    
    #Initialize the average as the first element
    current_average = lis[0]
    #total_score = 0
    value_count = 1
    
    #Skip the first element because it was already included in the average
    for n in lis[1:]:

        score = abs((current_average - n))

#         if score < 0.5:
#             total_score -= score
#         else:
#             total_score += score
        current_average = ((current_average * value_count) + n)/(value_count+1)

        value_count +=1

        
    return score

for idx, i in enumerate(test_scores):
    print(pol_score(i), ideal_out[idx])

0.16666666666666669 ~0
0.16666666666666663 ~0
0.13333333333333336 ~0
0.34285714285714286 ~1
0.0 0
0.0 0
0.0 0
0.5714285714285714 1
0.6 1
1 1


In [97]:
for x in test_scores:
    
    mean = statistics.mean(x)
    std = statistics.stdev(x)
    try:
        pol = std/mean
        print(pol)
        print ('norm:',pol/(pol+1))
    except:
        print (std)

1.3768926368215255
norm: 0.5792826379666692
0.08344803859524395
norm: 0.0770208036035022
0.1420723210410729
norm: 0.12439870787829337
0.5451081150953975
norm: 0.3527960987129639
0.0
0.0
norm: 0.0
0.0
norm: 0.0
1.0690449676496976
norm: 0.5166852264521172
0.7483314773547883
norm: 0.4280260848972461


In [116]:
import numpy as np

def calculate_polarization(numbers):
    """
    Calculates polarization from numerical values using a modified Gini coefficient approach.
    Takes in a list of numbers, outputs a single float representing the polarity
    """
    numbers = np.array(numbers)

    mean = np.mean(numbers)

    absolute_deviations = np.abs(numbers - mean)

    sum_absolute_deviations = np.sum(absolute_deviations)

    gini_coefficient = 2 * (sum_absolute_deviations / (np.sum(numbers) * len(numbers)))

    polarization = 1-gini_coefficient

    return abs(polarization-1)



#Definitetly needs some tweeking, not finalized

for idx, i in enumerate(test_scores):
    print(calculate_polarization(i), ideal_out[idx])

0.3265306122448979 ~0
0.019789734075448373 ~0
0.02881152460984393 ~0
0.125 ~1
nan 0
0.0 0
0.0 0
0.25 1
0.19999999999999996 1
0.4444444444444444 1


  gini_coefficient = 2 * (sum_absolute_deviations / (np.sum(numbers) * len(numbers)))


In [117]:
np.sum(np.array(total_pol) - statistics.mean(total_pol))

0.0

In [118]:
calculate_polarization(total_pol)

0.25