In [1]:
import pandas as pd
import numpy as np
import sys
import os

import time, enum, math
import pylab as plt
import random
import warnings
from collections import defaultdict
warnings.filterwarnings('ignore')

from state import State
from Agent import MyAgent
from NetworkInformationDiffusionModel import NetworkInformationDiffusionModel
# from Visualization import Visualization
from Data import Data
# import seaborn as sns

import networkx as nx
# from numba import jit

import warnings
warnings.filterwarnings('ignore')

In [2]:
def compute_polarization(pol_incl):
    """Score how polarized a population of political inclinations is.

    The population is split into a strictly positive camp and a strictly
    negative camp (exact zeros belong to neither camp but still count towards
    the population size). The score is ``(1 - dA) * d`` where ``dA`` is the
    absolute difference of the camp sizes divided by the population size and
    ``d`` is half the absolute distance between the two camp means.

    Args:
        pol_incl: array-like supporting boolean-mask indexing, ``.shape`` and
            ``.mean()`` (for example a NumPy array or a pandas Series).

    Returns:
        The polarization score, or ``0`` when either camp is empty.
    """
    positive_camp = pol_incl[pol_incl > 0]
    negative_camp = pol_incl[pol_incl < 0]

    # The score is only computed when both camps are populated.
    if positive_camp.shape[0] == 0 or negative_camp.shape[0] == 0:
        return 0

    size_gap = abs(len(positive_camp) - len(negative_camp)) / pol_incl.shape[0]
    mean_gap = abs(positive_camp.mean() - negative_camp.mean()) / 2
    return (1 - size_gap) * mean_gap

In [3]:
from collections import deque

def add_edges(G, data):
    """Add edges between sparsely connected nodes with compatible inclinations.

    Node ids ``0 .. len(data) - 4`` are considered (the last three rows of
    ``data`` are excluded). Ids are taken from the end of that range, with a
    queue of discovered neighbours consulted first. For every processed node
    ``cur`` with at most 8 edges, each not-yet-seen neighbour with at most 8
    edges is marked as seen and queued, and every endpoint ``n`` of that
    neighbour's edges (again with at most 8 edges) becomes a candidate: the
    edge ``(cur, n)`` is proposed when ``|pol(cur) - pol(n)|`` is below
    ``lat_rej(n) - lat_acc(n)`` and ``n`` is not ``cur`` itself.

    Because neighbours are marked as seen when they are queued, they are
    skipped when later dequeued; the loop also stops as soon as the id pool
    is exhausted, even if the queue still holds entries.

    Args:
        G: graph exposing ``edges(node)`` and ``add_edges_from(pairs)``.
        data: DataFrame indexed positionally by node id with columns
            ``pol_inclination``, ``lat_acc`` and ``lat_rej``.

    Returns:
        ``G`` after all proposed edges were added in one batch.
    """
    connection_threshold = 8
    seen = set()
    new_links = []
    pending = list(range(len(data) - 3))
    frontier = deque([pending.pop()])

    def edge_count(node):
        # Number of edges G reports for this node.
        return len(list(G.edges(node)))

    while pending:
        # Refill the queue from the id pool whenever it runs dry.
        if not frontier:
            frontier.append(pending.pop())
        cur = frontier.popleft()

        # Nodes above the threshold are never used as a source.
        if edge_count(cur) > connection_threshold:
            seen.add(cur)
        if cur in seen:
            continue

        cur_polInc = data.iloc[cur]['pol_inclination']
        for cur_edge in list(G.edges(cur)):
            neighbor = cur_edge[1]
            if neighbor in seen or edge_count(neighbor) > connection_threshold:
                continue
            frontier.append(neighbor)
            seen.add(neighbor)

            # Look one hop further: endpoints of the neighbour's own edges.
            for hop_edge in list(G.edges(neighbor)):
                candidate = hop_edge[1]
                if edge_count(candidate) > connection_threshold:
                    continue
                candidate_row = data.iloc[candidate]
                candidate_polInc = candidate_row['pol_inclination']
                tolerance = candidate_row['lat_rej'] - candidate_row['lat_acc']
                if abs(cur_polInc - candidate_polInc) < tolerance and cur != candidate:
                    new_links.append((cur, candidate))

    G.add_edges_from(new_links)
    return G


def update_tolerance(aggSanc, G, data):
    """Shift each user's latitude bounds by the sanctions on its outgoing edges.

    For every node id in ``0 .. len(data) - 4`` that is present in
    ``aggSanc``, the weights of its outgoing edges are summed and
    ``delta = 0.01 * sum`` is applied as ``lat_acc += delta`` and
    ``lat_rej -= delta``:

    * positive sum: applied only if the new ``lat_acc`` stays below both the
      new ``lat_rej`` and 2, and the new ``lat_rej`` stays above 0;
    * negative sum: applied only if the new ``lat_acc`` stays above 0 and the
      new ``lat_rej`` stays below 2.

    Afterwards every row of ``data`` (keyed by its ``id`` column) is pushed
    onto ``G`` as node attributes.

    Args:
        aggSanc: directed graph exposing ``nodes`` and
            ``out_edges(node, data=True)`` with a ``weight`` on every edge.
        G: networkx graph whose node attributes are refreshed.
        data: DataFrame with ``id``, ``lat_acc`` and ``lat_rej`` columns;
            modified in place.

    Returns:
        Tuple ``(G, data)``.
    """
    for cur in range(len(data) - 3):
        if cur not in aggSanc.nodes:
            continue

        agg_sanc = sum(attrs['weight'] for _, _, attrs in aggSanc.out_edges(cur, data=True))

        if agg_sanc > 0:
            # Decrease tolerance: the two bounds move towards each other.
            delta = 0.01 * agg_sanc
            raised_acc = data.iloc[cur]['lat_acc'] + delta
            lowered_rej = data.iloc[cur]['lat_rej'] - delta
            if raised_acc < lowered_rej and raised_acc < 2 and lowered_rej > 0:
                data.at[cur, 'lat_acc'] = raised_acc
                data.at[cur, 'lat_rej'] = lowered_rej
        elif agg_sanc < 0:
            # Increase tolerance: delta is negative, so the bounds move apart.
            delta = 0.01 * agg_sanc
            lowered_acc = data.iloc[cur]['lat_acc'] + delta
            raised_rej = data.iloc[cur]['lat_rej'] - delta
            if lowered_acc > 0 and raised_rej < 2:
                data.at[cur, 'lat_acc'] = lowered_acc
                data.at[cur, 'lat_rej'] = raised_rej

    nx.set_node_attributes(G, data.set_index('id').to_dict('index'))

    return G, data


 
def sever_edges(G, data):
    """Remove edges between well-connected nodes whose inclinations clash.

    Node ids ``0 .. len(data) - 4`` are considered (the last three rows of
    ``data`` are excluded). Ids are taken from the end of that range, with a
    queue of discovered neighbours consulted first. Only nodes with more than
    4 edges are processed; for such a node ``cur`` every not-yet-seen
    neighbour that also has more than 4 edges is marked as seen and queued,
    and the edge ``(cur, neighbour)`` is scheduled for removal when
    ``|pol(cur) - pol(neighbour)|`` exceeds
    ``lat_rej(neighbour) - lat_acc(neighbour)``.

    Because neighbours are marked as seen when they are queued, they are
    skipped when later dequeued.

    Args:
        G: graph exposing ``edges(node)`` and ``remove_edges_from(pairs)``.
        data: DataFrame indexed positionally by node id with columns
            ``pol_inclination``, ``lat_acc`` and ``lat_rej``.

    Returns:
        ``G`` after all scheduled edges were removed in one batch.
    """
    connection_threshold = 4
    seen = set()
    doomed_links = []
    pending = list(range(len(data) - 3))
    frontier = deque([pending.pop()])

    def edge_count(node):
        # Number of edges G reports for this node.
        return len(list(G.edges(node)))

    while pending:
        # Refill the queue from the id pool whenever it runs dry.
        if not frontier:
            frontier.append(pending.pop())
        cur = frontier.popleft()

        # Nodes at or below the threshold keep all of their edges.
        if edge_count(cur) <= connection_threshold:
            seen.add(cur)
        if cur in seen:
            continue

        cur_polInc = data.iloc[cur]['pol_inclination']
        for cur_edge in list(G.edges(cur)):
            neighbor = cur_edge[1]
            if neighbor in seen or edge_count(neighbor) <= connection_threshold:
                continue
            frontier.append(neighbor)
            seen.add(neighbor)

            neighbor_row = data.iloc[neighbor]
            neighbor_polInc = neighbor_row['pol_inclination']
            tolerance = neighbor_row['lat_rej'] - neighbor_row['lat_acc']
            if abs(cur_polInc - neighbor_polInc) > tolerance:
                doomed_links.append((cur, neighbor))

    G.remove_edges_from(doomed_links)
    return G

def aggregate_sanction_graph(graphs):
    """Merge several sanction graphs into one directed graph of mean weights.

    Args:
        graphs: iterable of graphs exposing ``edges(data=True)``; an edge
            without a ``weight`` attribute counts as weight ``1.0``.

    Returns:
        A new ``nx.DiGraph`` with one edge per distinct ``(u, v)`` pair seen
        in the inputs, whose ``weight`` is the arithmetic mean of all weights
        collected for that pair.
    """
    collected = defaultdict(list)
    for graph in graphs:
        for src, dst, attrs in graph.edges(data=True):
            collected[(src, dst)].append(attrs.get('weight', 1.0))

    merged = nx.DiGraph()
    for pair, pair_weights in collected.items():
        src, dst = pair
        merged.add_edge(src, dst, weight=sum(pair_weights) / len(pair_weights))

    return merged


def plot(G):
    """Draw ``G`` with a spring layout, sizing every node by its degree.

    Args:
        G: networkx graph to draw.
    """
    # Degree scaled by 10 so low-degree nodes remain visible.
    node_sizes = [G.degree(n) * 10 for n in G.nodes()]
    pos = nx.spring_layout(G)
    plt.figure(figsize=(4, 4))
    # Pass the computed layout; previously it was discarded and nx.draw
    # computed a second layout of its own.
    nx.draw(G, pos=pos, with_labels=True, edge_color="gray", node_size=node_sizes)
    plt.show()
    

def update_news_followers(G, data, aggSanc):
    """Rewire edges between the news nodes (100, 101, 102) and user nodes.

    For every news node ``n`` present in ``aggSanc``:

    * the sources of ``n``'s incoming edges in ``aggSanc`` are treated as its
      followers, with the edge weight as their sanction;
    * among followers with a negative sanction, ``min(3, ceil(10%))`` are
      sampled without replacement, weighted by the magnitude of the sanction,
      and the edge ``(n, follower)`` is scheduled for removal;
    * among user ids ``0..99`` that are not followers, ``min(3, ceil(10%))``
      are sampled without replacement, weighted by the inverse of their
      ``pol_inclination`` distance to ``n``, and the edge ``(n, user)`` is
      scheduled for addition.

    Sampling is skipped when there is nothing to pick, so an empty candidate
    pool no longer reaches ``DataFrame.sample`` with empty weights.

    Args:
        G: graph exposing ``remove_edges_from`` and ``add_edges_from``.
        data: DataFrame indexed positionally by node id with a
            ``pol_inclination`` column; not modified.
        aggSanc: directed graph supporting ``n in aggSanc`` and
            ``in_edges(n, data=True)``.

    Returns:
        ``G`` after the scheduled removals and additions were applied.
    """
    dropped_links = []
    new_links = []

    for n in [100, 101, 102]:
        if n not in aggSanc:
            continue

        followers_sanct = [(u, w.get('weight')) for u, _, w in aggSanc.in_edges(n, data=True)]
        followers_df = pd.DataFrame(followers_sanct, columns=['followers', 'sanctions'])

        # Followers that sanctioned negatively are candidates for unfollowing.
        negative_df = followers_df[followers_df['sanctions'] < 0].copy()
        negative_df['abs_sanctions'] = negative_df['sanctions'].abs()
        total_weight = negative_df['abs_sanctions'].sum()
        negative_df['prob'] = negative_df['abs_sanctions'] / total_weight

        num_to_pick = min(3, math.ceil(len(negative_df) * 0.1))
        if num_to_pick > 0:
            if negative_df['prob'].sum() > 0:
                selected = negative_df.sample(n=num_to_pick, weights='prob', replace=False)
            else:
                selected = negative_df.sample(n=num_to_pick, replace=False)
            for follower in selected['followers']:
                dropped_links.append((n, follower))

        # Set membership keeps the non-follower scan linear.
        follower_ids = set(followers_df['followers'].tolist())
        nonfollowers = [i for i in range(100) if i not in follower_ids]

        # Explicit copy: the helper column must not be written onto a slice of data.
        nonfollowers_df = data.iloc[nonfollowers].copy()
        news_pol = data.iloc[n]['pol_inclination']
        nonfollowers_df['diff'] = (nonfollowers_df['pol_inclination'] - news_pol).abs()

        num_to_pick = min(3, math.ceil(len(nonfollowers_df) * 0.1))
        if num_to_pick > 0:
            # Closer inclination -> larger weight; the epsilon avoids division by zero.
            closeness = 1 / (nonfollowers_df['diff'] + 1e-6)
            sampled_df = nonfollowers_df.sample(n=num_to_pick, weights=closeness, replace=False)
            for nonfol in sampled_df.index.tolist():
                new_links.append((n, nonfol))

    G.remove_edges_from(dropped_links)
    G.add_edges_from(new_links)

    return G

In [4]:
# trending_topics = {1:[-1, -1.2], 2:[2, 0], 3:[-2, 0.3], 4:[0, 12], 5:[8, 10], 6:[0, 3]}

def normalize(i, weighted_dict):
    """Min-max scale component ``i`` of every value in a dict of sequences.

    The input is not modified: every value is copied with ``value[:]`` and the
    scaling is applied to the copies. When all ``i``-th components are equal
    the range is treated as 1, so they all become 0.

    Args:
        i: position inside each value to normalize.
        weighted_dict: mapping from key to a sliceable, mutable sequence.

    Returns:
        A new dict with the same keys whose ``i``-th components lie in [0, 1].
    """
    scaled = {key: values[:] for key, values in weighted_dict.items()}

    column = [values[i] for values in scaled.values()]
    lowest = min(column)
    highest = max(column)

    span = highest - lowest
    if not span:
        # Degenerate column: avoid dividing by zero.
        span = 1

    for values in scaled.values():
        values[i] = (values[i] - lowest) / span

    return scaled

In [5]:
import numpy as np
import random

def normalize_list(v):
    """Min-max scale a sequence of numbers into [0, 1].

    Args:
        v: array-like of numbers.

    Returns:
        A NumPy array of the scaled values, or an array of zeros with the
        dtype of the converted input when all values are equal.
    """
    arr = np.array(v)
    low = np.min(arr)
    high = np.max(arr)
    if high - low == 0:
        return np.zeros_like(arr)
    return (arr - low) / (high - low)

def pick_an_author(data):
    """Randomly pick a key of ``data``, weighted by the mean of its values.

    Args:
        data: mapping from author id to a sequence of numeric statistics.

    Returns:
        One key of ``data``. Each key's probability is its mean statistic
        divided by the sum of all means; when that sum is 0 the choice is
        uniform.
    """
    mean_scores = {author: np.mean(stats) for author, stats in data.items()}
    total = sum(mean_scores.values())

    if total == 0:
        return random.choice(list(data.keys()))

    probabilities = [mean_scores[author] / total for author in data.keys()]
    picked = random.choices(list(data.keys()), weights=probabilities, k=1)
    return picked[0]

In [6]:

def pick_a_topic(trending_topics, author_id, topic_choices, news_topic_pref, post_conf):
    """Pick the topic of the next post for ``author_id``.

    Selection rules, in order:

    1. No posts left at all: prints a notice and returns ``None`` as topic.
    2. A single remaining choice is returned as is.
    3. If every trend value is 1, or every trend value is 0, the topic is
       sampled from ``topic_choices`` weighted only by the author's
       preferences.
    4. Otherwise both trend components are min-max normalized (on a copy),
       each topic's weight is the mean of its two normalized components times
       the author's preference for that topic, and a topic is sampled from
       those weights. If the sampled topic has no posts left it is removed
       from ``topic_choices`` (in place) and from the normalized trends, and
       the selection is repeated.

    Args:
        trending_topics: dict mapping topic id to a two-element list.
        author_id: key into ``news_topic_pref``.
        topic_choices: list of selectable topic ids; may be mutated.
        news_topic_pref: dict mapping author id to a preference sequence
            indexed by ``topic_id - 1``.
        post_conf: DataFrame of remaining posts with a ``topic`` column.

    Returns:
        Tuple ``(topic_id, trending_topics, topic_choices, news_topic_pref)``.
        The tuple shape is kept on every path, including the empty-pool case
        (``topic_id`` is ``None``), so callers can always unpack it. When
        rule 4 applies the returned ``trending_topics`` is the normalized copy.
    """
    if post_conf.shape[0] == 0:
        print("No More posts to choose from!")
        # Previously a bare None was returned here, which broke tuple unpacking.
        return None, trending_topics, topic_choices, news_topic_pref

    if len(topic_choices) == 1:
        return topic_choices[0], trending_topics, topic_choices, news_topic_pref

    trend_values = list(trending_topics.values())
    all_ones = all(all(v == 1 for v in values) for values in trend_values)
    all_zeros = all(all(v == 0 for v in values) for values in trend_values)
    if all_ones or all_zeros:
        # Trends carry no information yet: fall back to preferences only.
        w = [news_topic_pref[author_id][i-1] for i in topic_choices]
        return random.choices(topic_choices, weights=w)[0], trending_topics, topic_choices, news_topic_pref

    trending_topics = normalize(0, trending_topics)
    trending_topics = normalize(1, trending_topics)

    keys = list(trending_topics.keys())
    # Look the preference up by topic id so a trend score is always paired
    # with the preference of the same topic, whatever the order of the
    # two containers.
    weights = [
        (sum(v) / len(v)) * news_topic_pref[author_id][k - 1]
        for k, v in trending_topics.items()
    ]
    try:
        topic_id = random.choices(keys, weights=weights)[0]

    except Exception:
        print("weights ", weights)
        sys.exit()

    if post_conf[post_conf['topic'] == topic_id].shape[0] == 0:
        # Exhausted topic: drop it and draw again from what is left.
        topic_choices.remove(topic_id)
        del trending_topics[topic_id]
        topic_id, trending_topics, topic_choices, news_topic_pref = pick_a_topic(trending_topics, author_id, topic_choices, news_topic_pref, post_conf)

    return topic_id, trending_topics, topic_choices, news_topic_pref

In [7]:
# def pick_an_author(news_stats): 
    
#     if all(all(v == 1 for v in values) for values in news_stats.values()) or all(all(v == 0 for v in values) for values in news_stats.values()):
#         return random.choices([100, 101, 102])[0]
    
#     print("Before ", news_stats)
#     news_stats = normalize(0, news_stats)
#     news_stats = normalize(1, news_stats)
    
#     keys, weights = zip(*[(k, sum(v) / len(v)) for k, v in news_stats.items()])
#     print("HEREEE", news_stats, zip(*[(k, sum(v) / len(v)) for k, v in news_stats.items()]))
#     author_id = random.choices(keys, weights=weights)[0]
    
#     return author_id

# def pick_a_topic(trending_topics, author_id, topic_choices, news_topic_pref, post_conf):
    
#     if post_conf.shape[0] == 0:
#         print("No More posts to choose from!")
#         return None
#         # sys.exit()
        
#     if len(topic_choices) == 1:
#         return topic_choices[0], trending_topics, topic_choices, news_topic_pref
        
#     if all(all(v == 1 for v in values) for values in trending_topics.values()) or all(all(v == 0 for v in values) for values in trending_topics.values()):
#         # print()
#         w = [news_topic_pref[author_id][i-1] for i in topic_choices]
#         # print("weights ", w)
#         return random.choices(topic_choices, weights = w)[0], trending_topics, topic_choices, news_topic_pref
    
#     trending_topics = normalize(0, trending_topics)
#     trending_topics = normalize(1, trending_topics)
    
#     keys, weights = zip(*[(k, sum(v) / len(v)) for k, v in trending_topics.items()])
#     topic_weights = [news_topic_pref[author_id][i-1] for i in topic_choices]
#     weights = [w*n for w, n in zip(weights, topic_weights)]
#     try:
#         topic_id = random.choices(keys, weights=weights)[0]
        
#     except Exception as e:
#         print("weights ", weights)
#         sys.exit()
    
#     # print("topic", topic_id, "\t", post_conf[post_conf['topic'] == topic_id].shape, "\t")
    
#     # sampled_post = post_conf[post_conf.topic == picked_topic].sample()
    
#     if post_conf[post_conf['topic'] == topic_id].shape[0] == 0:
        
#         topic_choices.remove(topic_id)
        
#         del trending_topics[topic_id]
        
#         # print("Here")
#         # print("Here topic", topic_id, "\t", post_conf[post_conf['topic'] == topic_id].shape, "\t", trending_topics, topic_choices)
#         # time.sleep(5)
#         topic_id, trending_topics, topic_choices, news_topic_pref = pick_a_topic(trending_topics, author_id, topic_choices, news_topic_pref, post_conf)
        
#     return topic_id, trending_topics, topic_choices, news_topic_pref


def sample_post(post_conf, picked_topic, author_id, data):
    """Sample one post on ``picked_topic`` close to the author's own stance.

    Args:
        post_conf: DataFrame of posts with ``topic`` and ``stance`` columns;
            not modified.
        picked_topic: topic id; the author's stance is read from the
            ``topic_<picked_topic>`` column of ``data``.
        author_id: positional row of the author in ``data``.
        data: DataFrame holding per-agent topic stances.

    Returns:
        A one-row DataFrame (original index preserved) with an extra ``diff``
        column holding the absolute stance distance to the author. Posts are
        weighted by ``1 / (diff + 1e-6)``.
    """
    stance = data.iloc[author_id][f'topic_{picked_topic}']

    # Explicit copy so the helper column is not written onto a filtered slice.
    candidates = post_conf[post_conf.topic == picked_topic].copy()
    candidates['diff'] = (candidates['stance'] - stance).abs()

    # The epsilon keeps the weight finite for a post matching the stance exactly.
    probs = 1 / (candidates['diff'] + 1e-6)
    row = candidates.sample(weights=probs)

    return row

In [8]:
def compare_data(df, data, attr):
    """Print every row position at which ``attr`` differs between two frames.

    Args:
        df: DataFrame holding the old values; its row count drives the loop.
        data: DataFrame holding the new values, compared position by position.
        attr: column name to compare.
    """
    for row in range(df.shape[0]):
        before = df.iloc[row][attr]
        after = data.iloc[row][attr]
        if before != after:
            print("old value ", before, " \t New value", after)

def update_user_attributes(data, G, user, agg_sanct, attr):
    """Nudge one bounded attribute of a user by the sanction it received.

    The change is ``0.1 * agg_sanct`` limited to ``[-0.05, 0.05]`` and the
    resulting attribute is limited to ``[0, 1]``. The new value is written to
    both ``data`` (in place) and the graph node. For ``attr`` equal to
    ``'privacy'`` or ``'activity'`` the raw sanction is also appended to the
    module-level history ``user_sanc_pr`` / ``user_sanc_act``.

    Args:
        data: DataFrame of agents, addressed positionally by ``user``.
        G: graph whose ``nodes[user]`` mapping receives the new value.
        user: positional row in ``data`` and node id in ``G``.
        agg_sanct: aggregated sanction score for this user.
        attr: name of the column / node attribute to update.

    Returns:
        Tuple ``(data, G)``.
    """
    # Read the single cell directly instead of copying the whole frame.
    old_attribute = data[attr].iloc[user]

    ## NEED TO CHANGE THIS FORMULA TO BE SOMETHING MORE MEANINGFUL
    update_val = max(-0.05, min(agg_sanct * 0.1, 0.05))
    new_attribute = max(min(1, old_attribute + update_val), 0)

    if attr == 'privacy':
        user_sanc_pr.setdefault(user, []).append(agg_sanct)

    if attr == 'activity':
        user_sanc_act.setdefault(user, []).append(agg_sanct)

    # A single positional setter: chained ``data[attr].iloc[user] = ...``
    # silently writes to a temporary under pandas Copy-on-Write.
    data.iloc[user, data.columns.get_loc(attr)] = new_attribute
    G.nodes[user][attr] = new_attribute

    return data, G
    
# def attitude_shift(sanction_score, att_diff, author_inclination, reciever_inclination):
    
#     attitude_shft = 0.1 * sanction_score * 1/(att_diff+1)
#     if(author_inclination >= reciever_inclination):
#         new_user_incl = author_inclination - attitude_shft
#     else:
#         new_user_incl = author_inclination + attitude_shft
        
#     return new_user_incl


def update_satisfaction(data, G, user, agg_sanct):
    """Shift a user's satisfaction by a tenth of the sanction it received.

    The new value is written to both ``data`` (in place) and the graph node,
    and the raw sanction is appended to the module-level history
    ``user_sanc_sat``. The satisfaction value is not bounded here.

    Args:
        data: DataFrame of agents with a ``satisfaction`` column, addressed
            positionally by ``user``.
        G: graph whose ``nodes[user]`` mapping receives the new value.
        user: positional row in ``data`` and node id in ``G``.
        agg_sanct: aggregated sanction score for this user.

    Returns:
        Tuple ``(data, G)``.
    """
    # Read the single cell directly instead of copying the whole frame.
    old_satisfaction = data['satisfaction'].iloc[user]

    ## NEED TO CHANGE THIS FORMULA TO BE SOMETHING MORE MEANINGFUL
    new_satisfaction = old_satisfaction + (agg_sanct * 0.1)

    user_sanc_sat.setdefault(user, []).append(agg_sanct)

    # A single positional setter: chained ``data[col].iloc[user] = ...``
    # silently writes to a temporary under pandas Copy-on-Write.
    data.iloc[user, data.columns.get_loc('satisfaction')] = new_satisfaction
    G.nodes[user]['satisfaction'] = new_satisfaction

    return data, G

def clamp(num, minn, maxx):
    """Restrict ``num`` to ``[minn, maxx]``; the lower bound is checked first."""
    if num < minn:
        return minn
    if num > maxx:
        return maxx
    return num


# def update_activity(data, G, state, post, lower_attd_th, upper_attd_th):
    
#     old_activity = data.copy()
#     # old_activity['shift'] = [0] * old_activity.shape[0]
#     topic_id = post['topic']
    
#     # user_stance = data[f'topic_{topic_id}'].copy()
#     # old_activity[(abs(user_stance - post['stance']) < lower_attd_th)]
#     # print(old_activity[f'topic_{topic_id}'].shape)
    
#     old_activity['shift'] = np.select([abs(old_activity[f'topic_{topic_id}'] - post['stance']) < lower_attd_th, abs(old_activity[f'topic_{topic_id}'] - post['stance']) > upper_attd_th], [1, -1], default=0)
    
#     # print(old_activity['shift'].value_counts())
    
#     state_df = pd.Series(state)
#     state_df = state_df.map(lambda x: 1 if x != 2 else 0)
    
#     # print(state_df.value_counts())
    
#     old_activity['activity'] = old_activity['activity'] + old_activity['activity'] * old_activity['shift'] * state_df * post['stance'] * 0.1
#     old_activity['activity'] = old_activity['activity'].clip(lower=0, upper=1)
#     # print(new_activity.describe())
#     # sys.exit()

#     return old_activity

def update_user_preferences(sG, G, data, state, post, lower_attd_th, upper_attd_th):
    """Apply the sanctions of one diffusion run to the users' attributes.

    Edges of ``sG`` are read as ``(source, target)`` pairs carrying a
    ``weight``. Nodes 100, 101 and 102 are excluded from every update.

    * Every target gets its ``privacy`` and ``satisfaction`` updated with the
      mean weight of its incoming edges.
    * Every source gets its ``activity`` updated with the mean weight of its
      outgoing edges.
    * Every target then has its stance on the post's topic shifted. Each
      incoming edge contributes ``0.1 * weight / (1 + d)``, where ``d`` is the
      ``pol_inclination`` distance between source and target: added when
      ``d <= lat_acc`` of the target, subtracted when ``d >= lat_rej`` of the
      target, ignored otherwise. The summed shift is limited to
      ``[-0.05, 0.05]`` and the resulting stance to ``[-1, 1]``.

    Args:
        sG: directed sanction graph exposing ``in_edges(node, data=True)``.
        G: agent graph whose node attributes are kept in sync.
        data: DataFrame of agents, modified in place; must contain ``id``,
            ``pol_inclination``, ``lat_acc``, ``lat_rej`` and the
            ``topic_<id>`` column of the post's topic.
        state: unused by this function.
        post: dict with at least a ``topic`` entry.
        lower_attd_th: unused by this function.
        upper_attd_th: unused by this function.

    Returns:
        Tuple ``(data, G)``.
    """
    topic_col = 'topic_' + str(post['topic'])
    labels = nx.get_edge_attributes(sG, 'weight')
    outgoing = list(set([x[0] for x in labels.keys()]) - set([100, 101, 102]))
    incoming = list(set([x[1] for x in labels.keys()]) - set([100, 101, 102]))
    updated_data = data

    for n in incoming:
        lab = [labels[x] for x in labels if x[1] == n]
        agg_sanct = sum(lab)/len(lab)

        updated_data, G = update_user_attributes(updated_data, G, n, agg_sanct, 'privacy')
        updated_data, G = update_satisfaction(updated_data, G, n, agg_sanct)

    for n in outgoing:
        lab = [labels[x] for x in labels if x[0] == n]
        agg_sanct = sum(lab)/len(lab)

        updated_data, G = update_user_attributes(updated_data, G, n, agg_sanct, 'activity')

    for node in incoming:
        # The target's own values do not change while its edges are scanned,
        # so they are looked up once per node.
        node_row = updated_data[updated_data['id'] == node]
        lat_acc = node_row['lat_acc'].values[0]
        lat_rej = node_row['lat_rej'].values[0]
        node_inclination = node_row['pol_inclination'].values[0]

        att_change = 0
        for source, _, attrs in sG.in_edges(node, data=True):
            sanction_score = attrs['weight']
            source_inclination = updated_data[updated_data['id'] == source]['pol_inclination'].values[0]
            att_diff = abs(source_inclination - node_inclination)

            att_shift = 0
            if att_diff <= lat_acc:
                att_shift = (sanction_score * 0.1)/(1+att_diff)
            elif att_diff >= lat_rej:
                att_shift = -1 * (sanction_score * 0.1)/(1+att_diff)

            att_change += att_shift

        att_change = max(-0.05, min(att_change, 0.05))

        topic_pos = updated_data.columns.get_loc(topic_col)
        new_stance = updated_data.iloc[node, topic_pos] + att_change
        new_stance = np.clip(new_stance, -1, 1)

        # A single positional setter: chained ``frame[col].iloc[i] = ...``
        # silently writes to a temporary under pandas Copy-on-Write.
        updated_data.iloc[node, topic_pos] = new_stance
        G.nodes[node][topic_col] = new_stance

    return updated_data, G

def update_pol_pol_in_graph(data, G):
    """Copy every node's ``pol_inclination`` from ``data`` onto the graph.

    Args:
        data: DataFrame with a ``pol_inclination`` column, addressed
            positionally by node id.
        G: graph that iterates over its node ids and exposes a per-node
            attribute mapping through ``G.nodes[node]``.

    Returns:
        ``G`` with the attribute refreshed on every node.
    """
    for node_id in G:
        node_row = data.iloc[node_id]
        G.nodes[node_id].update(pol_inclination=node_row['pol_inclination'])

    return G

def run_simulation(post, G, steps, seed):
    """Run one diffusion model for a single post and collect its outputs.

    The model is built from ``post``, ``G`` and ``seed`` plus the
    module-level settings ``se_flag``, ``se_threshold`` and ``topic_weights``.
    Before every step the model's randomizer is reset with ``seed``.

    Args:
        post: post description handed to the model.
        G: graph the model runs on.
        steps: number of model steps to execute.
        seed: seed passed to the model and to every randomizer reset.

    Returns:
        Tuple ``(model, states, agents, model.G, model.G_share, agent_state)``
        where ``states`` is the integer state of every grid cell content and
        ``agent_state`` is the data collector's agent-variable dataframe.
    """
    model = NetworkInformationDiffusionModel(post, G, se_flag, se_threshold, topic_weights, seed)

    for step_idx in range(steps):
        model.reset_randomizer(seed)
        model.step(step_idx)

    agent_state = model.datacollector.get_agent_vars_dataframe()

    states = []
    for cell in model.grid.get_all_cell_contents():
        states.append(int(cell.state))

    return model, states, model.agents, model.G, model.G_share, agent_state

def start_simulation(c_i, c_k, data, G, post_conf, n_topics, lower_attd_th, upper_attd_th, seed, trending_topics, topic_choices, news_topic_pref):
    """Publish posts one by one until the pool is empty, tracking network metrics.

    Each iteration picks an author (from the module-level ``news_stats``) and
    a topic, samples a matching post and removes it from ``post_conf`` (in
    place), runs a 10-step diffusion for it, updates the author and topic
    statistics with the reach and mean absolute sanction, applies the
    sanctions to the users, recomputes ``pol_inclination`` from the six topic
    stances and records polarization, homophily, polarity, satisfaction,
    activity and privacy. Every 10th post the graph is rewired and the
    tolerances are updated from the sanction graphs collected since the
    previous rewiring.

    The run is aborted with ``sys.exit`` when no topic can be picked, when the
    per-state agent counts are inconsistent, or when a NaN inclination or
    polarization appears.

    Args:
        c_i: unused by this function.
        c_k: unused by this function.
        data: DataFrame of agents.
        G: agent graph; rewired in place every 10 posts.
        post_conf: DataFrame of posts with ``topic`` and ``stance`` columns;
            consumed in place.
        n_topics: unused by this function.
        lower_attd_th: forwarded to ``update_user_preferences``.
        upper_attd_th: forwarded to ``update_user_preferences``.
        seed: seed forwarded to ``run_simulation``.
        trending_topics: dict mapping topic id to a two-element list.
        topic_choices: list of selectable topic ids.
        news_topic_pref: dict mapping author id to topic preferences.

    Returns:
        Tuple ``(updated_data, G_agents, sharing_details, polarization,
        polarization2, network_homophily, net_homophily, polarity,
        satisfaction, activity, privacy, states)``; every list has one entry
        per published post.
    """
    steps = 10
    states = []
    network_homophily = []
    net_homophily = []
    polarization, polarization2 = [], []
    satisfaction = []
    activity = []
    privacy = []
    polarity = []
    SancGraphs = []
    sharing_details = []
    updated_data = pd.DataFrame()
    G_agents = None
    i = 0

    while post_conf.shape[0] > 0:

        i += 1

        author_id = pick_an_author(news_stats)
        picked_topic, trending_topics, topic_choices, news_topic_pref = pick_a_topic(trending_topics, author_id, topic_choices, news_topic_pref, post_conf)

        if not picked_topic and post_conf[post_conf.topic == picked_topic].shape[0] == 0:
            print("no more posts for ", picked_topic, post_conf[post_conf.topic == picked_topic].shape)
            sys.exit(0)

        print("\t", i, "\t", post_conf.shape, end = '\r')

        sampled_post = sample_post(post_conf, picked_topic, author_id, data)
        post_conf.drop(sampled_post.index, inplace = True)

        post = dict()
        post['author'] = author_id
        post['topic'] = sampled_post.topic.values[0]
        post['stance'] = sampled_post.stance.values[0]

        _, state, _, G_agents, G_sanctions, _ = run_simulation(post, G, steps, seed)

        states.append(state)
        SancGraphs.append(G_sanctions)

        # Tally agents per state code.
        og_agents_count = sum([1 for x in state if x == 0])
        received_agents_count_ = sum([1 for x in state if x == 1])
        # NOTE(review): the "- 2" presumably discounts the two non-authoring news nodes - confirm.
        not_received_agents_count = sum([1 for x in state if x == 2]) - 2
        spreader_agents_count = sum([1 for x in state if x == 3])
        disinterested_agents_counts = sum([1 for x in state if x == 4])
        received_agents_count = spreader_agents_count + disinterested_agents_counts + received_agents_count_

        x = not_received_agents_count + received_agents_count
        if abs(100 - x) > 0 or og_agents_count != 1:
            print("Number of agents exceeds 100.")
            print(i, og_agents_count, author_id, received_agents_count_, not_received_agents_count, spreader_agents_count, disinterested_agents_counts,received_agents_count)
            sys.exit()

        content_reach = received_agents_count * 0.01
        sanc = [abs(s) for s in nx.get_edge_attributes(G_sanctions, 'weight').values()]
        agg_sanctions = sum(sanc) / len(sanc) if sanc else 0

        alpha = 0.9
        beta = 0.1

        news_stats[author_id][0] = alpha * news_stats[author_id][0] + beta * content_reach
        news_stats[author_id][1] = alpha * news_stats[author_id][1] + beta * agg_sanctions

        trending_topics[post['topic']][0] = beta * trending_topics[post['topic']][0] + alpha * content_reach

        # Read the author's stance from the topic_<id> column. Indexing the
        # row with the bare topic id does not address that column.
        author_stance = data.iloc[author_id][f"topic_{post['topic']}"]
        if post['stance'] * author_stance >= 0:
            trending_topics[post['topic']][1] = beta * trending_topics[post['topic']][1] + alpha * agg_sanctions
        else:
            trending_topics[post['topic']][1] = beta * trending_topics[post['topic']][1] - alpha * agg_sanctions

        sharing_details.append([post['topic'], round(post['stance'], 6), author_id, received_agents_count, not_received_agents_count, spreader_agents_count, disinterested_agents_counts])

        # Update user preferences based on sanctions received from other agents
        data_updated, G_agents = update_user_preferences(G_sanctions, G_agents, data.copy(), state, post, lower_attd_th, upper_attd_th)

        updated_data = data_updated.copy()
        # NOTE(review): ``data`` keeps its previous pol_inclination; only
        # ``updated_data`` receives the recomputed column below - confirm intent.
        data = data_updated

        updated_data['pol_inclination'] = ((updated_data[['topic_1', 'topic_2', 'topic_3']] * -1).sum(axis = 1) + (updated_data[['topic_4', 'topic_5', 'topic_6']]).sum(axis = 1))/6
        pol_inclination = updated_data['pol_inclination']

        if(updated_data['pol_inclination'].isna().sum() > 0):
            print("updated_data ", updated_data)
            sys.exit()

        G_agents = update_pol_pol_in_graph(updated_data, G_agents)

        satisfaction.append(updated_data['satisfaction'].mean())
        activity.append(updated_data['activity'].mean())
        privacy.append(updated_data['privacy'].mean())

        # Root mean square of the inclinations.
        pol = round(math.sqrt(sum([x*x for x in updated_data['pol_inclination']])/updated_data.shape[0]), 6)

        if(math.isnan(pol)):
            print(updated_data['pol_inclination'])
            sys.exit()

        avg_pol = round(updated_data['pol_inclination'].mean(), 6)

        temp_G = G.copy()

        node_attr = updated_data.set_index('id').to_dict('index')
        nx.set_node_attributes(temp_G, node_attr)

        # Coarse homophily: five inclination bands.
        for n in temp_G.nodes:
            node_pol = node_attr[n]['pol_inclination']
            if node_pol < -0.6:
                node_attr[n]['pol_inclination_grp'] = -2
            elif node_pol < -0.2:
                node_attr[n]['pol_inclination_grp'] = -1
            elif node_pol <= 0.2:
                node_attr[n]['pol_inclination_grp'] = 0
            elif node_pol <= 0.6:
                node_attr[n]['pol_inclination_grp'] = 1
            else:
                node_attr[n]['pol_inclination_grp'] = 2

        nx.set_node_attributes(temp_G, node_attr)
        hom = nx.attribute_assortativity_coefficient(temp_G, "pol_inclination_grp")
        net_homophily.append(hom)

        # Fine homophily: inclination rounded to one decimal.
        for n in temp_G.nodes:
            node_attr[n]['pol_inclination_grp'] = round(node_attr[n]['pol_inclination'] * 10)
        nx.set_node_attributes(temp_G, node_attr)

        hom = nx.attribute_assortativity_coefficient(temp_G, "pol_inclination_grp")

        # A single group yields NaN; it is recorded as perfect homophily.
        if(math.isnan(hom) and len(set(nx.get_node_attributes(temp_G, "pol_inclination_grp").values())) == 1):
            hom = 1

        polarization.append(pol)
        network_homophily.append(hom)
        polarity.append(avg_pol)
        polarization2.append(compute_polarization(pol_inclination))

        if i % 10 == 0:
            G = add_edges(G, data)
            G = sever_edges(G, data)

            aggSanc = aggregate_sanction_graph(SancGraphs)

            G = update_news_followers(G, data, aggSanc)

            G, data = update_tolerance(aggSanc, G, data)

            SancGraphs = []

    return updated_data, G_agents, sharing_details, polarization, polarization2, network_homophily, net_homophily, polarity, satisfaction, activity, privacy, states
   

def save_results_to_dir(run, epoch, data, mypath, sharing_details, net_polarization, net_polarization2, network_homophily, net_homophily, polarity, satisfaction, activity, privacy):
    """Assemble the per-entry simulation metrics into one table and write it to CSV.

    The table has one row per entry of ``sharing_details`` and is written to
    ``mypath + 'results_' + str(run) + '.csv'``. The parent directory of that
    file is created when it does not exist yet.

    Parameters
    ----------
    run : value interpolated into the output file name via ``str()``.
    epoch : unused; kept so existing call sites keep working.
    data : unused; kept so existing call sites keep working.
    mypath : path prefix concatenated as-is in front of the file name.
    sharing_details : row records with seven fields each, in the order
        topic, post_stance, author_id, num_of_agents_received,
        num_of_agents_not_received, num_of_spreader_agents,
        num_of_disinterested_agents.
    net_polarization, net_polarization2, network_homophily, net_homophily,
    polarity, satisfaction, activity, privacy : sequences stored as additional
        columns; pandas raises ``ValueError`` if a length does not match the
        number of rows.

    Returns
    -------
    None
    """
    results_df = pd.DataFrame(
        sharing_details,
        columns=['topic', 'post_stance', 'author_id', 'num_of_agents_received',
                 'num_of_agents_not_received', 'num_of_spreader_agents',
                 'num_of_disinterested_agents'],
    )

    # Output column name -> series supplied by the caller. Insertion order
    # fixes the column order of the CSV.
    metric_columns = {
        'network_polarization': net_polarization,
        'net_polarization2': net_polarization2,
        'network_homophily': network_homophily,
        'network_homophily2': net_homophily,
        'network_polarity': polarity,
        'satisfaction': satisfaction,
        'activity': activity,
        'user_preference': privacy,
    }
    for column_name, values in metric_columns.items():
        results_df[column_name] = values

    out_path = mypath + 'results_' + str(run) + '.csv'

    # mypath is a plain string prefix, so derive the directory from the final
    # file path instead of assuming mypath itself names a directory.
    out_dir = os.path.dirname(out_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    results_df.to_csv(out_path)

    return


def save_graph(run, i, polarization, flag):
    """Plot a metric series against its index, save it as a JPEG and show it.

    The image is written to ``../results/results_<run>/<stem>.jpg`` where the
    stem is selected by ``flag``.

    Parameters
    ----------
    run : value interpolated into the output directory name via ``str()``.
    i : unused; kept so existing call sites keep working.
    polarization : sequence of values to plot (despite the name, any of the
        four metrics selected by ``flag``).
    flag : 0 -> network_homophily_, 1 -> polarization_,
        2 -> network_polarity_, 3 -> satisfaction_.

    Raises
    ------
    ValueError
        If ``flag`` is not one of 0, 1, 2, 3. Raised before any figure or
        directory is created.
    """
    file_stems = {
        3: 'satisfaction_',
        2: 'network_polarity_',
        1: 'polarization_',
        0: 'network_homophily_',
    }
    # Validate first: previously an unknown flag left `filepath` unbound and
    # the failure only surfaced after a figure had already been created.
    if flag not in file_stems:
        raise ValueError('save_graph: flag must be one of 0, 1, 2, 3; got ' + repr(flag))

    filepath = '../results/results_' + str(run) + '/' + file_stems[flag] + '.jpg'
    os.makedirs(os.path.dirname(filepath), exist_ok=True)

    fig, ax = plt.subplots()
    ax.plot(range(len(polarization)), polarization)
    fig.savefig(filepath)
    plt.show()
    # Release the figure explicitly so repeated calls do not accumulate open
    # figures on backends where show() does not close them.
    plt.close(fig)

    return
    
def save_data(run, i, data, flag, mypath):
    """Write a data snapshot to CSV under the ``mypath`` prefix.

    Parameters
    ----------
    run : value interpolated into the final-data file name via ``str()``.
    i : unused; kept so existing call sites keep working.
    data : object exposing ``to_csv`` (a pandas DataFrame in this notebook).
    flag : 1 writes ``initial_data.csv``; 2 writes ``final_data_<run>.csv``;
        any other value writes nothing.
    mypath : directory prefix concatenated as-is in front of the file name,
        so it is expected to end with a path separator.

    Returns
    -------
    None
    """
    # makedirs also creates missing parents (e.g. 'results/' for 'results/SE/')
    # and tolerates the directory already existing.
    if mypath:
        os.makedirs(mypath, exist_ok=True)

    if flag == 1:
        data.to_csv(mypath + 'initial_data.csv')
    elif flag == 2:
        data.to_csv(mypath + 'final_data_' + str(run) + '.csv')

    return

In [9]:
# post_conf = post_conf.groupby('topic', group_keys=False).apply(lambda x: x.sample(frac=0.1))

In [10]:
# Scratch check of the range used for the setting index `i` in the sweep cell (yields 1 and 2).
range(1,3)

range(1, 3)

### Selective Exposure

In [None]:
# Selective-exposure sweep: for every seed k and every setting index i, run one
# simulation starting from that seed's initial data and graph, then pickle the
# resulting graph to `mypath`.
#
# NOTE(review): every name assigned in this cell is a module-level global.
# news_stats, user_sanc_*, se_threshold, se_flag and topic_weights are not
# passed to start_simulation, so they are presumably read as globals by the
# simulation code -- confirm before renaming or removing any of them.
import pickle
import time

lower_attd_th = 0.6
upper_attd_th = 1.4
se_flags = [False, True, True, True]
se_thresholds = [0, 0.4, 1.0, 1.6]
n_topics = 6
topic_weights = [1] * (n_topics + 1)
seeds = [1,2,3,4,5,6,7,8,9,10]

# Create the output directory up front so a missing folder is detected before
# the simulations run rather than at the final pickle.dump.
mypath = 'results/SE/'
os.makedirs(mypath, exist_ok=True)

# Wall-clock start of the sweep; not read anywhere in this cell.
start_time = time.time()

for k in seeds:

    for i in range(1,3):

        # Rebuild the mutable bookkeeping structures for every run so that no
        # state carries over from the previous (k, i) combination.
        topic_choices = [1,2,3,4,5,6]
        trending_topics = {1:[1, 1], 2:[1, 1], 3:[1, 1], 4:[1, 1], 5:[1, 1], 6:[1, 1]}
        news_stats = {100: [1, 1],
                      101: [1, 1],
                      102: [1, 1]}

        news_topic_pref = {100: [1,1,1,1,1,1],
                           101: [2,2,2,1,1,1],
                           102: [1,1,1,2,2,2]}

        user_sanc_sat, user_sanc_pr, user_sanc_act = {}, {}, {}

        # Only Python's `random` module is seeded here.
        # NOTE(review): if the simulation also draws from numpy's RNG, seed it
        # as well to make runs reproducible -- confirm against the simulation code.
        random.seed(k)

        data_path = f'initial_data/initial_data_{k}.csv'
        initial_data = pd.read_csv(data_path)

        post_conf =  pd.read_csv('initial_data/post_conf_20k.csv')
        post_conf = post_conf.rename(columns = {'issue': 'topic'})
        # Topic id 0 in the file is remapped to 6.
        post_conf['topic'] = post_conf.topic.replace(0, 6)

        # SECURITY: pickle.load can execute arbitrary code embedded in the
        # file; only load graph pickles produced by this project.
        initial_graph = None
        with open(f'initial_data/graph{k}.pkl', 'rb') as f:
            initial_graph = pickle.load(f)

        G_share = None

        se_threshold = se_thresholds[i]
        se_flag = se_flags[i]

        # Work on copies so the freshly loaded inputs are not modified through
        # these names.
        data = initial_data.copy()
        G = initial_graph.copy()

        # Run identifier used in output file names: seed followed by setting index.
        run = str(k) + str(i)

        # Baseline statistics of the initial state: mean absolute political
        # inclination and attribute assortativity on "pol_inclination".
        rep = abs(data[data['pol_inclination'] < 0]['pol_inclination'].sum())
        dem = abs(data[data['pol_inclination'] > 0]['pol_inclination'].sum())
        initial_pol = round((rep + dem)/data.shape[0], 6)
        initial_homophily = nx.attribute_assortativity_coefficient(G, "pol_inclination")

        # NOTE(review): this fallback stores a one-element list while the
        # regular path yields a float -- confirm which type is intended.
        if(math.isnan(initial_homophily) and len(set(nx.get_node_attributes(G, "pol_inclination").values())) == 1):
            initial_homophily = [1]

        (data, G, sharing_details, net_polarization, net_polarization2,
         network_homophily, net_homophily, polarity, satisfaction, activity,
         privacy, states) = start_simulation(
            i, k, data, G, post_conf, n_topics, lower_attd_th, upper_attd_th, k,
            trending_topics, topic_choices, news_topic_pref)

        # Optional per-run exports (currently disabled):
        # save_results_to_dir(run, i, data.copy(), mypath, sharing_details, net_polarization, net_polarization2, network_homophily, net_homophily, polarity, satisfaction, activity, privacy)
        # save_data(run, i, data.copy(), 2, mypath)

        print(k, i, 'Num of edges ', len(list(G.edges())))

        # Remove the 'agent' node attribute (if present) before pickling the graph.
        for n in G.nodes:
            G.nodes[n].pop('agent', None)

        with open(f'{mypath}graph_{run}.pkl', 'wb') as f:
            pickle.dump(G, f)

1 {100: [0.9470000000000001, 0.9], 101: [1, 1], 102: [1, 1]} 100 0.47000000000000003 0.0
2 {100: [0.9470000000000001, 0.9], 101: [0.907, 1.5849199571428572], 102: [1, 1]} 101 0.07 6.849199571428572
3 {100: [0.9470000000000001, 0.9], 101: [0.907, 1.5849199571428572], 102: [0.906, 1.7300983333333333]} 102 0.06 8.300983333333333
4 {100: [0.9470000000000001, 0.9], 101: [0.907, 1.5849199571428572], 102: [0.8204, 2.3687374400000003]} 102 0.05 8.116489399999999
5 {100: [0.9470000000000001, 0.9], 101: [0.8213, 2.1658610214285714], 102: [0.8204, 2.3687374400000003]} 101 0.05 7.394330599999999
6 {100: [0.9470000000000001, 0.9], 101: [0.74417, 2.4383723992857145], 102: [0.8204, 2.3687374400000003]} 101 0.05 4.8909748
7 {100: [0.9470000000000001, 0.9], 101: [0.672753, 2.9318741260238097], 102: [0.8204, 2.3687374400000003]} 101 0.03 7.373389666666665
8 {100: [0.9470000000000001, 0.9], 101: [0.6084777, 2.905398613421429], 102: [0.8204, 2.3687374400000003]} 101 0.03 2.667119
9 {100: [0.94700000000000

In [None]:
# print(i, og_agents_count, author_id, received_agents_count_, not_received_agents_count, spreader_agents_count, disinterested_agents_counts, received_agents_count)
# NOTE(review): debugging leftover. content_reach and agg_sanctions are not
# assigned at top level in any cell visible here, so this presumably raises
# NameError on a fresh kernel -- confirm and remove.
print('before ', news_stats, content_reach, agg_sanctions)

In [None]:
# data = {100: [1, 1], 101: [1, 1], 102: [0.898, 0.9]} 
# probabilities = [0.3449465332873405, 0.3449465332873405, 0.3101069334253191]
# l = []
# for _ in range(101):
#     l.append(random.choices(list(data.keys()), weights=probabilities, k=1)[0])

# pd.Series(l).value_counts()

In [None]:
# import matplotlib.pyplot as plt

# # color_map = ['red'] * 50 + ['blue'] * 50 + ['orange', 'green', 'brown']  # Regular, Influential, Superinfluential

# indegrees = [G.degree(n) * 10 for n in G.nodes()]  # Scale for visibility

# pos = nx.spring_layout(G)
# # sns.set_style("white")
# # nx.draw(G, pos, with_labels=True)
# # plt.show()

# plt.figure(figsize=(8, 8))
# nx.draw(G, with_labels=False, edge_color="gray", node_size=indegrees)
# plt.show()

In [None]:
# sorted_nodes = sorted(G.nodes, key=lambda n: G.degree(n), reverse=True)
# sorted_nodes