In [None]:
import pandas as pd
import graph_tool.all as gt
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import io
from PIL import Image
import numpy as np

# Load nodes and edges data
nodes2 = pd.read_csv("./edges_longcovid_processed_cut_weight5.edges_node_info_merge.csv", sep=";")
nodes2 = nodes2.dropna(subset=['PoliticalIndex'])
edges = pd.read_csv('/Users/annabertani/polarization_graph/edges_membership_layout_w2-w20/edges_longcovid_processed_cut_weight5.edges', sep=" ", header=None)

edges.rename(columns={0: 'UserFrom', 1: 'UserTo', 2: 'weights'}, inplace=True)
nodes_ = nodes2[nodes2['User'].isin(list(edges['UserTo'])) | nodes2['User'].isin(list(edges['UserFrom']))]
nodes_['id'] = range(1, 1 + len(nodes_))

edges_ = pd.merge(edges, nodes_[['User', 'id']], left_on='UserFrom', right_on='User')
edges_.rename(columns={'id': 'source'}, inplace=True)
edges_ = pd.merge(edges_, nodes_[['User', 'id']], left_on='UserTo', right_on='User')
edges_.rename(columns={'id': 'target'}, inplace=True)

del edges_['User_x']
del edges_['User_y']
edges_ = edges_[edges_['source'] != 10709]
edges_ = edges_[edges_['source'] != 8708]
print('done cleaning')

# Create a Graph-tool graph
g = gt.Graph(directed=False)
vertex_map = {}
user_property = g.new_vertex_property("string")
for idx, row in nodes_.iterrows():
    v = g.add_vertex()
    vertex_map[row['User']] = v
    user_property[v] = row['User']
g.vertex_properties["User"] = user_property

for _, row in edges_.iterrows():
    g.add_edge(vertex_map[row['UserFrom']], vertex_map[row['UserTo']])

# Find the largest connected component
labels, hist = gt.label_components(g)
largest_component_label = np.argmax(hist)
largest_component_mask = (labels.a == largest_component_label)
print('g_lcc')

# Create a subgraph of the largest connected component
g_lcc = gt.GraphView(g, vfilt=largest_component_mask)

# Create a reliability property for the largest component graph
reliability = g_lcc.new_vertex_property("float")
for v in g_lcc.vertices():
    user = g_lcc.vp["User"][v]
    reliability[v] = nodes_[nodes_['User'] == user]['PoliticalIndex'].values[0]
g_lcc.vertex_properties["reliability"] = reliability

# Position vertices using SFDP layout
pos = gt.sfdp_layout(g_lcc)

# Color nodes by reliability index using a colormap
reliability_cmap = cm.get_cmap('coolwarm_r')
norm = plt.Normalize(vmin=min(nodes_['PoliticalIndex']), vmax=max(nodes_['PoliticalIndex']))

# Loop through the levels and draw the graph
print('state')
state = gt.minimize_nested_blockmodel_dl(g_lcc, state_args=dict(deg_corr=True))
S1 = state.entropy()
for i in range(1000):  # this should be sufficiently large
    state.multiflip_mcmc_sweep(beta=np.inf, niter=10)
S2 = state.entropy()
print("Improvement:", S2 - S1)


In [None]:
for i in range(len(state.levels) - 1):
    block_projection = state.project_partition(i, 0)
    print("Level:", i)
    
    # Create a buffer to hold the image
    buf = io.BytesIO()
    
    # Alternate layout for first level
    #layout = gt.arf_layout(g_lcc) if i == 0 else pos
    
    # Draw the graph and save to buffer
    gt.graph_draw(
        g_lcc,
        pos=pos,
        output_size=(3000, 3000),  # Set a higher output size for better resolution
        edge_pen_width=1.2,  # Increase the width of the edges
        edge_color=[0, 0, 0, 0.5],  # Make edges semi-transparent
        vertex_fill_color=reliability,  # Color nodes based on reliability index
        vcmap=reliability_cmap,
        vertex_color=[1, 1, 1, 0],  # Ensure no outline color affects the fill
        vertex_size=gt.prop_to_size(reliability, mi=5, ma=15),  # Scale node size based on reliability index
        bg_color=[1, 1, 1, 1],  # Set background color to white
        output=buf,  # Save to buffer
        fmt='png'
    )
    
    # Move the buffer position to the beginning
    buf.seek(0)
    
    # Open the image with PIL
    image = Image.open(buf)
    
    # Create a figure and axis with matplotlib
    fig, ax = plt.subplots(figsize=(15, 15), dpi=300)  # Set a larger figure size and higher DPI for better resolution
    
    # Display the graph image
    ax.imshow(np.array(image))
    ax.axis('off')  # Remove axes
    
    # Add a title indicating the level
    ax.set_title(f"Level {i}", fontsize=20)
    
    sm = plt.cm.ScalarMappable(cmap=reliability_cmap, norm=norm)
    sm.set_array([])
    cbar = plt.colorbar(sm, ax=ax, fraction=0.03, pad=0.02)
    cbar.set_label('Political Index', rotation=270, fontsize=15, labelpad=20)

    # Save the final image with higher DPI
    #plt.savefig(f"level_{i}_political_graph.png", dpi=400, bbox_inches='tight')
    
    # Close the buffer and the plot
    buf.close()
    plt.close(fig)

In [2]:
import pandas as pd
import graph_tool.all as gt
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import io
from PIL import Image
import numpy as np

def load_data(node_file, edge_file, index_column):
    # Load nodes and edges data
    nodes = pd.read_csv(node_file, sep=";")
    nodes = nodes.dropna(subset=[index_column])
    edges = pd.read_csv(edge_file, sep=" ", header=None, names=['UserFrom', 'UserTo', 'weights'])
    
    nodes_filtered = nodes[nodes['User'].isin(edges['UserTo']) | nodes['User'].isin(edges['UserFrom'])]
    nodes_filtered['id'] = range(1, 1 + len(nodes_filtered))

    edges_merged = pd.merge(edges, nodes_filtered[['User', 'id']], left_on='UserFrom', right_on='User')
    edges_merged.rename(columns={'id': 'source'}, inplace=True)
    edges_merged = pd.merge(edges_merged, nodes_filtered[['User', 'id']], left_on='UserTo', right_on='User')
    edges_merged.rename(columns={'id': 'target'}, inplace=True)

    return nodes_filtered, edges_merged

def create_graph(nodes, edges, exclude_sources):
    # Create a Graph-tool graph
    g = gt.Graph(directed=False)
    vertex_map = {}
    user_property = g.new_vertex_property("string")
    for idx, row in nodes.iterrows():
        v = g.add_vertex()
        vertex_map[row['User']] = v
        user_property[v] = row['User']
    g.vertex_properties["User"] = user_property

    for _, row in edges.iterrows():
        if row['source'] not in exclude_sources:
            g.add_edge(vertex_map[row['UserFrom']], vertex_map[row['UserTo']])
    
    return g, vertex_map

def find_largest_component(g):
    # Find the largest connected component
    labels, hist = gt.label_components(g)
    largest_component_label = np.argmax(hist)
    largest_component_mask = (labels.a == largest_component_label)
    return gt.GraphView(g, vfilt=largest_component_mask)

def set_vertex_properties(g_lcc, nodes, index_column):
    # Create a property map for the specified index
    index_property = g_lcc.new_vertex_property("float")
    for v in g_lcc.vertices():
        user = g_lcc.vp["User"][v]
        index_property[v] = nodes[nodes['User'] == user][index_column].values[0]
    g_lcc.vertex_properties["index_property"] = index_property
    return index_property

def draw_zero_level_graph(g_lcc, index_property, index_column):
    # Position vertices using SFDP layout
    pos = gt.sfdp_layout(g_lcc)
    
    # Normalize index values for colormap
    norm = plt.Normalize(vmin=index_property.a.min(), vmax=index_property.a.max())
    cmap = cm.get_cmap('coolwarm_r')

    # Find the state and focus on zero level
    state = gt.minimize_nested_blockmodel_dl(g_lcc, state_args=dict(deg_corr=True))
    
    # Only plot the zero level
    buf = io.BytesIO()
    gt.graph_draw(
        g_lcc,
        pos=pos,
        output_size=(3000, 3000),
        edge_pen_width=1.2,
        edge_color=[0, 0, 0, 0.5],
        vertex_fill_color=index_property,
        vcmap=cmap,
        vertex_color=[1, 1, 1, 0],
        vertex_size=gt.prop_to_size(index_property, mi=5, ma=15),
        bg_color=[1, 1, 1, 1],
        output=buf,
        fmt='png'
    )
    
    buf.seek(0)
    image = Image.open(buf)
    fig, ax = plt.subplots(figsize=(15, 15), dpi=300)
    ax.imshow(np.array(image))
    ax.axis('off')
    #ax.set_title(f"Zero Level - {index_column}", fontsize=20)
    
    sm = plt.cm.ScalarMappable(cmap=cmap, norm=norm)
    sm.set_array([])
    cbar = plt.colorbar(sm, ax=ax, fraction=0.03, pad=0.02)
    cbar.set_label(index_column, rotation=270, fontsize=15, labelpad=20)

    plt.savefig(f"level_0_{index_column}_graph.png", dpi=400, bbox_inches='tight')
    buf.close()
    plt.close(fig)

if __name__ == "__main__":
    # Parameters
    node_file = "./edges_longcovid_processed_cut_weight5.edges_node_info_merge.csv"
    edge_file = '/Users/annabertani/polarization_graph/edges_membership_layout_w2-w20/edges_longcovid_processed_cut_weight5.edges'
    index_column = 'PoliticalIndex'  # Can be 'PoliticalIndex' or 'ReliabilityIndex'
    exclude_sources = [10709, 8708]  # Exclude these sources

    # Load data
    nodes, edges = load_data(node_file, edge_file, index_column)

    # Create graph
    g, vertex_map = create_graph(nodes, edges, exclude_sources)

    # Find largest connected component
    g_lcc = find_largest_component(g)

    # Set vertex properties
    index_property = set_vertex_properties(g_lcc, nodes, index_column)

    # Draw only the zero level of the graph
    draw_zero_level_graph(g_lcc, index_property, index_column)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  nodes_filtered['id'] = range(1, 1 + len(nodes_filtered))
  cmap = cm.get_cmap('coolwarm_r')
