In [None]:
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import json
import community as community_louvain

from textblob import TextBlob

In [None]:
# Load the post data: a JSON document whose top-level 'comments' entry holds
# the comment tree that the cells below flatten.
# JSON text is UTF-8 by specification, so the encoding is named explicitly
# instead of relying on the platform's locale default (which is not UTF-8 on
# every system and would garble or reject non-ASCII comment text).
file_path = './Scripts/Political/Conservative_data/Conservative_post_19dxyjc_data.json'
with open(file_path, 'r', encoding='utf-8') as file:
    data = json.load(file)

In [None]:
# Extract comments and replies into a flat structure
def extract_comments_replies(comments, parent_author=None, parent_body_snippet=None, depth=0):
    """Flatten a nested comment tree into one record per comment, depth-first.

    Args:
        comments: Iterable of comment dicts. Each must provide 'author' and
            'body'; an optional 'replies' entry holds child comments of the
            same shape.
        parent_author: Author of the comment these entries reply to
            (None for top-level comments).
        parent_body_snippet: First 30 characters of the parent comment's body
            (None for top-level comments).
        depth: Nesting level of `comments`; 0 for top-level comments.

    Yields:
        dict with keys 'depth', 'author', 'body', 'parent_author' and
        'reply_to'. A comment is always yielded before its own replies.
    """
    for comment in comments:
        yield {
            'depth': depth,
            'author': comment['author'],
            'body': comment['body'],
            'parent_author': parent_author,
            'reply_to': parent_body_snippet  # Reference to the parent comment
        }
        # A missing, null or empty 'replies' entry all mean "no children".
        # Checking truthiness (rather than key presence) keeps a JSON null
        # from reaching `yield from`, where it would raise TypeError.
        replies = comment.get('replies')
        if replies:
            # Children are linked to this comment by its author and a short
            # snippet of its body.
            yield from extract_comments_replies(
                replies, comment['author'], comment['body'][:30], depth + 1
            )

In [None]:
# Walk the whole comment tree once and keep the flattened records in a list.
extracted_data = [record for record in extract_comments_replies(data['comments'])]

# One row per comment or reply; the columns come from the record keys.
df_comments_replies = pd.DataFrame(extracted_data)

# Preview the first ten rows.
df_comments_replies.head(10)

In [None]:
# Visualise clusters of users within this post.
# Directed reply graph: one node per author, one edge parent author -> replier.
G = nx.DiGraph()

reply_pairs = zip(df_comments_replies['author'], df_comments_replies['parent_author'])
for author, reply_to_author in reply_pairs:
    # Every commenter becomes a node, even if they never take part in a reply.
    G.add_node(author)
    # Top-level comments have no parent author, so they contribute no edge.
    if pd.isna(reply_to_author):
        continue
    # add_edge creates the parent node too if it is not in the graph yet.
    G.add_edge(reply_to_author, author)

# Node and edge attributes can be attached as needed, e.g. to colour nodes by sentiment.

In [None]:
# Draw the reply graph and save it as a PNG.
# When no positions are passed, nx.draw computes a spring layout from a random
# start, so the picture (and the saved file) differs on every run. Computing
# the layout with a fixed seed makes the figure reproducible.
plt.figure(figsize=(12, 12))
pos = nx.spring_layout(G, seed=42)
nx.draw(G, pos=pos, with_labels=True, node_size=50, font_size=8)
# Save before plt.show(): showing may release the current figure.
plt.savefig('./political_graph_figure.png', dpi=300)
plt.show()

In [None]:
# Pick out the most deeply nested comment(s): the rows whose 'depth' equals
# the largest depth found in the table.
max_depth = df_comments_replies['depth'].max()
is_deepest = df_comments_replies['depth'].eq(max_depth)
comment_with_max_depth = df_comments_replies.loc[is_deepest]

print(comment_with_max_depth)

In [None]:
# Show only the text of the comment(s) found at the maximum depth.
comment_with_max_depth['body']

In [None]:

# Sentiment helper used to score each comment body
def calculate_sentiment(text):
    """Return the polarity component of TextBlob's sentiment for `text`."""
    blob = TextBlob(text)
    return blob.sentiment.polarity

# Score the body of every comment/reply, then store the scores alongside the text.
sentiment_scores = df_comments_replies['body'].apply(calculate_sentiment)
df_comments_replies['sentiment_score'] = sentiment_scores

# df_comments_replies now has a 'sentiment_score' column holding each row's polarity.


In [None]:
# Preview the first ten rows again, now with the 'sentiment_score' column.
df_comments_replies.head(10)

In [None]:
# Rebuild the reply graph from scratch, this time storing each reply's
# sentiment score on the edge parent author -> replier.
G = nx.DiGraph()

reply_rows = zip(
    df_comments_replies['author'],
    df_comments_replies['parent_author'],
    df_comments_replies['sentiment_score'],  # must already be computed
)
for author, parent_author, sentiment in reply_rows:
    # Every commenter is a node, whether or not they are part of a reply.
    G.add_node(author)
    # Top-level comments have no parent author and therefore no edge.
    if pd.isna(parent_author):
        continue
    # A DiGraph holds a single edge per (parent, replier) pair, so a later
    # reply between the same two authors replaces the stored sentiment.
    G.add_edge(parent_author, author, sentiment=sentiment)


In [None]:
# Degree centrality: the share of other authors a node is directly linked to.
# On a directed graph networkx counts edges in both directions, so this mixes
# replies received and replies made.
degree_centrality = nx.degree_centrality(G)

# Betweenness centrality: how often an author sits on shortest reply paths
# between other authors, i.e. how much they bridge otherwise separate threads.
betweenness_centrality = nx.betweenness_centrality(G)

# Eigenvector centrality: an author scores highly when linked from other
# high-scoring authors. max_iter is raised to 1000, presumably because the
# default iteration budget did not converge on this graph (TODO confirm).
# NOTE(review): for directed graphs networkx scores nodes via their in-edges;
# edges here run parent author -> replier, so in-edges come from the authors a
# node replied to. Confirm this is the intended direction of "influence".
eigenvector_centrality = nx.eigenvector_centrality(G, max_iter=1000)

# These centrality scores are intended for identifying the key influencers in each post.


In [None]:
# The Louvain implementation only accepts undirected graphs, so the reply
# direction is dropped before community detection.
G_undirected = G.to_undirected()

# Louvain method to find communities in the graph.
# The algorithm visits nodes in a randomised order, so without a fixed seed
# the node -> community mapping can change from run to run. random_state
# pins it so the partition is reproducible.
partition = community_louvain.best_partition(G_undirected, random_state=42)



In [None]:
# Colour each edge by the sentiment score stored on it, in G.edges() order
# (the same order nx.draw uses when matching colours to edges).
edge_colors = [attrs['sentiment'] for _, _, attrs in G.edges(data=True)]

# Seed the layout so the drawing is reproducible; nx.draw would otherwise use
# a randomly initialised spring layout.
pos = nx.spring_layout(G, seed=42)

# TextBlob polarity is documented as lying in [-1.0, 1.0]. Pinning the colour
# scale to that range with a diverging colormap gives neutral sentiment a
# fixed mid colour and keeps colours comparable between posts. The default
# auto-scaling stretches the map to whatever min/max this graph contains.
nx.draw(
    G,
    pos=pos,
    edge_color=edge_colors,
    edge_cmap=plt.cm.coolwarm,
    edge_vmin=-1.0,
    edge_vmax=1.0,
    with_labels=True,
    node_size=50,
)
plt.show()
