In [2]:
import ast
import json
import operator

import matplotlib.pyplot as plt
import networkx as nx
import pandas as pd
from textblob import TextBlob

# Read the exported tweets into a DataFrame; every later cell works from `df`.
df = pd.read_csv('tweets_hashtag_Putin.csv')

# Report the dataset dimensions (rows = tweets, columns = fields per tweet).
row_count, column_count = df.shape
print(f"Number of rows: {row_count}, Number of columns: {column_count}")


Number of rows: 602, Number of columns: 4


In [3]:
# Count tweets per author and rank authors from most to least active.
# A stable sort (mergesort) is requested explicitly: the default quicksort does
# not guarantee the relative order of users with equal counts, so the reported
# "most active" user could change between runs when there is a tie. With a
# stable sort, ties keep the alphabetical order produced by groupby.
user_counts = (
    df.groupby('user_name')
    .size()
    .reset_index(name='counts')
    .sort_values('counts', ascending=False, kind='mergesort')
)

# Report the top-ranked user. Guard against an empty ranking (no rows, or no
# non-null user names), where indexing the first row would raise IndexError.
if user_counts.empty:
    most_active_user = None
    print("No tweets with a user name were found, so there is no most active user.")
else:
    most_active_user = user_counts.iloc[0]['user_name']
    print(f"The most active Twitter user during this time frame is {most_active_user}.")

The most active Twitter user during this time frame is Asiyatu4.


In [4]:
# Average TextBlob sentiment (polarity and subjectivity) over all tweets.

# Only the 'text' column is needed, so iterate over it directly instead of
# building a full row object per tweet with iterrows(). Missing texts (NaN) are
# dropped because TextBlob raises TypeError for non-string input.
tweet_texts = df['text'].dropna().astype(str)

# Running totals, accumulated in row order.
polarity_total = 0
subjectivity_total = 0

for text in tweet_texts:
    # Evaluate the sentiment once and read both components from it.
    sentiment = TextBlob(text).sentiment
    polarity_total += sentiment.polarity
    subjectivity_total += sentiment.subjectivity

# Average over the tweets that were actually analysed. When every row has
# text this equals len(df).
num_tweets = len(tweet_texts)
if num_tweets:
    avg_polarity = polarity_total / num_tweets
    avg_subjectivity = subjectivity_total / num_tweets
else:
    # No analysable tweets: report NaN rather than raising ZeroDivisionError.
    avg_polarity = avg_subjectivity = float('nan')

# Print the results
print(f"The average polarity for all tweets is {avg_polarity:.2f}.")
print(f"The average subjectivity for all tweets is {avg_subjectivity:.2f}.")

The average polarity for all tweets is 0.03.
The average subjectivity for all tweets is 0.29.


In [8]:
def _parse_mentions(raw_mentions):
    """Decode one cell of the 'mentions' column into a list of user names.

    The column holds the text form of a list, e.g. "['alice', 'bob']" or "[]".

    Parameters
    ----------
    raw_mentions : str, list, tuple, or missing value
        The raw cell value as read from the dataframe.

    Returns
    -------
    list
        The mentioned names; empty when the cell is missing, blank, or "[]".

    Raises
    ------
    ValueError
        If the cell is a non-empty string that is neither a Python literal nor
        JSON once single quotes are normalised (json.JSONDecodeError is a
        ValueError subclass).
    """
    if isinstance(raw_mentions, (list, tuple)):
        return list(raw_mentions)
    if not isinstance(raw_mentions, str):
        # Missing value (NaN/None): the tweet mentions nobody.
        return []
    raw_mentions = raw_mentions.strip()
    if not raw_mentions or raw_mentions == '[]':
        return []
    try:
        # Parse the Python list literal directly; unlike swapping every ' for ",
        # this keeps names that themselves contain an apostrophe intact.
        return ast.literal_eval(raw_mentions)
    except (ValueError, SyntaxError):
        # Fall back to the quote-normalising JSON parse for any input that is
        # not a valid Python literal.
        return json.loads(raw_mentions.replace("'", '"'))


# Build a directed "who mentions whom" graph: one node per user, and an edge
# author -> mentioned user for every mention in every tweet.
graph = nx.DiGraph()

for user, raw_mentions in zip(df['user_name'], df['mentions']):
    # add_node is a no-op for an existing node, so authors who never mention
    # anyone still appear in the graph without an explicit has_node check.
    graph.add_node(user)
    for mention in _parse_mentions(raw_mentions):
        # add_edge creates the mentioned user's node if it is not there yet.
        graph.add_edge(user, mention)


# Calculate the in-degree and out-degree centrality measures for each node
in_degree_centrality = nx.in_degree_centrality(graph)
out_degree_centrality = nx.out_degree_centrality(graph)

if not in_degree_centrality:
    # Empty graph: max() over an empty mapping would raise ValueError.
    highest_in_degree = highest_out_degree = None
    print("The mention graph is empty, so no centrality ranking is available.")
else:
    # Pick the node with the largest score; on ties max() keeps the node that
    # was inserted into the graph first.
    highest_in_degree = max(in_degree_centrality, key=in_degree_centrality.get)
    highest_out_degree = max(out_degree_centrality, key=out_degree_centrality.get)

    # Print the results
    print(f"The user with the highest in-degree centrality is {highest_in_degree}.")
    print(f"The user with the highest out-degree centrality is {highest_out_degree}.")


The user with the highest in-degree centrality is RussianEmbassy.
The user with the highest out-degree centrality is Mousstach.
