In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Step 0: Install the required libraries first if you haven't already
# (this cell needs pandas and matplotlib; pandas also needs an Excel engine
# such as openpyxl to read .xlsx workbooks)

# Step 1: Read Data from Excel
# Replace the placeholder below with the path to your workbook.
df = pd.read_excel('Your Excel File path here')

# Display names of the social media platforms. Each name has a matching
# '<platform> #' column in the workbook that is read as the follower /
# subscriber count for that platform.
platforms = ['X (Twitter) Follower', 'Facebook Follower', 'Instagram Follower', 'YouTube Subscriber', 'TikTok Subscriber']

# Iterate through each platform
for platform in platforms:
    count_column = f'{platform} #'

    # Drop missing counts once so the statistics and the histogram are
    # computed from exactly the same rows.
    follower_counts = df[count_column].dropna()

    # Step 2: Calculate Basic Statistics
    follower_stats = follower_counts.describe()

    # Display the basic statistics. The platform name already ends in
    # "Follower" / "Subscriber", so the word is not repeated in the heading.
    print(f"\n{platform} Statistics:")
    print(follower_stats)

    # Step 3: Visualize with Histogram
    plt.figure(figsize=(10, 6))
    plt.hist(follower_counts, bins=20, color='skyblue', edgecolor='black')
    plt.title(f'{platform} Distribution')
    plt.xlabel(f'{platform} Count')  # "Follower Count" or "Subscriber Count" as appropriate
    plt.ylabel('Frequency')
    plt.show()



In [None]:
# Step 2: Language Analysis
# Count how many rows fall under each value of the 'Language' column
language_counts = df['Language'].value_counts()

# Display the language distribution
print("\nLanguage Distribution:")
print(language_counts)

# Step 3: Visualize with Bar Chart and Annotations
plt.figure(figsize=(10, 6))
# pandas draws onto the figure created above and hands back its Axes
bar_chart = language_counts.plot(kind='bar', color='skyblue')
bar_chart.set_title('Language Distribution')
bar_chart.set_xlabel('Language')
bar_chart.set_ylabel('Count')

# Write each count just above its bar (bars sit at x = 0, 1, 2, ...)
for bar_position, (_, language_total) in enumerate(language_counts.items()):
    bar_chart.text(
        bar_position,
        language_total + 0.1,
        str(language_total),
        ha='center',
        va='bottom',
    )

plt.show()

In [None]:
# Step 2: Calculate Overall Influence Score
# The score is the row-wise total of the count columns of every platform
follower_columns = [
    'X (Twitter) Follower #',
    'Facebook Follower #',
    'Instagram Follower #',
    'YouTube Subscriber #',
    'TikTok Subscriber #',
]
df['Overall Influence Score'] = df[follower_columns].sum(axis=1)

# Step 3: Identify Top Influencers
# Change 10 to the desired number of top influencers
top_influencers = df.nlargest(10, 'Overall Influence Score')

# Display the top influencers (name and score only)
print("\nTop Influencers:")
print(top_influencers[['Name (English)', 'Overall Influence Score']])

# Step 4: Visualize Top Influencers
fig, ax = plt.subplots(figsize=(10, 6))
ax.bar(
    top_influencers['Name (English)'],
    top_influencers['Overall Influence Score'],
    color='skyblue',
)
ax.set_title('Top Influencers')
ax.set_xlabel('Influencer')
ax.set_ylabel('Overall Influence Score')
# Slant the names so long labels do not overlap
plt.xticks(rotation=45, ha='right')
plt.show()

In [None]:
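# Not clear: the in-notebook network drawing below is disabled (presumably because the rendered graph was hard to read); the next cell exports the same graph to GraphML instead.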

# import networkx as nx
# from unidecode import unidecode

# # Step 2: Identify Relationships
# # Assuming 'Parent entity (English)' is the column indicating relationships
# relationships = df[['Name (English)', 'Parent entity (English)']].dropna()

# # Step 3: Transliterate Names to English
# relationships['Name (English)'] = relationships['Name (English)'].apply(unidecode)
# relationships['Parent entity (English)'] = relationships['Parent entity (English)'].apply(unidecode)

# # Step 4: Visualize Network with Adjusted Layout
# G = nx.from_pandas_edgelist(relationships, 'Parent entity (English)', 'Name (English)')

# plt.figure(figsize=(16, 12))
# pos = nx.circular_layout(G)  # You can try different layout algorithms here

# nx.draw(G, pos, with_labels=True, node_size=2000, node_color='skyblue', font_size=10, font_color='black', font_weight='bold', edge_color='gray', linewidths=1, alpha=0.7)

# plt.title('Entity Network Visualization')
# plt.show()

In [None]:
import networkx as nx
from unidecode import unidecode

# Step 2: Identify Relationships
# Keep only the rows where both the entity name and its parent entity are present
edge_columns = ['Name (English)', 'Parent entity (English)']
relationships = df[edge_columns].dropna()

# Step 3: Transliterate Names to English
# Run every name in both columns through unidecode
for edge_column in edge_columns:
    relationships[edge_column] = relationships[edge_column].apply(unidecode)

# Step 4: Create Graph
# One edge per row, linking the parent entity to the named entity
G = nx.from_pandas_edgelist(
    relationships,
    source='Parent entity (English)',
    target='Name (English)',
)

# Step 5: Export to GraphML
nx.write_graphml(G, 'network_data.graphml')

# Step 6: Import to Gephi
# Open Gephi and import the 'network_data.graphml' file that was just written to your local directory

In [None]:
# Step 2: Identify Missing Values
missing_values = df.isnull().sum()

# Display the count of missing values for each column
print("Missing Values:")
print(missing_values)

# Step 3: Impute or Handle Missing Values
# For simplicity, fill missing values with a placeholder ("Not Available").
# This also turns missing regions into their own "Not Available" category.
df_filled = df.fillna("Not Available")

# Step 4: Generate Circular Graphs for each Platform
for platform in platforms:
    count_column = f'{platform} #'

    # Take an explicit copy so the column assignment below modifies this
    # frame only and does not raise SettingWithCopyWarning.
    platform_data = df_filled[['Region of Focus', count_column]].copy()

    # Convert follower counts to numeric; the "Not Available" placeholder
    # (and any other non-numeric value) becomes NaN
    platform_data[count_column] = pd.to_numeric(platform_data[count_column], errors='coerce')

    # Drop rows with missing or non-numeric follower counts
    platform_data = platform_data.dropna(subset=[count_column])

    # Group data by region and sum the follower counts
    platform_data_grouped = platform_data.groupby('Region of Focus').sum()
    region_totals = platform_data_grouped[count_column]

    # A pie chart needs a positive total to compute wedge percentages
    if region_totals.empty or region_totals.sum() <= 0:
        print(f"No numeric data available for {platform}; skipping pie chart.")
        continue

    # Create a pie chart with a legend
    plt.figure(figsize=(10, 8))
    patches, texts, autotexts = plt.pie(
        region_totals,
        labels=None,  # Removing labels in the pie chart
        autopct=lambda p: '{:.1f}%'.format(p) if p > 15 else '',  # Show percentages above 15%
        startangle=140,
        colors=plt.cm.Paired(range(len(platform_data_grouped)))
    )

    # Create a legend showing only the major regions (total above 10000).
    # Wedges and labels are filtered together so every label stays attached
    # to the colour of its own wedge.
    legend_handles = []
    legend_labels = []
    for patch, label, value in zip(patches, region_totals.index, region_totals):
        if value > 10000:
            legend_handles.append(patch)
            legend_labels.append(f'{label}: {value}')
    if legend_handles:
        plt.legend(legend_handles, legend_labels, loc="best")

    plt.title(f'{platform} Follower Distribution by Region')
    plt.show()
