In [1]:
import sys
from pathlib import Path
import os

# Put the parent of the notebook's working directory on the import path so
# that project modules such as `settings` (imported below) can be resolved.
current_dir = Path.cwd()
project_dir = current_dir.parent
# Guard the append: re-running this cell must not stack duplicate entries.
if str(project_dir) not in sys.path:
    sys.path.append(str(project_dir))

from settings import ANNOTATED_ARTICLES_FILE

import pandas as pd
import folium
import re
import plotly.graph_objects as go


# Read the annotated-article records into a DataFrame; the JSON file location
# is supplied by the project's settings module.
annotated_articles = pd.read_json(path_or_buf=ANNOTATED_ARTICLES_FILE)

In [2]:
def extract_country_from_filename(filename):
    """Return the two-letter country code embedded in an article filename.

    The code is the first run of exactly two uppercase ASCII letters that is
    enclosed by underscores (for example the ``UA`` in ``"..._UA_..."``).

    Args:
        filename: Name of the article file. Non-string values (such as the
            ``None``/``NaN`` pandas uses for missing data) are tolerated.

    Returns:
        The two-letter code as a string, or ``None`` when ``filename`` is not
        a string or contains no such pattern.
    """
    # re.search raises TypeError on non-strings; a missing filename simply
    # has no country, so report that instead of aborting the whole .apply().
    if not isinstance(filename, str):
        return None
    match = re.search(r'_([A-Z]{2})_', filename)
    return match.group(1) if match else None

# Tag every article with the country code parsed from its filename
annotated_articles['country'] = annotated_articles['filename'].apply(extract_country_from_filename)

# Filter out articles related to Russia (RU)
annotated_articles = annotated_articles[annotated_articles['country'] != 'RU']

# Define a dictionary mapping country codes to geographic coordinates,
# stored as [latitude, longitude]
country_coordinates = {
    'US': [37.0902, -95.7129],
    'UA': [48.3794, 31.1656],
    'GB': [55.3781, -3.4360],
    'BY': [53.7098, 27.9534]
}

# Calculate average sentiment and propaganda percentage for each country
country_sentiment = annotated_articles.groupby('country')['sentiment_ukraine'].mean()
country_propaganda = annotated_articles.groupby('country')['propaganda'].apply(lambda x: x.eq('Present').mean() * 100)

# The filename pattern accepts any two-letter code, but only the countries
# listed in country_coordinates can be placed on the map. Report the rest
# instead of failing with a KeyError while building the traces.
unmapped_countries = sorted(set(country_sentiment.index) - set(country_coordinates))
if unmapped_countries:
    print(f"No coordinates for {unmapped_countries}; skipping them on the map.")


def _plottable(per_country):
    """Return the entries that have known coordinates and a non-NaN value."""
    located = per_country[per_country.index.isin(list(country_coordinates))]
    return located.dropna()


def _country_trace(values, sizes, hover_text, colorscale, name):
    """Build one Scattergeo marker trace with a marker at each country's coordinates."""
    return go.Scattergeo(
        lon=[country_coordinates[c][1] for c in values.index],
        lat=[country_coordinates[c][0] for c in values.index],
        text=hover_text,
        mode='markers',
        marker=dict(
            size=sizes,
            color=values,
            colorscale=colorscale,
            opacity=0.7
        ),
        name=name
    )


# The full aggregates above are left untouched; only the plotted subsets are filtered.
plotted_sentiment = _plottable(country_sentiment)
plotted_propaganda = _plottable(country_propaganda)

# Create a Plotly scatter geo plot to visualize sentiment and propaganda by country
fig = go.Figure()

# Plotly rejects negative marker sizes, so the size encodes the magnitude of
# the mean sentiment while the color keeps the signed value.
# NOTE(review): assumes sentiment_ukraine may be signed - confirm its scale.
sentiment_data = _country_trace(
    plotted_sentiment,
    sizes=plotted_sentiment.abs() * 10,
    hover_text=[f"Country: {c}<br>Average Sentiment: {s:.2f}" for c, s in plotted_sentiment.items()],
    colorscale='Blues',
    name='Sentiment'
)

# The percentage is halved so that a value of 100 gives a marker size of 50.
propaganda_data = _country_trace(
    plotted_propaganda,
    sizes=plotted_propaganda / 2,
    hover_text=[f"Country: {c}<br>Propaganda Percentage: {p:.2f}%" for c, p in plotted_propaganda.items()],
    colorscale='Reds',
    name='Propaganda'
)

fig.add_trace(sentiment_data)
fig.add_trace(propaganda_data)

fig.update_layout(
    title='Sentiment and Propaganda by Country (Excluding Russia)',
    geo=dict(
        scope='world',
        projection_type='equirectangular',
        showland=True,
        landcolor='rgb(243, 243, 243)',
        countrycolor='rgb(204, 204, 204)'
    ),
    legend=dict(
        x=0,
        y=1,
        bgcolor='rgba(255, 255, 255, 0.7)',
        bordercolor='rgba(0, 0, 0, 0.2)'
    )
)

fig.show()