In [None]:
import pandas as pd
import tldextract
import pycountry
import requests
import numpy as np
from bs4 import BeautifulSoup
import geopandas as gpd
import matplotlib.pyplot as plt
import matplotlib.colors as colors
import matplotlib.cm as cm
import matplotlib.patches as mpatches
from mpl_toolkits.axes_grid1 import make_axes_locatable
from fuzzywuzzy import fuzz, process
import os

In [None]:
# Load the gzip-compressed fact-check table; its `url` column is parsed below.
df = pd.read_csv(
    "../Data/minimal_FactCheckData_local.csv.gz",
    compression="gzip",
)

# Function to extract domain and suffix
def extract_domain_suffix(url):
    """Return the "<domain>.<suffix>" part of ``url``.

    The URL is split with ``tldextract.extract``; the subdomain is dropped and
    the ``domain`` and ``suffix`` fields are joined with a single dot.

    Parameters
    ----------
    url : str
        URL (or bare host name) to parse.

    Returns
    -------
    str
        ``domain`` and ``suffix`` joined by ".". When the extracted suffix is
        empty the result still ends with the dot.
    """
    pieces = tldextract.extract(url)
    return "{}.{}".format(pieces.domain, pieces.suffix)

# Function to map suffix to ISO3 country code
def suffix_to_iso3(suffix):
    """Map a domain suffix to an ISO 3166-1 alpha-3 country code.

    Only the last dot-separated label of ``suffix`` is considered, because that
    is where a country-code TLD lives ("co.uk" -> "uk", "com.br" -> "br").
    The label is looked up as an ISO alpha-2 code via ``pycountry``.

    Parameters
    ----------
    suffix : str
        Public suffix or TLD, e.g. "de", "co.uk", "com".

    Returns
    -------
    str or None
        The alpha-3 code of the matching country, or ``None`` when the label is
        empty or is not a country code (e.g. "com", "org").
    """
    # ccTLDs whose label differs from the country's ISO alpha-2 code.
    cctld_to_alpha2 = {"UK": "GB"}

    # The country code is the LAST label; the second-to-last one ("co", "com")
    # is a generic second-level label and would resolve to the wrong country
    # (e.g. "co" -> Colombia).
    label = suffix.strip().strip('.').rsplit('.', 1)[-1].upper()
    if not label:
        return None

    alpha_2 = cctld_to_alpha2.get(label, label)
    country = pycountry.countries.get(alpha_2=alpha_2)
    return country.alpha_3 if country else None

# Registrable domain ("name.suffix") for every fact-check URL.
df['domain'] = df['url'].apply(extract_domain_suffix)

# Country guess from the last label of the domain; None when that label is
# not a country code.
df['iso3'] = df['domain'].apply(
    lambda domain: suffix_to_iso3(domain.rsplit('.', 1)[-1])
)

In [None]:
# Hand-curated domain -> ISO3 table, used only for rows whose country could not
# be derived from the domain itself.
to_match = pd.read_csv("../Data/to_match.csv")
to_match.columns = ["domain", "ISO3"]
matched = to_match.set_index("domain")["ISO3"].to_dict()

# Fill only rows that are still missing a country AND have a curated entry.
# The boolean mask is aligned on the index, so this does not depend on `df`
# having a default 0..n-1 index and avoids a per-row Python loop.
fillable = df["iso3"].isna() & df["domain"].isin(list(matched))
df.loc[fillable, "iso3"] = df.loc[fillable, "domain"].map(matched)

# Plain-list view of the final column, kept under its original name.
iso3 = df["iso3"].tolist()

In [None]:
# Per-country summary: ISO3 | number of fact-checks | number of distinct domains.
# Rows without an ISO3 code are dropped by groupby.
iso3_count = (
    df.groupby("iso3")
    .size()
    .rename("count")
    .astype(int)
    .reset_index()
)
iso3_unique = (
    df.groupby("iso3")["domain"]
    .nunique()
    .rename("unique_domains")
    .astype(int)
    .reset_index()
)
iso3_df = iso3_count.merge(iso3_unique, on="iso3")

# Reference table of ISO 3166 countries, fetched from GitHub.
country_iso3 = pd.read_csv(
    "https://raw.githubusercontent.com/lukes/ISO-3166-Countries-with-Regional-Codes/master/all/all.csv"
)

In [None]:
# Two stacked panels (2 rows x 1 column):
#   A = fact-checking organisations (distinct domains) per country
#   B = fact-checks per country
fig, axs = plt.subplots(2, 1, figsize=(10, 10))
axs = axs.flatten()

# NOTE(review): `gpd.datasets` is deprecated since geopandas 0.14 and removed in
# 1.0; on newer versions point read_file at a local Natural Earth copy instead.
world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))

# The bundled Natural Earth file is known to carry the placeholder code "-99"
# for France and Norway, so they would never join with iso3_df and would always
# be drawn as "0". Restore their ISO3 codes before joining.
ISO_A3_FIXES = {"France": "FRA", "Norway": "NOR"}
placeholder = world["iso_a3"].eq("-99") & world["name"].isin(list(ISO_A3_FIXES))
world.loc[placeholder, "iso_a3"] = world.loc[placeholder, "name"].map(ISO_A3_FIXES)

# Left join keeps every map polygon; countries without data count as 0.
merged = world.set_index('iso_a3').join(iso3_df.set_index('iso3'))
merged['unique_domains'] = merged['unique_domains'].fillna(0)
merged['count'] = merged['count'].fillna(0)

# Palettes map the LOWER bound of each bin to its fill colour; the last bin is
# open-ended.
DOMAIN_PALETTE = {
    0: 'lightgrey',
    1: '#ffffcc',
    2: '#c2e699',
    3: '#78c679',
    4: '#31a354',
    5: '#006837'
}
COUNT_PALETTE = {
    0: 'lightgrey',
    1: '#edf8e9',
    10: '#bae4b3',
    100: '#74c476',
    1000: '#31a354',
    10000: '#006d2c'
}


def get_color(count, palette):
    """Return the colour of the highest bin whose lower bound is <= ``count``.

    Values below every bound (or NaN) fall back to the lowest bin's colour.
    """
    for threshold in sorted(palette, reverse=True):
        if count >= threshold:
            return palette[threshold]
    return palette[min(palette)]


def bin_labels(thresholds):
    """Build legend labels for sorted integer bin lower bounds.

    Width-1 bins are labelled "3", wider bins "10-99", the last bin "10000+".
    """
    labels = []
    for low, next_low in zip(thresholds, thresholds[1:]):
        labels.append(str(low) if next_low - low == 1 else f"{low}-{next_low - 1}")
    labels.append(f"{thresholds[-1]}+")
    return labels


def draw_panel(ax, frame, column, palette, legend_title, panel_label):
    """Draw one binned choropleth panel with its legend and panel letter.

    Adds a ``<column>_color`` column to ``frame`` holding the fill colour of
    every polygon, then plots ``frame`` on ``ax``.
    """
    color_column = f"{column}_color"
    frame[color_column] = frame[column].apply(lambda value: get_color(value, palette))
    frame.plot(color=frame[color_column], linewidth=0.8, ax=ax, edgecolor='0.7')

    # One legend patch per bin, labelled with the range the bin really covers.
    thresholds = sorted(palette)
    handles = [
        mpatches.Patch(color=palette[threshold], label=label)
        for threshold, label in zip(thresholds, bin_labels(thresholds))
    ]
    ax.legend(handles=handles, title=legend_title,
              bbox_to_anchor=(0.5, -0.2), loc='lower center', fontsize='large',
              title_fontsize='large', frameon=True, facecolor='white', edgecolor='black',
              labelspacing=1.2, ncol=len(handles))

    # Remove Antarctica
    ax.set_ylim([-60, 90])
    ax.axis('off')

    # Bold panel letter in the top-left corner.
    ax.text(0.05, 1.05, panel_label, fontsize=22, fontweight='bold',
            horizontalalignment='center', verticalalignment='center',
            transform=ax.transAxes)


draw_panel(axs[0], merged, 'unique_domains', DOMAIN_PALETTE,
           "Number of Fact-Checking Organisations", 'A')
draw_panel(axs[1], merged, 'count', COUNT_PALETTE,
           "Number of Fact-Checks", 'B')

plt.tight_layout()

# savefig does not create missing directories.
os.makedirs("../Plots", exist_ok=True)
plt.savefig("../Plots/00_FactCheckersMap.png", dpi=300, bbox_inches='tight')
plt.savefig("../Plots/00_FactCheckersMap.pdf", bbox_inches='tight')