## Plotly

In [31]:
import plotly.graph_objects as go
import plotly.io as pio

# HorizonAnalytics template: dark navy canvas, bold white Montserrat text,
# golden-ratio aspect. Shared values are named once and reused below.
_HA_BACKGROUND = '#0d1b2a'
_HA_FOREGROUND = '#ffffff'
_HA_GRID = 'rgba(255, 255, 255, 0.2)'
_HA_FONT_FAMILY = 'Montserrat, sans-serif'
_HA_HEIGHT = 800


def _ha_font(size):
    """Return a bold, white Montserrat font spec of the given size."""
    return dict(
        size=size,
        family=_HA_FONT_FAMILY,
        color=_HA_FOREGROUND,
        weight="bold",
    )


def _ha_axis(anchor):
    """Return the axis spec shared by x and y; only the anchor differs."""
    return dict(
        anchor=anchor,
        showgrid=True,
        gridcolor=_HA_GRID,  # Softer grid lines for contrast
        tickfont=_ha_font(36),
        title=dict(text='', font=_ha_font(48)),
        linecolor=_HA_FOREGROUND,  # White axis lines for contrast
        linewidth=2,
    )


_ha_layout = go.Layout(
    paper_bgcolor=_HA_BACKGROUND,
    plot_bgcolor=_HA_BACKGROUND,
    height=_HA_HEIGHT,
    width=_HA_HEIGHT * 1.618,
    xaxis=_ha_axis('y'),
    yaxis=_ha_axis('x'),
    font=_ha_font(36),  # Default font for all text
    # Trace colors, applied in this order
    colorway=[
        "#FFFF00", "#33D7FF", "#A463FF", "#FFD700",
        "#ff4081", "#ffc107", "#00c4a0", "#a0aec0",
    ],
    title=dict(
        text='',
        font=_ha_font(64),
        x=0.5,   # Center title
        y=0.97,  # Push title higher
    ),
)

# Scatter traces get a thicker default line
_ha_trace_defaults = dict(scatter=[go.Scatter(line=dict(width=5))])

HorizonAnalytics = go.layout.Template(layout=_ha_layout, data=_ha_trace_defaults)

# Register the template and make it the default for every new figure
pio.templates['HorizonAnalytics'] = HorizonAnalytics
pio.templates.default = 'HorizonAnalytics'

## f_nobel_peace: id (year, laureate, country)
https://en.wikipedia.org/wiki/List_of_Nobel_Peace_Prize_laureates 

In [2]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin

# Wikipedia URL for Nobel Peace Prize laureates
url = "https://en.wikipedia.org/wiki/List_of_Nobel_Peace_Prize_laureates"

# Bound the request so a stalled connection cannot hang the notebook, and stop
# on an HTTP error instead of parsing an error page as if it were the article.
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser")

# Find the first table carrying the "wikitable" class
table = soup.find("table", {"class": "wikitable"})
if table is None:
    raise RuntimeError(f"No 'wikitable' table found at {url}")

# Extract data from the table
records = []
for row in table.find_all("tr")[1:]:  # Skip the header row
    cols = row.find_all("td")
    if len(cols) < 5:
        # Rows with fewer than five <td> cells are not laureate rows; skip them
        continue

    year = cols[0].text.strip()
    laureate_pic_cell = cols[1].find("a")   # First link in the picture column
    laureate_name_cell = cols[2].find("a")  # First link in the name column
    laureate_name = cols[1].text.strip() + " " + cols[2].text.strip()  # Merge laureate columns
    country_raw = cols[3].text.strip()

    # Keep only the text before the first "(" (drops birth/death years)
    laureate = laureate_name.split('(')[0].strip()

    # Resolve the relative hrefs against the page URL; empty string when absent
    link_pic = urljoin(url, laureate_pic_cell["href"]) if laureate_pic_cell else ""
    link_name = urljoin(url, laureate_name_cell["href"]) if laureate_name_cell else ""

    # Keep only the text before the first "[" (drops footnote markers);
    # fall back to "Unknown" when nothing is left
    country = country_raw.split("[")[0].strip() or "Unknown"

    records.append([year, laureate, country, link_pic, link_name])

# Create DataFrame
df = pd.DataFrame(records, columns=["year", "laureate", "country", "link_pic", "link_name"])

# Remove any rows with missing values, then keep only rows whose year is numeric
df = df.dropna()
df = df[df["year"].str.isnumeric()]

# Sort by year, country, laureate (all ascending) and renumber the rows.
# Reassigning instead of using inplace=True avoids mutating a filtered slice.
# NOTE(review): "year" is still a string here, so the order is lexicographic;
# that matches numeric order only while all years have the same width.
df = df.sort_values(by=["year", "country", "laureate"]).reset_index(drop=True)

# Generate sequential integer ID starting at 1
df.insert(0, "id", df.index + 1)  # Insert 'id' as the first column

# Save to CSV with the correct output format
df.to_csv("d_nobel_peace_raw.csv", index=False, columns=["id", "laureate", "country", "year", "link_pic", "link_name"])

In [9]:
import pandas as pd

# Mapping of country names (as they appear in the "country" column) to
# ISO 3166-1 alpha-3 codes. Three kinds of entries are not plain ISO lookups:
#   - "European Union" and "United Nations" use the non-ISO labels "EU" / "UN";
#   - the combined "United States;United Kingdom" key maps to both alpha-3
#     codes joined with ";", matching the separator used in the key.
country_iso_mapping = {
    "Argentina": "ARG",
    "Austria": "AUT",
    "Bangladesh": "BGD",
    "Belgium": "BEL",
    "Belarus": "BLR",
    "Canada": "CAN",
    "China": "CHN",
    "Colombia": "COL",
    "Costa Rica": "CRI",
    "Czech Republic": "CZE",
    "Democratic Republic of the Congo": "COD",
    "Egypt": "EGY",
    "Ethiopia": "ETH",
    "European Union": "EU",  # Not an official ISO code, but useful
    "East Timor": "TLS",  # Officially known as Timor-Leste
    "Finland": "FIN",
    "France": "FRA",
    "Germany": "DEU",
    "Guatemala": "GTM",
    "India": "IND",
    "Iran": "IRN",
    "Ireland": "IRL",
    "Israel": "ISR",
    "Italy": "ITA",
    "Japan": "JPN",
    "Kenya": "KEN",
    "Liberia": "LBR",
    "Myanmar": "MMR",
    "Netherlands": "NLD",
    "Norway": "NOR",
    "Palestine": "PSE",
    "Philippines": "PHL",
    "Poland": "POL",
    "Russia": "RUS",
    "Serbia": "SRB",
    "South Africa": "ZAF",
    "South Korea": "KOR",
    "Sweden": "SWE",
    "Switzerland": "CHE",
    "Tunisia": "TUN",
    "Turkey": "TUR",
    "United Kingdom": "GBR",
    "United States": "USA",
    # Both codes are kept; "GBR" (not "UK") is the alpha-3 code, consistent
    # with the "United Kingdom" entry above.
    "United States;United Kingdom": "USA;GBR",
    "United Nations": "UN",  # For organizations; not an official ISO code
}

def update_nobel_peace_data(csv_file):
    """Add a ``country_code`` column to the laureates CSV and rewrite it in place.

    Countries starting with ``"Other:"`` are first normalized to
    ``"United Nations"``; every country is then looked up in
    ``country_iso_mapping``. Countries without a mapping get a missing code
    and are listed in a printed warning.

    Args:
        csv_file: Path of the CSV to read and overwrite. It must contain a
            ``country`` column.

    Returns:
        The updated DataFrame, or ``None`` when anything fails (the error is
        printed rather than raised, so the file is left untouched on failure
        before the final write).
    """
    try:
        # NOTE: on_bad_lines='skip' drops malformed CSV lines without warning.
        df = pd.read_csv(csv_file, quotechar='"', on_bad_lines='skip')

        # Standardize "Other: ..." cases to "United Nations"
        df["country"] = df["country"].apply(
            lambda x: "United Nations" if str(x).startswith("Other:") else x
        )

        # Map countries to their codes; unmapped names become NaN
        df["country_code"] = df["country"].map(country_iso_mapping)

        # Report every distinct country that has no mapping
        unmatched_countries = df[df["country_code"].isna()]["country"].unique()
        if len(unmatched_countries) > 0:
            print("[WARNING] Unmatched countries found:")
            for country in unmatched_countries:
                print(f" - {country}")

        # Place 'country_code' immediately after 'country'. The column is
        # removed from the list *before* the target index is computed, so the
        # result is correct wherever 'country_code' was (including a re-run on
        # a file that already has the column).
        cols = [c for c in df.columns if c != "country_code"]
        cols.insert(cols.index("country") + 1, "country_code")
        df = df[cols]

        # Save changes to the original file
        df.to_csv(csv_file, index=False)
        print(f"[SUCCESS] Updated {csv_file} with country_code column.")

        return df

    except Exception as e:
        # Deliberate best-effort: report the problem and signal failure with None
        print(f"[ERROR] Failed to update CSV: {e}")
        return None

# Run the function to update the file (the CSV is overwritten in place).
# NOTE(review): the scraping cell saves "d_nobel_peace_raw.csv", while this
# call reads "d_nobel_peace.csv" — presumably a cleaned copy of the raw file;
# confirm how that file is produced.
update_nobel_peace_data("d_nobel_peace.csv")

[SUCCESS] Updated d_nobel_peace.csv with country_code column.


Unnamed: 0,id,laureate,country,country_code,year,link_pic,link_name
0,1,Henry Dunant,Switzerland,CHE,1901,https://en.wikipedia.org/wiki/File:Jean_Henri_...,https://en.wikipedia.org/wiki/Henry_Dunant
1,2,Élie Ducommun,Switzerland,CHE,1902,https://en.wikipedia.org/wiki/File:Ducommun.jpg,https://en.wikipedia.org/wiki/%C3%89lie_Ducommun
2,3,William Randal Cremer,United Kingdom,GBR,1903,https://en.wikipedia.org/wiki/File:Cremer.jpg,https://en.wikipedia.org/wiki/Randal_Cremer
3,4,Institute of International Law,Belgium,BEL,1904,https://en.wikipedia.org/wiki/File:Logo_of_Ins...,https://en.wikipedia.org/wiki/Institut_de_Droi...
4,5,Bertha von Suttner,Austria,AUT,1905,https://en.wikipedia.org/wiki/File:Bertha_von_...,https://en.wikipedia.org/wiki/Bertha_von_Suttner
...,...,...,...,...,...,...,...
100,101,World Food Programme,United Nations,UN,2020,https://en.wikipedia.org/wiki/File:World_Food_...,https://en.wikipedia.org/wiki/World_Food_Progr...
101,102,Maria Ressa,Philippines,PHL,2021,https://en.wikipedia.org/wiki/File:Maria_Ressa...,https://en.wikipedia.org/wiki/Maria_Ressa
102,103,Ales Bialiatski,Belarus,BLR,2022,https://en.wikipedia.org/wiki/File:Alaksandr_B...,https://en.wikipedia.org/wiki/Ales_Bialiatski
103,104,Narges Mohammadi,Iran,IRN,2023,https://en.wikipedia.org/wiki/File:Narges_Moha...,https://en.wikipedia.org/wiki/Narges_Mohammadi


## nobel pics based on 'laureate' link

In [1]:
import os
import requests
import pandas as pd
from bs4 import BeautifulSoup
from urllib.parse import urljoin, unquote
from PIL import Image
from io import BytesIO

def _save_laureate_picture(row, output_folder, headers, timeout):
    """Fetch one laureate's file page and store the image it links to.

    Prints the outcome and returns nothing; ``requests`` exceptions propagate
    to the caller.
    """
    response = requests.get(
        row["link_pic"], headers=headers, allow_redirects=True, timeout=timeout
    )
    if response.status_code != 200:
        print(f"[ERROR] Failed to access {row['link_pic']} (Status Code: {response.status_code})")
        return

    soup = BeautifulSoup(response.text, "html.parser")

    # Extract the full image URL from the media file page
    image_div = soup.find("div", {"class": "fullImageLink"})
    if image_div is None:
        print(f"[ERROR] No fullImageLink div found for {row['laureate']}")
        return

    img_tag = image_div.find("a")
    if not (img_tag and "href" in img_tag.attrs):
        print(f"[ERROR] No valid image link found inside fullImageLink for {row['laureate']}")
        return

    # urljoin turns a relative or protocol-relative href into an absolute URL
    img_url = urljoin("https://upload.wikimedia.org/wikipedia/commons/", unquote(img_tag["href"]))
    print(f"[FOUND] Image URL: {img_url}")

    # SVG files are saved as-is (no re-encoding); extension match is case-insensitive
    if img_url.lower().endswith(".svg"):
        svg_path = os.path.join(output_folder, f"{row['id']}.svg")
        svg_response = requests.get(img_url, headers=headers, timeout=timeout)
        if svg_response.status_code == 200:
            with open(svg_path, "wb") as f:
                f.write(svg_response.content)
            print(f"[SUCCESS] Saved SVG image for {row['laureate']} as {svg_path}")
        else:
            print(f"[ERROR] Failed to download SVG for {row['laureate']}")
        return

    # Download the image, then decode and re-encode it as PNG
    img_response = requests.get(img_url, headers=headers, timeout=timeout)
    if img_response.status_code != 200:
        print(f"[ERROR] Failed to download image for {row['laureate']} (Status Code: {img_response.status_code})")
        return

    try:
        img = Image.open(BytesIO(img_response.content))
        img_path = os.path.join(output_folder, f"{row['id']}.png")
        img.save(img_path, "PNG")
        print(f"[SUCCESS] Saved image for {row['laureate']} as {img_path}")
    except Exception as e:
        # Best-effort: an undecodable or unsaveable image is reported, not fatal
        print(f"[ERROR] Processing image for {row['laureate']}: {e}")


def extract_nobel_peace_pictures(csv_file, output_folder, timeout=30):
    """Download every laureate picture referenced by the ``link_pic`` column.

    For each row of *csv_file* with a non-empty ``link_pic``, the linked file
    page is fetched, the link inside its ``fullImageLink`` div is resolved, and
    the image is written to *output_folder* as ``<id>.png`` (re-encoded) or
    ``<id>.svg`` (raw bytes). Progress and failures are printed; a failure on
    one row does not stop the remaining rows.

    Args:
        csv_file: CSV path; the ``id``, ``laureate`` and ``link_pic`` columns
            are read.
        output_folder: Destination directory, created if it does not exist.
        timeout: Seconds allowed for each HTTP request before it is abandoned.
    """
    os.makedirs(output_folder, exist_ok=True)
    df = pd.read_csv(csv_file)
    headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"}

    for _, row in df.iterrows():
        link_pic = row["link_pic"]
        if pd.isna(link_pic) or str(link_pic).strip() == "":
            print(f"[SKIP] No image link found for {row['laureate']}")
            continue

        print(f"[PROCESSING] {row['laureate']} - Link: {link_pic}")

        try:
            _save_laureate_picture(row, output_folder, headers, timeout)
        except requests.RequestException as e:
            # Timeouts / connection errors affect only this row, not the batch
            print(f"[ERROR] Request failed for {row['laureate']}: {e}")

# Call the function with the appropriate parameters.
# NOTE(review): the output folder is an absolute path under one user's home
# directory; change it before running this cell on another machine.
extract_nobel_peace_pictures("d_nobel_peace.csv", "/Users/arya/Documents/Adobe/Premiere Pro/Horizon Analytics/2025-03_assemble_nobel_peace")

## frames