## Plotly

In [9]:
import plotly.graph_objects as go
import plotly.io as pio

# Define the updated HorizonAnalytics template
HorizonAnalytics = go.layout.Template(
    layout=go.Layout(
        paper_bgcolor='#0d1b2a',  # Background color
        plot_bgcolor='#0d1b2a',  # Background color
        height=800,
        width=800 * 1.618,
        xaxis=dict(
            anchor='y',
            showgrid=True,
            gridcolor='rgba(255, 255, 255, 0.2)',  # Softer grid lines for contrast
            tickfont=dict(
                size=36,  # Consistent with other elements
                family='Montserrat, sans-serif',
                color='#ffffff',
                weight="bold"
            ),
            title=dict(
                text='',
                font=dict(
                    size=48,  # Increase to match other elements
                    family='Montserrat, sans-serif',
                    color='#ffffff',
                    weight="bold"
                )
            ),
            linecolor='#ffffff',  # White axis lines for contrast
            linewidth=2
        ),
        yaxis=dict(
            anchor='x',
            showgrid=True,
            gridcolor='rgba(255, 255, 255, 0.2)',  # Softer grid lines
            tickfont=dict(
                size=36,  # Consistent with x-axis
                family='Montserrat, sans-serif',
                color='#ffffff',
                weight="bold"
            ),
            title=dict(
                text='',
                font=dict(
                    size=48,  # Increase to match x-axis
                    family='Montserrat, sans-serif',
                    color='#ffffff',
                    weight="bold"
                )
            ),
            linecolor='#ffffff',  # White axis lines
            linewidth=2
        ),
        font=dict(
            color='#ffffff',  # White font for all text
            size=36,  # Uniform font size
            family='Montserrat, sans-serif',
            weight="bold"
        ),
        # Refined colorway for better visibility and differentiation
        colorway=["#FFFF00", "#33D7FF", "#A463FF", "#FFD700", 
                  "#ff4081", "#ffc107", "#00c4a0", "#a0aec0"],
        title=dict(
            text='',
            font=dict(
                size=64,  # **Big Boost in Title Size**
                color='#ffffff',
                family='Montserrat, sans-serif',
                weight="bold"
            ),
            x=0.5,  # Center title
            y=0.97  # Push title higher
        )
    ),
    data=dict(
        scatter=[
            go.Scatter(
                line=dict(width=5)  # Increased line width for better visibility
            )
        ]
    )
)

# Register the updated HorizonAnalytics template
pio.templates['HorizonAnalytics'] = HorizonAnalytics
pio.templates.default = 'HorizonAnalytics'

## Extraction

d_inauguration: president, term, party, start_date, end_date, order_number_range (link)
d_exec_orders_dates: president, term, exec_order_signed_date (missing 1950 - Truman)
d_exec_orders: president, term, days_in_office (<100), cum_exec_orders

### d_inauguration

In [34]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
from IPython.core.display import display, HTML

# Wikipedia URL
url = "https://en.wikipedia.org/wiki/List_of_United_States_federal_executive_orders"

# Request the page
response = requests.get(url)
if response.status_code != 200:
    raise Exception(f"Failed to retrieve page: {response.status_code}")

# Parse HTML
soup = BeautifulSoup(response.text, 'html.parser')

# Find the target table (first 'wikitable' on the page)
table = soup.find('table', {'class': 'wikitable'})

# Convert table to DataFrame
d_inauguration = pd.read_html(str(table))[0]

# Rename columns for clarity
d_inauguration.columns = ["#", "president", "party", "total_executive_orders", "order_number_range", "years_in_office", "exec_orders_per_year", "period"]

# Replace 'term' with 'num' based on the '#' column
d_inauguration["num"] = d_inauguration["#"]

# Extract start and end dates
d_inauguration[["start_date", "end_date"]] = d_inauguration["period"].str.split(" – ", expand=True)

# Extract links for order_number_range
order_links = []
for row in table.find_all("tr")[1:]:  # Skip header
    cells = row.find_all("td")
    if len(cells) > 4 and cells[4].find("a"):
        order_links.append("https://en.wikipedia.org" + cells[4].find("a")["href"])
    else:
        order_links.append(None)
d_inauguration["order_number_range"] = order_links

# Select required columns
d_inauguration = d_inauguration[["president", "num", "party", "start_date", "end_date", "order_number_range"]]

# Convert 'start_date' and 'end_date' to datetime format
d_inauguration['start_date'] = pd.to_datetime(d_inauguration['start_date'], errors='coerce')
d_inauguration['end_date'] = pd.to_datetime(d_inauguration['end_date'], errors='coerce')

# Save as CSV
d_inauguration.to_csv("d_inauguration.csv", index=False)

# Display as a scrollable table in Jupyter Notebook
# display(HTML(d_inauguration.to_html(index=False, escape=False)))

print("Data extraction complete. File saved as 'd_inauguration.csv'.")


Importing display from IPython.core.display is deprecated since IPython 7.14, please import from IPython.display



Data extraction complete. File saved as 'd_inauguration.csv'.



Passing literal html to 'read_html' is deprecated and will be removed in a future version. To read from a literal string, wrap it in a 'StringIO' object.



### d_exec_order_dates

In [35]:
import requests
import pandas as pd
from bs4 import BeautifulSoup
from time import sleep
from io import StringIO
from IPython.core.display import display, HTML

# Load d_inauguration.csv
d_inauguration = pd.read_csv("d_inauguration.csv")

# Define filtering criteria
filter_column = "num"  # Change this to any column you want to filter by
filter_values = list(range(24, 48))

# Apply filtering
d_inauguration = d_inauguration[d_inauguration[filter_column].isin(filter_values)].reset_index(drop=True)

# List to store executive orders
executive_orders = []

# Function to scrape executive orders
def scrape_executive_orders(url, president):
    if not isinstance(url, str) or pd.isna(url):
        return []
    
    response = requests.get(url)
    if response.status_code != 200:
        print(f"Failed to retrieve page: {url}")
        return []

    soup = BeautifulSoup(response.text, 'html.parser')
    
    # Debug: print out the response title to confirm correct page
    print(f"Scraping {president}'s page: {url}")
    
    # Determine whether to look for h3 or h2 tags based on the president
    if president in ["Donald Trump – I", "Donald Trump – II"]:
        # print(f"Using <h2> tags for {president} years.")
        years = soup.find_all("h2")  # For Trump, years are under h2
    else:
        # print(f"Using <h3> tags for {president} years.")
        years = soup.find_all("h3")  # For other presidents, years are under h3

    if not years:
        print(f"No year headings found for {president}.")
    
    orders_list = []
    last_num_rel = None  # Track last num_rel

    for year_tag in years:
        year_text = year_tag.get_text(strip=True)  # Extract year text
        # print(f"Found year: {year_text}")  # Debug: show the year being processed
        
        # Only keep valid year headings (e.g., 2017, 2018, etc.)
        if not year_text.isdigit():
            # print(f"Skipping invalid year: {year_text}")
            continue

        year = year_text  # Only process valid years
        table = year_tag.find_next("table", {"class": "wikitable"})
        
        if not table:
            print(f"No table found for {year}. Skipping.")
            continue

        df_orders = pd.read_html(StringIO(str(table)))[0]  # Read table

        # Skip empty tables
        if df_orders.empty:
            print(f"Table for {year} is empty. Skipping.")
            continue

        # Ensure table has necessary columns
        if df_orders.shape[1] < 4:
            print(f"Table for {year} doesn't have enough columns. Skipping.")
            continue

        # Identify "Date signed" column
        date_col = next((col for col in df_orders.columns if "Date signed" in col or "Date" in col), None)
        if not date_col:
            print(f"No 'Date signed' column found for {year}. Skipping.")
            continue

        # Forward-fill missing dates and rename columns
        df_orders = df_orders.ffill().rename(columns={date_col: "exec_order_signed_date"})
        df_orders = df_orders.iloc[:, :4]  # Keep first 4 columns
        df_orders.columns = ["num_rel", "num_abs", "title", "exec_order_signed_date"]

        # Convert num_rel to integer
        df_orders["num_rel"] = pd.to_numeric(df_orders["num_rel"], errors="coerce").fillna(0).astype(int)

        # Clean num_abs: Only take the leftmost 5 digits and filter non-numeric entries
        df_orders["num_abs"] = df_orders["num_abs"].astype(str).str[:5]
        # df_orders["num_abs"] = pd.to_numeric(df_orders["num_abs"], errors="coerce")  # Convert valid numeric values
        # df_orders["num_abs"] = df_orders["num_abs"].fillna(0).astype(int)  # Replace NaN with 0

        # Stop if num_rel resets to 1 (indicating a new section)
        if last_num_rel and df_orders["num_rel"].iloc[0] == 1 and last_num_rel > 1:
            print(f"Stopping extraction for {president}: num_rel reset to 1.")
            break

        last_num_rel = df_orders["num_rel"].iloc[-1]  # Update last num_rel

        # Add president and year, reorder columns
        df_orders["president"] = president
        df_orders["year"] = year
        df_orders = df_orders[["president", "num_rel", "num_abs", "year", "exec_order_signed_date", "title"]]

        orders_list.extend(df_orders.to_dict("records"))

    return orders_list

# Scrape executive orders for each president
for _, row in d_inauguration.iterrows():
    if isinstance(row["order_number_range"], str):
        print(f"Scraping executive orders for {row['president']}...")
        orders = scrape_executive_orders(row["order_number_range"], row["president"])
        executive_orders.extend(orders)
        sleep(2)  # Avoid getting blocked

# Convert results to DataFrame and save
d_exec_orders_dates = pd.DataFrame(executive_orders)

# Debug: Print out the number of records found
print(f"Number of executive orders found: {len(executive_orders)}")

# Save to CSV
d_exec_orders_dates.to_csv("d_exec_orders_dates.csv", index=False)

# # Display results
# display(HTML(d_exec_orders_dates.to_html(index=False, escape=False)))
# print("Data extraction complete. File saved as 'd_exec_orders_dates.csv'.")


Importing display from IPython.core.display is deprecated since IPython 7.14, please import from IPython.display



Scraping executive orders for William McKinley...
Scraping William McKinley's page: https://en.wikipedia.org/wiki/List_of_executive_actions_by_William_McKinley


KeyboardInterrupt: 

### d_exec_orders

In [30]:
import pandas as pd
from IPython.display import display, HTML

# Load the CSV file
d_exec_order_dates = pd.read_csv("d_exec_orders_dates.csv")

# Check if there are any rows with missing dates
missing_date_rows = d_exec_order_dates[d_exec_order_dates['exec_order_signed_date'].isna()]
if not missing_date_rows.empty:
    print("Rows with missing dates:")
    display(HTML(missing_date_rows.to_html(index=False, escape=False)))

# Handle Herbert Hoover's dates
def handle_hoover_dates(row):
    if row['president'] == "Herbert Hoover":
        # Remove periods from month abbreviations if present
        cleaned_date = row['exec_order_signed_date'].replace('.', '')  # Remove periods

        # Handle cases where the date format is something like 'Mar 07', 'May 1', 'Jan. 03'
        parts = cleaned_date.split()
        if len(parts) == 2:  # If the date is in format "Month Day"
            month, day = parts

            # Add leading zero to single digit day
            if len(day) == 1:
                day = '0' + day

            # Normalize "June", "July", and "Sept" to their proper abbreviations
            if month.lower() == "june":
                month = "Jun"  # Normalize "June" to "Jun"
            elif month.lower() == "july":
                month = "Jul"  # Normalize "July" to "Jul"
            elif month.lower() == "september" or month.lower() == "sept":
                month = "Sep"  # Normalize "September" and "Sept" to "Sep"
            
            # Return the cleaned date
            cleaned_date = f'{month} {day}'

        # Concatenate the year with the cleaned date
        try:
            date_string = str(row['year']) + ' ' + cleaned_date
            # Handle potential inconsistent formats, ensuring we get a valid date string
            return pd.to_datetime(date_string, errors='coerce', format='%Y %b %d')  # E.g., '1930 Jun 03'
        except Exception as e:
            print(f"Error processing date for {row['president']} {row['exec_order_signed_date']}: {e}")
            return pd.NaT
    else:
        return row['exec_order_signed_date']  # No change for other presidents

# Apply the date handling function to Herbert Hoover's rows
d_exec_order_dates['exec_order_signed_date'] = d_exec_order_dates.apply(handle_hoover_dates, axis=1)

# Explicitly check for the date format in YYYY-MM-DD and avoid reprocessing
def process_fdr_dates(row):
    # Check if the date is already in 'YYYY-MM-DD' format (if it's valid and correctly formatted)
    if isinstance(row['exec_order_signed_date'], str) and '-' in row['exec_order_signed_date']:
        try:
            # Try parsing the date with the expected format (YYYY-MM-DD)
            pd.to_datetime(row['exec_order_signed_date'], format='%Y-%m-%d', errors='raise')
            return row['exec_order_signed_date']  # If valid, return as is
        except:
            pass  # Continue to the next check if not valid
    
    # For other dates, apply pd.to_datetime()
    return pd.to_datetime(row['exec_order_signed_date'], errors='coerce')

# Apply the special function to Franklin D. Roosevelt's rows
d_exec_order_dates['exec_order_signed_date'] = d_exec_order_dates.apply(process_fdr_dates, axis=1)

# Convert to datetime (now all rows should be consistently in datetime format)
d_exec_order_dates["exec_order_signed_date"] = pd.to_datetime(d_exec_order_dates["exec_order_signed_date"], errors='coerce')

# After conversion, check if there are any invalid or NaT values
invalid_dates = d_exec_order_dates[d_exec_order_dates["exec_order_signed_date"].isna()]

# Display invalid rows as a scrollable HTML table in Jupyter
if not invalid_dates.empty:
    html_table = invalid_dates.to_html(index=False, escape=False)
    display(HTML(f'<div style="height:400px; overflow:auto;">{html_table}</div>'))

# Convert to the final format YYYY-MM-DD
d_exec_order_dates["exec_order_signed_date"] = d_exec_order_dates["exec_order_signed_date"].dt.strftime('%Y-%m-%d')

# Optionally, fill invalid dates with a default value (e.g., today's date or NaN)
# d_exec_order_dates["exec_order_signed_date"].fillna(pd.to_datetime('today'), inplace=True)

# Save the processed data to a new CSV file
d_exec_order_dates.to_csv("d_exec_orders.csv", index=False)

print("Date format standardization complete. Processed file saved as 'd_exec_orders.csv'.")

In [3]:
import pandas as pd
from IPython.display import display, HTML

# Load d_exec_orders and d_inauguration CSV files
d_exec_orders = pd.read_csv("d_exec_orders.csv")
d_inauguration = pd.read_csv("d_inauguration.csv")

# Convert 'start_date' and 'exec_order_signed_date' to datetime format
d_inauguration['start_date'] = pd.to_datetime(d_inauguration['start_date'], errors='coerce')
d_exec_orders['exec_order_signed_date'] = pd.to_datetime(d_exec_orders['exec_order_signed_date'], errors='coerce')

# Left join d_exec_orders with d_inauguration on the 'president' column
d_exec_orders = pd.merge(d_exec_orders, d_inauguration[['president', 'start_date', 'num']], on='president', how='left')

# Drop the extra 'start_date_y' column if it exists
if 'start_date_y' in d_exec_orders.columns:
    d_exec_orders = d_exec_orders.drop(columns=['start_date_y'])

# Ensure we are using 'start_date_x' as the correct start date
d_exec_orders = d_exec_orders.rename(columns={'start_date_x': 'start_date'})

# Convert 'exec_order_signed_date' and 'start_date' to datetime format if they are still strings
d_exec_orders['exec_order_signed_date'] = pd.to_datetime(d_exec_orders['exec_order_signed_date'], errors='coerce')
d_exec_orders['start_date'] = pd.to_datetime(d_exec_orders['start_date'], errors='coerce')

# Filter the data to only include the first 50 days after each president's start date
d_exec_orders['days_since_start'] = (d_exec_orders['exec_order_signed_date'] - d_exec_orders['start_date']).dt.days

# Keep only rows where 'days_since_start' is between 0 and 50
# d_exec_orders = d_exec_orders[(d_exec_orders['days_since_start'] >= 0) & (d_exec_orders['days_since_start'] <= 49)]

# Calculate 'days_in_office' as the difference between exec_order_signed_date and start_date
d_exec_orders['days_in_office'] = (d_exec_orders['exec_order_signed_date'] - d_exec_orders['start_date']).dt.days + 1

# Drop the 'days_since_start' column as it's no longer needed
d_exec_orders = d_exec_orders.drop(columns=['days_since_start'])

# Save the resulting DataFrame as 'd_exec_orders.csv'
d_exec_orders.to_csv("d_exec_orders.csv", index=False)

# Display the resulting DataFrame
display(HTML(d_exec_orders.to_html(index=False, escape=False)))

print("Filtered data to include only the first 50 days after each president's start date. 'num' added from d_inauguration. 'days_in_office' calculated and included. File saved as 'd_exec_orders.csv'.")

In [5]:
import pandas as pd

# Load the d_exec_orders CSV file
d_exec_orders = pd.read_csv("d_exec_orders.csv")

# Rename the 'num' column to 'term'
d_exec_orders = d_exec_orders.rename(columns={'num': 'term'})

# Reorder columns to match the specified order and remove the 'title' column
d_exec_orders = d_exec_orders[['term', 'president', 'exec_order_signed_date', 'start_date', 'days_in_office', 'num_rel', 'num_abs']]

# Optionally, check the data and inspect the columns
print(d_exec_orders.head())

# Save the DataFrame as d_exec_orders_processed.csv
d_exec_orders.to_csv("d_exec_orders_processed.csv", index=False)

print("Data saved as 'd_exec_orders_processed.csv'.")

### rbt_exec_orders

In [6]:
import pandas as pd

# Load the d_exec_orders_processed CSV file
d_exec_orders = pd.read_csv("d_exec_orders_processed.csv")

# Extract 'term' and 'president', and create a 'day' column that goes from 1 to 50 for each president-term combo
rbt_exec_orders = pd.DataFrame(columns=['term', 'president', 'day', 'num_exec_orders'])

# Loop over each president-term combo and create a day column from 1 to 50
for _, group in d_exec_orders.groupby(['term', 'president']):
    term = group['term'].iloc[0]  # Get the term
    president = group['president'].iloc[0]  # Get the president
    days = list(range(1, 51))  # Create days from 1 to 50
    
    # Create a DataFrame with these days for each president-term combo and initialize 'cum_exec_orders' as 0
    temp_df = pd.DataFrame({'term': [term] * len(days), 
                            'president': [president] * len(days),
                            'day': days,
                            'num_exec_orders': [0] * len(days)})
    
    # Append the temp_df to rbt_exec_orders
    rbt_exec_orders = pd.concat([rbt_exec_orders, temp_df], ignore_index=True)

# Display the first few rows to check the result
print(rbt_exec_orders.head())

# Save the resulting DataFrame as 'rbt_exec_orders.csv'
rbt_exec_orders.to_csv("rbt_exec_orders.csv", index=False)

print("rbt_exec_orders created and saved as 'rbt_exec_orders.csv'.")


  term         president day num_exec_orders
0   25  William McKinley   1               0
1   25  William McKinley   2               0
2   25  William McKinley   3               0
3   25  William McKinley   4               0
4   25  William McKinley   5               0
rbt_exec_orders created and saved as 'rbt_exec_orders.csv'.


In [7]:
import pandas as pd

# Load the d_exec_orders_processed CSV file
d_exec_orders_processed = pd.read_csv("d_exec_orders_processed.csv")

# Load the rbt_exec_orders DataFrame (the one we created earlier)
rbt_exec_orders = pd.read_csv("rbt_exec_orders.csv")

# Merge rbt_exec_orders with d_exec_orders_processed on president, term, and day (which corresponds to days_in_office)
merged_df = pd.merge(rbt_exec_orders, d_exec_orders_processed, 
                     left_on=['president', 'term', 'day'], 
                     right_on=['president', 'term', 'days_in_office'], 
                     how='left')

# Count distinct num_abs for each group
# Group by term, president, and day (from rbt_exec_orders) and calculate the count of distinct num_abs
merged_df['num_exec_orders'] = merged_df.groupby(['term', 'president', 'day'])['num_abs'].transform('nunique')

# Drop the unnecessary columns from the merge
merged_df = merged_df[['term', 'president', 'day', 'num_exec_orders']]

# Deduplicate the final table by removing duplicate rows
merged_df = merged_df.drop_duplicates()

# Calculate the cumulative sum of num_exec_orders for each president and term
merged_df['cum_exec_orders'] = merged_df.groupby(['term', 'president'])['num_exec_orders'].cumsum()

# Add a row for day 0 with num_exec_orders 0 and cum_exec_orders 0 for each president
day_0_df = merged_df.groupby(['term', 'president']).first().reset_index()
day_0_df['day'] = 0
day_0_df['num_exec_orders'] = 0
day_0_df['cum_exec_orders'] = 0

# Concatenate the day 0 rows with the merged data
final_df = pd.concat([day_0_df, merged_df], ignore_index=True)

# Sort by term, president, and day
rbt_exec_orders = final_df.sort_values(by=['term', 'president', 'day']).reset_index(drop=True)

# Display the first few rows to verify the result
print(final_df.head())

# Save the resulting DataFrame as 'rbt_exec_orders.csv'
final_df.to_csv("rbt_exec_orders.csv", index=False)

print("rbt_exec_orders with day 0 included and cumulative sum saved as 'rbt_exec_orders.csv'.")


   term            president  day  num_exec_orders  cum_exec_orders
0    25     William McKinley    0                0                0
1    26   Theodore Roosevelt    0                0                0
2    27  William Howard Taft    0                0                0
3    28       Woodrow Wilson    0                0                0
4    29    Warren G. Harding    0                0                0
rbt_exec_orders with day 0 included and cumulative sum saved as 'rbt_exec_orders.csv'.


## Visualisation

In [4]:
# import plotly.express as px
# import pandas as pd

# # Function to generate 50 trend charts for a given president
# def generate_trend_charts(president_name, rbt_exec_orders, height=600, width=800):
#     # Filter the data for the selected president
#     president_data = rbt_exec_orders[rbt_exec_orders['president'] == president_name]
    
#     # Check if the president exists in the data
#     if president_data.empty:
#         print(f"No data available for {president_name}")
#         return
    
#     # Loop through each day from 1 to 50
#     for day in range(1, 51):
#         # Filter the data up to the current day
#         chart_data = president_data[president_data['day'] <= day]

#         # Get the dynamic y-axis maximum based on the current day's data
#         y_max = chart_data['cum_exec_orders'].max() * 1.05  # Slightly above the max value

#         # Create the trend chart
#         fig = px.line(chart_data, 
#                       x='day', 
#                       y='cum_exec_orders', 
#                       color='president', 
#                       template='HorizonAnalytics')
        
#         # Fix x-axis from 0 to 50 and set dynamic y-axis
#         fig.update_layout(
#             xaxis=dict(range=[0, 50]),
#             yaxis=dict(range=[0, y_max]),  # Set y-axis min to 0, max dynamically updated
#             xaxis_title=None,
#             yaxis_title=None,
#             showlegend=False,  # Hide the legend
#             height=height,  # Set custom height
#             width=width     # Set custom width
#         )
        
#         # Show the plot
#         fig.show()

# # Example of calling the function
# generate_trend_charts("Donald Trump – II", rbt_exec_orders)

### Trend Chart

In [13]:
import plotly.express as px
import pandas as pd
import os

# Function to generate and save 50 trend charts for each president with a global y-axis maximum
def generate_global_trend_charts(rbt_exec_orders, height=600, width=800, output_dir='output_images'):
    global_max = 0  # Initialize the global maximum for y-axis

    # Create the output directory if it doesn't exist
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # Get unique presidents
    presidents = rbt_exec_orders['president'].unique()

    # Initialize chart index
    chart_index = 1

    # To track the lines plotted so far
    all_previous_lines = []

    # Loop through each president
    for president_name in presidents:
        # Filter the data for the current president
        president_data = rbt_exec_orders[rbt_exec_orders['president'] == president_name]
        
        # Check if the president exists in the data
        if president_data.empty:
            print(f"No data available for {president_name}")
            continue

        # Loop through each day from 1 to 50 for the current president
        for day in range(1, 51):
            # Filter the data up to the current day
            chart_data = president_data[president_data['day'] <= day]

            # Update the global maximum based on the current chart data
            global_max = max(global_max, chart_data['cum_exec_orders'].max())  # Update global max

            # Create the trend chart
            fig = px.line(chart_data, 
                          x='day', 
                          y='cum_exec_orders', 
                          color='president', 
                          template='HorizonAnalytics')
            
            # Add previous lines (in grey/transparent color)
            for prev_line in all_previous_lines:
                fig.add_trace(prev_line)

            # Add current president's line
            fig.add_trace(px.line(chart_data, x='day', y='cum_exec_orders').data[0])
            
            # Fix x-axis from 0 to 50 and set global y-axis maximum, ensuring y-axis starts from 0 and adding 5 units
            fig.update_layout(
                xaxis=dict(range=[0, 50]),
                yaxis=dict(
                    range=[0, global_max + 5]  # Always add 5 units to the max y-axis value
                ),
                xaxis_title=None,
                yaxis_title=None,
                showlegend=False,  # Hide the legend
                height=height,     # Set custom height
                width=width,       # Set custom width
                # Set transparent background
                plot_bgcolor='rgba(0, 0, 0, 0)',  # Transparent background for plot area
                paper_bgcolor='rgba(0, 0, 0, 0)'  # Transparent background for the overall paper
            )

            # Construct the file name with zero-padding for 4 digits (e.g., 0001.png, 0002.png)
            file_name = f"{chart_index:04d}.png"
            file_path = os.path.join(output_dir, file_name)

            # Save the chart as a .png file
            fig.write_image(file_path)

            # Increment the chart index
            chart_index += 1

        # After finishing plotting for this president, save the lines for future plots
        # Add the current president's line in grey to all_previous_lines
        fig_data = px.line(president_data, x='day', y='cum_exec_orders', color_discrete_sequence=['grey']).data[0]
        all_previous_lines.append(fig_data)

    print(f"Charts saved in the directory: {output_dir}")

# Example of calling the function
generate_global_trend_charts(rbt_exec_orders, height=1080, width=1280, output_dir='output_images')

Charts saved in the directory: output_images


### Text

In [14]:
from PIL import Image, ImageDraw, ImageFont
import pandas as pd
import os

# Function to generate and save individual frames displaying president, day, cumulative exec orders, and global total
def generate_value_frames(rbt_exec_orders, height=200, width=400, output_dir='output_images', font_size=42, font_type="Regular", 
                          font_color=(255, 255, 255), font_outline_width=2, font_outline_color=(0, 0, 0)):
    # Available font types (from the Montserrat font family):
    font_types = [
        "Black",
        "BlackItalic",
        "Bold",
        "BoldItalic",
        "ExtraBold",
        "ExtraBoldItalic",
        "ExtraLight",
        "ExtraLightItalic",
        "Italic",
        "Light",
        "LightItalic",
        "Medium",
        "MediumItalic",
        "Regular",
        "SemiBold",
        "SemiBoldItalic",
        "Thin",
        "ThinItalic"
    ]
    
    # Print the available font types for easy reference
    print(f"Available font types: {', '.join(font_types)}")
    
    # Create the output directory if it doesn't exist
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # Get unique presidents
    presidents = rbt_exec_orders['president'].unique()

    # Initialize frame index
    frame_index = 1

    # Path to the Montserrat font, allowing font type to be chosen
    font_path = f'Montserrat/Montserrat-{font_type}.ttf'  # Use the selected font type

    # Try to load the Montserrat font, fallback to default font if fails
    try:
        font = ImageFont.truetype(font_path, font_size)  # Adjust font and size
    except IOError:
        print(f"Montserrat font {font_type} not found. Using default font.")
        font = ImageFont.load_default()

    # Initialize the global total for all presidents
    global_total_val = 0

    # Loop through each president
    for president_name in presidents:
        # Filter the data for the current president
        president_data = rbt_exec_orders[rbt_exec_orders['president'] == president_name]

        # Check if the president exists in the data
        if president_data.empty:
            print(f"No data available for {president_name}")
            continue

        # Loop through each day from 1 to 50 for the current president
        for day in range(1, 51):
            # Filter the data for the current day
            chart_data = president_data[president_data['day'] == day]

            # Extract the president, day, cumulative executive orders for the current day
            president_val = chart_data['president'].iloc[0]
            day_val = chart_data['day'].iloc[0]
            cum_exec_orders_val = chart_data['cum_exec_orders'].iloc[0]

            # Update the global total only if there is a change in cumulative executive orders
            if cum_exec_orders_val > global_total_val:
                global_total_val = cum_exec_orders_val

            # Create an empty image with transparent background
            img = Image.new('RGBA', (width, height), color=(0, 0, 0, 0))  # 'RGBA' for transparency
            draw = ImageDraw.Draw(img)

            # Define the text to be displayed (with space between lines)
            text = f"President: {president_val}\n\nDay: {day_val}\n\nExec Orders: {cum_exec_orders_val}\n\nMaximum: {global_total_val}"

            # Position the text in the left side (no centering)
            text_bbox = draw.textbbox((0, 0), text, font=font)  # Use textbbox to get the text size
            text_width, text_height = text_bbox[2] - text_bbox[0], text_bbox[3] - text_bbox[1]
            text_position = (10, (height - text_height) // 2)  # Align to the left with some padding (10 pixels)

            # Add outline to the text (by drawing the text multiple times with a small offset)
            if font_outline_width > 0:
                # Draw the outline by offsetting the position
                for dx in range(-font_outline_width, font_outline_width + 1):
                    for dy in range(-font_outline_width, font_outline_width + 1):
                        if dx != 0 or dy != 0:  # Skip the center position
                            draw.text((text_position[0] + dx, text_position[1] + dy), text, font=font, fill=font_outline_color)

            # Add the actual text in the specified font color
            draw.text(text_position, text, font=font, fill=font_color)

            # Construct the file name with zero-padding for 4 digits (e.g., 0001.png, 0002.png)
            file_name = f"{frame_index:04d}.png"
            file_path = os.path.join(output_dir, file_name)

            # Save the image as a .png file
            img.save(file_path, 'PNG')  # Ensure the image is saved with a .png format

            # Increment the frame index
            frame_index += 1

    print(f"Frames saved in the directory: {output_dir}")

# Example of calling the function with the font type set to 'Bold', font size 42, and custom font color and outline
generate_value_frames(rbt_exec_orders, height=720, width=1280, output_dir='value_frames', font_size=51, font_type="ExtraBold", 
                      font_color=(255, 255, 255), font_outline_width=3, font_outline_color=(0, 0, 0))

Available font types: Black, BlackItalic, Bold, BoldItalic, ExtraBold, ExtraBoldItalic, ExtraLight, ExtraLightItalic, Italic, Light, LightItalic, Medium, MediumItalic, Regular, SemiBold, SemiBoldItalic, Thin, ThinItalic
Frames saved in the directory: value_frames


### Presidents' Name

In [23]:
from PIL import Image, ImageDraw, ImageFont
import pandas as pd
import os

# Function to generate and save individual frames displaying only the name of the president
def generate_name_frames(rbt_exec_orders, height=200, width=400, output_dir='name_frames', font_size=42, font_type="Regular", 
                          font_color=(255, 255, 0), font_outline_width=2, font_outline_color=(0, 0, 0)):
    # Available font types (from the Montserrat font family):
    font_types = [
        "Black",
        "BlackItalic",
        "Bold",
        "BoldItalic",
        "ExtraBold",
        "ExtraBoldItalic",
        "ExtraLight",
        "ExtraLightItalic",
        "Italic",
        "Light",
        "LightItalic",
        "Medium",
        "MediumItalic",
        "Regular",
        "SemiBold",
        "SemiBoldItalic",
        "Thin",
        "ThinItalic"
    ]
    
    # Print the available font types for easy reference
    print(f"Available font types: {', '.join(font_types)}")
    
    # Create the output directory if it doesn't exist
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # Get unique presidents
    presidents = rbt_exec_orders['president'].unique()

    # Initialize frame index
    frame_index = 1

    # Path to the Montserrat font, allowing font type to be chosen
    font_path = f'Montserrat/Montserrat-{font_type}.ttf'  # Use the selected font type

    # Try to load the Montserrat font, fallback to default font if fails
    try:
        font = ImageFont.truetype(font_path, font_size)  # Adjust font and size
    except IOError:
        print(f"Montserrat font {font_type} not found. Using default font.")
        font = ImageFont.load_default()

    # Loop through each president
    for president_name in presidents:
        # Filter the data for the current president
        president_data = rbt_exec_orders[rbt_exec_orders['president'] == president_name]

        # Check if the president exists in the data
        if president_data.empty:
            print(f"No data available for {president_name}")
            continue

        # Adjust for "Donald Trump – I" and "Donald Trump – II"
        if president_name == "Donald Trump – I":
            president_name = "Donald Trump (1st)"
        elif president_name == "Donald Trump – II":
            president_name = "Donald Trump (2nd)"

        # Loop through each day (up to 50 days for each president)
        for day in range(1, 51):
            # Extract the president name for each day
            president_val = president_name

            # Create an empty image with transparent background
            img = Image.new('RGBA', (width, height), color=(0, 0, 0, 0))  # 'RGBA' for transparency
            draw = ImageDraw.Draw(img)

            # Define the text to be displayed (just the president's name)
            text = f"{president_val}"

            # Position the text at the top (leave some space between top and text)
            text_bbox = draw.textbbox((0, 0), text, font=font)  # Use textbbox to get the text size
            text_width, text_height = text_bbox[2] - text_bbox[0], text_bbox[3] - text_bbox[1]
            text_position = ((width - text_width) // 2, 10)  # Align to the top with a small padding (10 pixels)

            # Add outline to the text (by drawing the text multiple times with a small offset)
            if font_outline_width > 0:
                # Draw the outline by offsetting the position
                for dx in range(-font_outline_width, font_outline_width + 1):
                    for dy in range(-font_outline_width, font_outline_width + 1):
                        if dx != 0 or dy != 0:  # Skip the center position
                            draw.text((text_position[0] + dx, text_position[1] + dy), text, font=font, fill=font_outline_color)

            # Add the actual text in specified font color
            draw.text(text_position, text, font=font, fill=font_color)

            # Construct the file name with zero-padding for 4 digits (e.g., 0001.png, 0002.png)
            file_name = f"{frame_index:04d}.png"
            file_path = os.path.join(output_dir, file_name)

            # Save the image as a .png file
            img.save(file_path, 'PNG')  # Ensure the image is saved with a .png format

            # Increment the frame index
            frame_index += 1

    print(f"Frames saved in the directory: {output_dir}")

# Example of calling the function with the font type set to 'ExtraBold', font size 51, and custom font color and outline
generate_name_frames(rbt_exec_orders, height=720, width=1280, output_dir='name_frames', font_size=51, font_type="ExtraBold", 
                      font_color=(255, 255, 0), font_outline_width=3, font_outline_color=(0, 0, 0))

Available font types: Black, BlackItalic, Bold, BoldItalic, ExtraBold, ExtraBoldItalic, ExtraLight, ExtraLightItalic, Italic, Light, LightItalic, Medium, MediumItalic, Regular, SemiBold, SemiBoldItalic, Thin, ThinItalic
Frames saved in the directory: name_frames


### Presidential Photos

In [25]:
import os
import requests
from bs4 import BeautifulSoup
import re
import time

# Ensure soupsieve is installed
try:
    import soupsieve
except ImportError:
    os.system("pip install soupsieve")
    import soupsieve

# Create directories for images
os.makedirs("images/presidents", exist_ok=True)

# Wikipedia URL
PRESIDENTS_URL = "https://en.wikipedia.org/wiki/List_of_presidents_of_the_United_States"

# Function to scrape names from Wikipedia
def get_names(url):
    response = requests.get(url)
    soup = BeautifulSoup(response.text, "html.parser")
    table = soup.find("table", class_="wikitable")
    names = []
    
    if table:
        rows = table.find_all("tr")[1:]  # Skip header row
        for index, row in enumerate(rows, start=1):
            columns = row.find_all("td")
            if columns and len(columns) > 1:  # Skip first column (row number)
                name = columns[1].text.strip()
                name = re.sub(r'\(.*?\)', '', name)  # Remove birth-death years
                name = re.sub(r'\[.*?\]', '', name)  # Remove citations
                name = name.strip()
                if name and index >= 25:  # Only include presidents 25 and up
                    names.append(name)
    
    return names

# Fetch names
presidents = get_names(PRESIDENTS_URL)
print("Presidents List:", presidents)

# Wikipedia API to fetch image URLs
def get_wikipedia_image(name):
    formatted_name = name.replace(' ', '_')
    api_url = f"https://en.wikipedia.org/api/rest_v1/page/summary/{formatted_name}"
    response = requests.get(api_url)
    if response.status_code == 200:
        data = response.json()
        if "originalimage" in data:
            return data["originalimage"]["source"]
    return None

# Download images
def download_image(url, path):
    if url:
        headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36"}
        for attempt in range(3):  # Retry up to 3 times
            print(f"Downloading (attempt {attempt+1}) {url} -> {path}")
            response = requests.get(url, headers=headers, allow_redirects=True, stream=True)
            if response.status_code == 200:
                with open(path, "wb") as file:
                    for chunk in response.iter_content(1024):
                        file.write(chunk)
                print(f"✅ Saved: {path}")
                return
            else:
                print(f"❌ Failed (attempt {attempt+1}) with status code: {response.status_code}")
                time.sleep(2)  # Wait before retrying
        print(f"🚨 Skipping: {url} after 3 failed attempts")

# Process Presidents and save as 25, 26, ..., 47
for index, president in enumerate(presidents, start=25):
    image_url = get_wikipedia_image(president)
    if image_url:
        download_image(image_url, f"images/presidents/{index}.jpg")
    else:
        print(f"❌ No image found for {president}")

print("All available images downloaded successfully!")

Presidents List: ['William McKinley', 'Theodore Roosevelt', 'William Howard Taft', 'Woodrow Wilson', 'Warren G. Harding', 'Calvin Coolidge', 'Herbert Hoover', 'Franklin D. Roosevelt', 'Harry S. Truman', 'Dwight D. Eisenhower', 'John F. Kennedy', 'Lyndon B. Johnson', 'Richard Nixon', 'Gerald Ford', 'Jimmy Carter', 'Ronald Reagan', 'George H. W. Bush', 'Bill Clinton', 'George W. Bush', 'Barack Obama', 'Donald Trump', 'Joe Biden', 'Donald Trump']
Downloading (attempt 1) https://upload.wikimedia.org/wikipedia/commons/6/6d/Mckinley.jpg -> images/presidents/25.jpg
✅ Saved: images/presidents/25.jpg
Downloading (attempt 1) https://upload.wikimedia.org/wikipedia/commons/5/5b/Theodore_Roosevelt_by_the_Pach_Bros.jpg -> images/presidents/26.jpg
✅ Saved: images/presidents/26.jpg
Downloading (attempt 1) https://upload.wikimedia.org/wikipedia/commons/a/a1/Cabinet_card_of_William_Howard_Taft_by_Pach_Brothers_-_Cropped_to_image.jpg -> images/presidents/27.jpg
✅ Saved: images/presidents/27.jpg
Downloadi

In [31]:
from PIL import Image
import os

def process_images(input_dir, output_dir, target_aspect_ratio=(2, 3)):
    # Create the output directory if it doesn't exist
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    
    # Get all the image files
    image_files = [f for f in os.listdir(input_dir) if f.endswith('.jpg')]

    # Loop over each image file
    for image_file in image_files:
        image_path = os.path.join(input_dir, image_file)
        img = Image.open(image_path)

        # Get the current dimensions of the image
        width, height = img.size
        current_aspect_ratio = width / height

        # Print the current aspect ratio
        print(f"Processing {image_file}: Aspect ratio = {current_aspect_ratio} (Width: {width}, Height: {height})")

        # Target aspect ratio = 3x height by 2x width (width/height = 2/3)
        target_width = int(height * target_aspect_ratio[0] / target_aspect_ratio[1])

        # Crop the image if necessary (cutting off the bottom part to match the target aspect ratio)
        if width != target_width:
            left = 0
            top = 0
            right = min(width, target_width)  # Keep the width the same or crop
            bottom = height  # Retain the original height
            img_cropped = img.crop((left, top, right, bottom))  # Crop the image
        else:
            img_cropped = img
        
        # Save the processed image to the output directory
        output_path = os.path.join(output_dir, image_file)
        img_cropped.save(output_path)

        print(f"Processed image: {image_file} saved at {output_path}")

# Example usage
process_images("images/presidents", "images/processed")

Processing 28.jpg: Aspect ratio = 0.7502374169040835 (Width: 6320, Height: 8424)
Processed image: 28.jpg saved at images/processed/28.jpg
Processing 29.jpg: Aspect ratio = 0.7357043235704324 (Width: 3165, Height: 4302)
Processed image: 29.jpg saved at images/processed/29.jpg
Processing 39.jpg: Aspect ratio = 0.8140108534780464 (Width: 1650, Height: 2027)
Processed image: 39.jpg saved at images/processed/39.jpg
Processing 38.jpg: Aspect ratio = 0.8026267785479752 (Width: 2200, Height: 2741)
Processed image: 38.jpg saved at images/processed/38.jpg
Processing 35.jpg: Aspect ratio = 0.7711956521739131 (Width: 1419, Height: 1840)
Processed image: 35.jpg saved at images/processed/35.jpg
Processing 34.jpg: Aspect ratio = 0.8029357798165138 (Width: 2188, Height: 2725)
Processed image: 34.jpg saved at images/processed/34.jpg
Processing 36.jpg: Aspect ratio = 0.752442996742671 (Width: 924, Height: 1228)
Processed image: 36.jpg saved at images/processed/36.jpg
Processing 37.jpg: Aspect ratio = 0.