In [1]:
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [2]:
# Step 1: read the exported query results into a DataFrame.
# Point `file_path` at your own copy of the CSV before running.
file_path = "data/bquxjob_1b6527cd_19277d89f7d.csv"
data = pd.read_csv(filepath_or_buffer=file_path)

In [3]:
# Reshape to one row per year and one column per country, summing
# `patent_count`; cells left empty by the pivot are filled with 0.
pivot_data = pd.pivot_table(
    data,
    index="year",
    columns="country",
    values="patent_count",
    aggfunc="sum",
).fillna(0)

# Summing across the country columns gives the total patents for each year.
total_patents = pivot_data.sum(axis="columns")

In [4]:
# Express each country's count as a percentage of that year's row total,
# so every row (year) adds up to 100.
pivot_data_percentage = pivot_data.div(
    pivot_data.sum(axis="columns"), axis="index"
).mul(100)

In [9]:
# Step 3: Plot the data using Plotly and display percentages on the bars
def plot_plotly_stacked_bar_and_line(pivot_data, absolute_data, title, filename=None):
    """Show stacked bars with a dotted line on a secondary y-axis.

    Parameters
    ----------
    pivot_data
        Table whose index supplies the x values and whose columns each become
        one stacked bar trace. Every value is also written inside its bar,
        rounded to one decimal and suffixed with "%".
    absolute_data
        Series drawn as the "Total Patents" line against the secondary
        y-axis; its index supplies the x values.
    title
        Figure title.
    filename
        Optional. When truthy, the figure is also written to this path as
        HTML after it has been shown.

    Returns
    -------
    None
        The figure is displayed (and optionally exported), not returned.
    """
    # One plot area with two y-axes: percentages (primary), totals (secondary).
    figure = make_subplots(specs=[[{"secondary_y": True}]])

    # One bar trace per column, labelled with the rounded percentage.
    x_values = pivot_data.index
    for series_name in pivot_data.columns:
        shares = pivot_data[series_name]
        bar_labels = shares.round(1).astype(str) + "%"
        bar = go.Bar(
            x=x_values,
            y=shares,
            name=series_name,
            text=bar_labels,
            textposition="inside",
            hoverinfo="x+y+text",
        )
        figure.add_trace(bar, secondary_y=False)

    # Semi-transparent dotted red line carrying the absolute totals.
    totals_line = go.Scatter(
        x=absolute_data.index,
        y=absolute_data,
        mode="lines+markers",
        name="Total Patents",
        line=dict(color="red", dash="dot", width=2),
        opacity=0.5,
        hoverinfo="x+y",
    )
    figure.add_trace(totals_line, secondary_y=True)

    # Axis titles: primary first, then secondary.
    for on_secondary, axis_title in (
        (False, "Percentage (%)"),
        (True, "Total Patents"),
    ):
        figure.update_yaxes(title_text=axis_title, secondary_y=on_secondary)

    layout_options = {
        "title": title,
        "barmode": "stack",
        "xaxis_title": "Year",
        "legend_title": "Legend",
        "hovermode": "x unified",
    }
    figure.update_layout(**layout_options)

    figure.show()

    # The HTML export happens only when the caller supplied a target path.
    if filename:
        figure.write_html(filename)

In [10]:
# Step 4: draw the overall chart -- per-country shares as stacked bars,
# yearly totals as the line on the secondary axis.
plot_plotly_stacked_bar_and_line(
    pivot_data=pivot_data_percentage,
    absolute_data=total_patents,
    title="Patent Share by Country (2014-2024)",
)

In [13]:
# Function to create Plotly chart with 100% stacked bars and trend line for each application area
def plot_per_application_area(data, filename=None):
    """Show one share-by-country chart for each application area.

    For every distinct non-null value in ``data["application_area"]`` the
    matching rows are pivoted into a year x country table of summed
    ``patent_count``, converted to per-year percentages, and passed to
    ``plot_plotly_stacked_bar_and_line`` together with the per-year totals,
    so these charts are built by the same code as the overall chart.

    Parameters
    ----------
    data : pandas.DataFrame
        Must provide the columns ``application_area``, ``year``, ``country``
        and ``patent_count``.
    filename : optional
        Export switch. Falsy (the default): figures are only shown. Truthy:
        each figure is also written as HTML. A string containing the
        ``{app_area}`` placeholder is used as the output-path template, e.g.
        ``"out/{app_area}.html"``. Any other truthy value keeps the previous
        naming, ``"<app_area>_patent_share.html"``.

    Returns
    -------
    None
    """
    # Rows without an area are skipped: a NaN label never compares equal to
    # itself, so its subset is always empty and would only give a blank chart.
    for app_area in data["application_area"].dropna().unique():
        area_rows = data[data["application_area"] == app_area]

        # Years as index, countries as columns, summed patent counts as values.
        counts_by_year = area_rows.pivot_table(
            index="year", columns="country", values="patent_count", aggfunc="sum"
        ).fillna(0)

        # Compute the per-year totals once; they feed both the percentage
        # calculation and the line trace.
        yearly_totals = counts_by_year.sum(axis=1)
        share_by_year = counts_by_year.div(yearly_totals, axis=0) * 100

        plot_plotly_stacked_bar_and_line(
            share_by_year,
            yearly_totals,
            f"100% Stacked Patent Share by Country in {app_area} (2014-2024)",
            filename=_area_export_path(filename, app_area),
        )


def _area_export_path(filename, app_area):
    """Return the HTML path for one area's figure, or None when export is off."""
    if not filename:
        return None

    # A path separator inside the label would redirect the export into a
    # sub-directory named after part of the label, so replace it.
    safe_area = str(app_area).replace("/", "_").replace("\\", "_")

    # str.replace rather than str.format: other braces in the template are
    # left alone instead of raising.
    if isinstance(filename, str) and "{app_area}" in filename:
        return filename.replace("{app_area}", safe_area)

    return f"{safe_area}_patent_share.html"

In [14]:
# Render one chart per application area; `filename` is left at its default,
# so nothing is exported.
plot_per_application_area(data=data)

In [None]:
# SQL text stored as a plain string. No cell visible in this notebook reads
# or executes `query`, and this cell carries no execution count.
query = """select
    a,
    b,
    c
from d limit 10
"""
