In [32]:
import pandas as pd
import plotly.express as px
import re
import ast


# Google Drive share link for the extended dataset. The Drive file ID is the
# path segment that follows "/d/"; import_csv() uses it to build a download URL.
AI_Reg_Maturity_Healthcare_2025_url = "https://drive.google.com/file/d/17BNtieVUeinm4UEzqgaqXbbaMw1IM5j5/view?usp=drive_link"

def import_csv(url):
    """Download a CSV shared on Google Drive and return it as a DataFrame.

    Parameters
    ----------
    url : str
        A Google Drive link that carries the file ID either as a
        ``/d/<id>`` path segment (share links) or as an ``id=<id>`` query
        parameter.

    Returns
    -------
    pandas.DataFrame
        The parsed CSV contents.

    Raises
    ------
    ValueError
        If no file ID can be found in ``url``.
    """
    # Find the ID by pattern rather than by position, so links with or
    # without a trailing "/view?..." part resolve to the same file.
    match = re.search(r"/d/([\w-]+)|[?&]id=([\w-]+)", url)
    if match is None:
        raise ValueError(f"Could not find a Google Drive file ID in: {url!r}")
    file_id = match.group(1) or match.group(2)

    path = "https://drive.google.com/uc?export=download&id=" + file_id
    return pd.read_csv(path)

# Import the dataset into `df`, the working DataFrame used by the later cells
df = import_csv(AI_Reg_Maturity_Healthcare_2025_url)


# Search for AI mentions in healthcare or drug development

Includes both scientific publications and national policies/frameworks (2023–mid-2025)

Methodology:

Use a mix of authoritative databases:

PubMed/NCBI

WHO & OECD

Government websites

arXiv / medRxiv / Nature / Science

National strategies from AI.gov and health ministries

# Extract AI submission mentions from a list of comments
Parses each cell like ['comment1', 'comment2'] into a Python list.

Applies regex to each comment string inside the list.

Adds up all the valid mentions per country.

## Preview real examples from References/Comments

In [33]:
import ast

# Safely parse the list from string
def parse_comment_list(raw_comment):
    """Parse one ``References/Comments`` cell into a list of comments.

    Parameters
    ----------
    raw_comment : object
        Usually the string form of a Python list, e.g.
        ``"['comment1', 'comment2']"``. A value that is already a list is
        returned unchanged.

    Returns
    -------
    list
        The parsed list, or ``[]`` when the value cannot be parsed or does
        not evaluate to a list (missing values, free text, scalars).
    """
    # A cell that has already been converted holds a real list; hand it back
    # instead of treating it as unparseable.
    if isinstance(raw_comment, list):
        return raw_comment

    try:
        parsed = ast.literal_eval(raw_comment)
    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
        # Only the errors literal_eval raises for malformed input are
        # swallowed; KeyboardInterrupt and SystemExit still propagate.
        return []

    return parsed if isinstance(parsed, list) else []

# Preview up to five parsed examples; head(5) keeps this working when the
# frame has fewer than five rows
for example_no, raw_comment in enumerate(df['References/Comments'].head(5), start=1):
    print(f"\n--- Example {example_no} ---")
    print(parse_comment_list(raw_comment))



--- Example 1 ---
['No documented evidence as of 2025 Minimal or no evidence of national AI strategy in healthcare.', '0']

--- Example 2 ---
['Albania is part of European regional programs via EU neighbors and WHO-Europe that explore digital health strategies. No significant national AI drug development initiatives documented.']

--- Example 3 ---
['No major AI healthcare frameworks or drug development programs identified. Algeria has participated in African Union-wide digital health and data harmonization efforts but lacks country-specific policies for AI in pharmaceuticals. Member of AU. African Union has endorsed digital health strategies incorporating AI elements.', '0']

--- Example 4 ---
['As a U.S. territory, American Samoa falls under U.S. FDA oversight. However, no local initiatives or deployments of AI in healthcare or drug development are recorded.']

--- Example 5 ---
['Andorra collaborates with WHO and regional EU partners on e-health integration. No direct evidence of n

In [34]:
# Drop a trailing all-digit string from a stringified comment list
def remove_last_if_number(raw_comment):
    """Parse ``raw_comment`` and strip a trailing numeric entry, if any.

    The value is evaluated with ``ast.literal_eval``. When the result is a
    non-empty list whose last element is a string made only of digits
    (ignoring surrounding whitespace), that element is dropped. Any other
    parsed value is returned as parsed, and a value that cannot be parsed is
    returned unchanged.
    """
    try:
        items = ast.literal_eval(raw_comment)
    except Exception:
        # Not a Python literal: hand the original value back untouched
        return raw_comment

    # Only non-empty lists can carry a trailing number
    if not isinstance(items, list) or not items:
        return items

    tail = items[-1]
    has_numeric_tail = isinstance(tail, str) and tail.strip().isdigit()
    return items[:-1] if has_numeric_tail else items

# Apply to column: this overwrites 'References/Comments', so after this cell
# each entry is whatever remove_last_if_number returned (a parsed list, or the
# original value when parsing failed)
df['References/Comments'] = df['References/Comments'].apply(remove_last_if_number)

# Preview cleaned data
print(df['References/Comments'].head())


0    [No documented evidence as of 2025 Minimal or ...
1    [Albania is part of European regional programs...
2    [No major AI healthcare frameworks or drug dev...
3    [As a U.S. territory, American Samoa falls und...
4    [Andorra collaborates with WHO and regional EU...
Name: References/Comments, dtype: object


In [40]:
# Peek at the mention counts; compute_maturity_score reads this column
df['AI Mentions in Submissions (2025)'].head()

Unnamed: 0,AI Mentions in Submissions (2025)
0,0
1,3
2,0
3,0
4,0


🎯 What Is the “AI Maturity Score (0–100)”?
This score is likely a composite indicator intended to quantify a country’s regulatory maturity around AI in pharmaceutical development. It appears on a 0–100 scale and is probably derived from qualitative inputs like:

Source Column	What It Suggests
AI Policy Mentioned	Whether any policy framework exists
AI Mentions in Submissions (2025)	Level of regulatory or legislative activity
Key National Initiative	Government engagement or strategy
References/Comments	Contextual insight or evidence

⚙️ How It Might Be Scored
Let’s assume a simple rules-based scoring model to demonstrate:

Feature	Points
AI Policy Mentioned = Yes	+30
AI Policy Mentioned = Yes, economic block (e.g., EU)	+25
AI Mentions in Submissions (2025)	2 pts per mention (capped at 20 pts)
Key National Initiative is not empty	+30
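Relevant notes in Comments (e.g., strong strategy)	up to +20 (+15 if the comments mention "framework", "strategy" or "guideline"; +5 if they mention "advanced" or "leading")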

This gives a maximum score of 100 (30 + 20 + 30 + 20); the code also caps the total at 100.

In [37]:
# Function to compute maturity score
def compute_maturity_score(row):
    """Compute the rules-based AI maturity score (0-100) for one row.

    Scoring rules:

    * ``AI Policy Mentioned`` contains "yes": +30, or +25 when it also
      contains "economic block".
    * ``AI Mentions in Submissions (2025)``: 2 points per mention, limited to
      the range 0-20. Values that are missing or not numeric count as 0.
    * ``Key National Initiative`` is present and not blank: +30.
    * ``References/Comments`` text: +15 if it contains "framework",
      "strategy" or "guideline"; +5 if it contains "advanced" or "leading".

    Parameters
    ----------
    row : pandas.Series or dict
        One record, indexable by the four column names above.

    Returns
    -------
    int
        The total score, capped at 100.
    """
    score = 0

    # Policy Mentioned
    policy_value = row['AI Policy Mentioned']
    if pd.notna(policy_value):
        policy = str(policy_value).lower()
        if "yes" in policy:
            score += 25 if "economic block" in policy else 30

    # Mentions in submissions. Going through float() accepts numeric strings
    # such as "3.0"; only conversion errors are treated as "no mentions".
    try:
        mentions = int(float(row['AI Mentions in Submissions (2025)']))
    except (TypeError, ValueError, OverflowError):
        mentions = 0
    # Clamp at both ends so a negative count cannot reduce the score
    score += max(0, min(20, mentions * 2))

    # Key initiative: str() lets non-string values (e.g. numbers) count as
    # present instead of failing on .strip()
    initiative = row['Key National Initiative']
    if pd.notna(initiative) and str(initiative).strip() != "":
        score += 30

    # Extra points from comments (e.g., strong evidence)
    # NOTE(review): this is plain substring matching, so negated phrases such
    # as "no evidence of national AI strategy" also earn the keyword points.
    comment = str(row['References/Comments']).lower()
    if any(kw in comment for kw in ('framework', 'strategy', 'guideline')):
        score += 15
    if "advanced" in comment or "leading" in comment:
        score += 5

    # Cap to 100
    return min(score, 100)

# Apply to DataFrame: score every row and store the result in a new column
df['AI Maturity Score (0-100)'] = df.apply(compute_maturity_score, axis=1)

Don't forget to download/save your new DataFrames. Also, give them an obvious name, so that you know they are the cleaned version and not the original DataFrame.

In [38]:
# Write the scored frame first, so the CSV is produced even when this
# notebook runs outside Google Colab
df.to_csv("df_scored.csv", index=False)

# google.colab is only importable inside Colab; elsewhere the file simply
# stays on disk and the browser download is skipped
try:
    from google.colab import files
except ImportError:
    files = None

if files is not None:
    files.download("df_scored.csv")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [39]:
# Columns shown in the hover tooltip, each switched on explicitly
score_column = 'AI Maturity Score (0-100)'
hover_columns = [
    score_column,
    'AI Mentions in Submissions (2025)',
    'AI Policy Mentioned',
    'Key National Initiative',
]

# Build the world map, coloured by maturity score
fig = px.choropleth(
    data_frame=df,
    locations='Country',
    locationmode='country names',
    color=score_column,
    hover_name='Country',
    hover_data=dict.fromkeys(hover_columns, True),
    color_continuous_scale='YlGnBu',
    title='Figure 1. Global AI Regulatory Maturity in Pharmaceutical Development (2023–2025)',
    template='plotly_white',
)

# Layout: frameless map with coastlines, margins, and title font
fig.update_layout(
    geo={
        'showframe': False,
        'showcoastlines': True,
        'projection_type': 'equirectangular',
    },
    margin={'l': 30, 'r': 30, 't': 80, 'b': 30},
    title_font={'size': 18, 'family': 'Arial'},
)

# Thin gray outlines between countries
fig.update_traces(marker_line_width=0.5, marker_line_color='gray')

# Render the interactive map
fig.show()