In [1]:
import pandas as pd
import altair as alt
import numpy as np
import eco_style
# Activate the 'light' Altair theme for every chart in this notebook.
# NOTE(review): the theme is presumably registered by the `eco_style` import above — confirm.
alt.themes.enable('light')

ThemeRegistry.enable('light')

In [None]:
# NOTE(review): scratch expression. This cell has no execution count and `col`
# is not defined in any cell visible here, so it would presumably raise
# NameError on a fresh kernel — confirm whether the cell can be deleted.
[[f"rta_{col}", "" ]]

In [60]:
# Fig 1: bubble chart of patent growth (x) against Revealed Technological
# Advantage (y), one bubble per technology, sized by world share and coloured
# by field.
#
# Workbook layout as used below: column 0 becomes "Category", column 1 becomes
# "CAGR", followed by one (RTA, World Share) column pair per entry of
# `categories`.
df = pd.read_excel("Fig 1 - RTA.xlsx", skiprows=0)
categories = ["Chemistry", "Clean", "Electrical Engineering", "Instruments", "Mechanical Engineering", "Other Fields", "Trending"]
data_list = []

for i, category in enumerate(categories):
    # Take an explicit copy of the slice: adding the "Field" column to a bare
    # `iloc` slice triggers pandas' SettingWithCopyWarning.
    # The first data row is skipped (presumably a sub-header row — confirm).
    category_data = df.iloc[1:, [0, 1, 2*i+2, 2*i+3]].copy()
    category_data.columns = ["Category", "CAGR", "RTA", "World Share"]
    category_data["Field"] = category
    data_list.append(category_data)

# Combine all the category data into a single DataFrame
combined_df = pd.concat(data_list, ignore_index=True)

# Convert data types
combined_df["CAGR"] = combined_df["CAGR"].astype(float)
combined_df["RTA"] = combined_df["RTA"].astype(float)
combined_df["World Share"] = combined_df["World Share"].astype(float)

# Drop rows with NaN values (a technology only has values in some field columns)
combined_df = combined_df.dropna()

# Technologies that get a text label next to their bubble
focus = [
    "Food chemistry", "Robotics", "Biotechnology", "Instruments - Medical Technology", "Wireless",
    "Other consumer goods", "Artificial Intelligence", "Transport Technologies", "Aerospace", "Audio-visual technology",
    "3D Printing", "Instruments - Optics",
]

# Empty label for everything outside `focus`, so the text layer draws nothing
# for those rows. `assign` returns a new frame instead of writing into the
# result of `dropna()`.
combined_df = combined_df.assign(
    label=np.where(combined_df['Category'].isin(focus), combined_df['Category'], "")
)

# Create the bubble chart
bubble_chart = alt.Chart(combined_df).mark_circle().encode(
    x=alt.X('CAGR',
            scale=alt.Scale(domain=[-0.05, 0.4]),
            axis=alt.Axis(
                grid=True,
                gridOpacity=0.5,
                gridColor="#676A86",
                gridDash=[1, 5],
                titleX=260,
                format='%'),
            title="10-year annualised growth of global patent volumes"),
    y=alt.Y("RTA", axis=alt.Axis(title="Revealed Technological Advantage")),
    size=alt.Size('World Share', scale=alt.Scale(range=[20, 1200]), legend=alt.Legend(title='Patent activity Share', format=".0%")),
    color='Field',
    tooltip=['Category', 'CAGR', 'RTA', 'World Share', 'Field']
)

# Text layer: reuses the bubble encodings (position, colour, tooltip) and
# replaces the data-driven size with a fixed font size.
labels = bubble_chart.mark_text(
    align='left',
    baseline='middle',
    dx=15
).encode(
    text='label',
    size=alt.value(10),
)

chart = bubble_chart + labels


chart = chart.properties(
    width=400,
    height=400,
)

# Export the Vega-Lite spec and a 2x-resolution PNG next to the notebook
chart.save("fig1.json")
chart.save("fig1.png", scale_factor=2.0)

chart


  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


# Fig 2

In [58]:
# Fig 2: horizontal bar chart of Revealed Technological Advantage by name.
# The values sit from the 11th column onwards, below four skipped rows; rows
# with any missing value are discarded.
df = (
    pd.read_excel("Fig 2 - RTA -clean.xlsx", skiprows=4)
    .iloc[:, 10:]
    .set_axis(["name", "value"], axis=1)
    .dropna()
)

fig2_x_axis = alt.Axis(title="Revealed Technological Advantage", titleX=300)
fig2_y_axis = alt.Axis(title="")

chart = (
    alt.Chart(df)
    .mark_bar(color="#36B7B4")
    .encode(
        x=alt.X("value", axis=fig2_x_axis),
        # sort="-x" orders the bars by descending value
        y=alt.Y("name", axis=fig2_y_axis, sort="-x"),
        tooltip=["name", "value"],
    )
)

# Export the Vega-Lite spec and a 2x-resolution PNG next to the notebook
for target, options in (("fig2.json", {}), ("fig2.png", {"scale_factor": 2.0})):
    chart.save(target, **options)

chart

  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


# Fig 3

In [97]:
# Fig 3: scatter of GVA per hour (x) against the share of a region's patents
# that are clean (y), coloured by UK nation, with a regression trendline and
# text labels for a handful of regions.
df = pd.read_excel("Fig 3 - scatter.xlsx", skiprows=0)
# Keep the first three columns and give them the names used in the encodings
df = df.iloc[:, [0, 1, 2]].copy()
df.columns = ["Region", "GVA per hour", "Share of clean patents"]

# Convert data types
df["GVA per hour"] = df["GVA per hour"].astype(float)
df["Share of clean patents"] = df["Share of clean patents"].astype(float)

# Lookup from each region to the nation used to colour its point

parents = {
  "Bedfordshire and Hertfordshire": "England",
  "Berkshire, Buckinghamshire and Oxfordshire": "England",
  "Cheshire": "England",
  "Cornwall and Isles of Scilly": "England",
  "Cumbria": "England",
  "Derbyshire and Nottinghamshire": "England",
  "Devon": "England",
  "Dorset and Somerset": "England",
  "East Anglia": "England",
  "East Wales": "Wales",
  "East Yorkshire and Northern Lincolnshire": "England",
  "Eastern Scotland": "Scotland",
  "Essex": "England",
  "Gloucestershire, Wiltshire and Bath/Bristol area": "England",
  "Greater Manchester": "England",
  "Hampshire and Isle of Wight": "England",
  "Herefordshire, Worcestershire and Warwickshire": "England",
  "Highlands and Islands": "Scotland",
  "Inner London": "England",
  "Kent": "England",
  "Lancashire": "England",
  "Leicestershire, Rutland and Northamptonshire": "England",
  "Lincolnshire": "England",
  "Merseyside": "England",
  "North Eastern Scotland": "Scotland",
  "North Yorkshire": "England",
  "Northern Ireland": "Northern Ireland",
  "Northumberland and Tyne and Wear": "England",
  "Outer London": "England",
  "Shropshire and Staffordshire": "England",
  "South Western Scotland": "Scotland",
  "South Yorkshire": "England",
  "Surrey, East and West Sussex": "England",
  "Tees Valley and Durham": "England",
  "West Midlands": "England",
  "West Wales and The Valleys": "Wales",
  "West Yorkshire": "England"
}

# Regions absent from `parents` map to NaN and are removed by the dropna below
df["parent"] = df["Region"].map(parents)

# Regions that get a text label next to their point
label_points = [
  "Cornwall and Isles of Scilly",
  "Derbyshire and Nottinghamshire",
  "Lincolnshire",
  "Shropshire and Staffordshire",
  "Dorset and Somerset",
  "Inner London",
  "Outer London",
  "North Eastern Scotland",
  "Hampshire and Isle of Wight",
]

# Empty label for every other region, so the text layer draws nothing for it
df["label"] = np.where(df["Region"].isin(label_points), df["Region"], "")



# Drop rows with NaN values
df = df.dropna()

# Create the bubble chart using Altair
bubbles = alt.Chart(df).mark_circle().encode(
    x=alt.X('GVA per hour', axis=alt.Axis(title='GVA per hour (UK = 100)'), scale=alt.Scale(zero=False)),
    y=alt.Y('Share of clean patents', axis=alt.Axis(
        titleY=-20,
        title='Share of region\'s patents that are clean', format='%')),
    size=alt.value(60),
    color=alt.Color('parent', legend=alt.Legend(
                                                orient='none',
                                                direction= 'horizontal',
                                                legendY=-15,
                                                title= None,
                                                 values=["England", "Scotland", "Wales", "Northern Ireland"])),
    tooltip=['Region', 'GVA per hour', {'type': 'quantitative', 'field': 'Share of clean patents', 'title': "clean patents", 'format': '.2%'}]
)

# add a trendline
# The line is derived from `bubbles`, so it inherits the `parent` colour
# encoding. An encoding channel takes precedence over the mark's own colour,
# and the regression output no longer carries a `parent` field, so the colour
# channel is overridden with a constant here to actually draw the line black.
reg_line = bubbles.transform_regression('GVA per hour', 'Share of clean patents').mark_line(color='black').encode(
    color=alt.value('black'),
    size=alt.value(1)
)

# Text layer: reuses the point encodings and fixes the font size
labels = bubbles.mark_text(
    align='left',
    baseline='middle',
    dx=15
).encode(
    text='label',
    size=alt.value(10),
)

chart = bubbles + reg_line + labels

chart = chart.properties(
    width=450,
    height=400,
)

# Export the Vega-Lite spec and a 2x-resolution PNG next to the notebook
chart.save("fig3.json")
chart.save("fig3.png", scale_factor=2.0)

chart

  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)
  col = df[col_name].apply(to_list_if_array, convert_dtype=False)


In [78]:
# Display the current `df` for inspection.
df

Unnamed: 0,Region,GVA per hour,Share of clean patents,parent
0,Bedfordshire and Hertfordshire,98.1,0.103295,England
1,"Berkshire, Buckinghamshire and Oxfordshire",117.5,0.095986,England
2,Cheshire,104.43,0.101626,England
3,Cornwall and Isles of Scilly,77.37,0.206897,England
4,Cumbria,83.38,0.106667,England
5,Derbyshire and Nottinghamshire,86.61,0.291281,England
6,Devon,84.05,0.084158,England
7,Dorset and Somerset,83.7,0.041237,England
8,East Anglia,91.79,0.089546,England
9,East Wales,86.72,0.049676,Wales


In [74]:
# List the distinct region names present in `df`.
df["Region"].unique()

array(['Bedfordshire and Hertfordshire',
       'Berkshire, Buckinghamshire and Oxfordshire', 'Cheshire',
       'Cornwall and Isles of Scilly', 'Cumbria',
       'Derbyshire and Nottinghamshire', 'Devon', 'Dorset and Somerset',
       'East Anglia', 'East Wales',
       'East Yorkshire and Northern Lincolnshire', 'Eastern Scotland',
       'Essex', 'Gloucestershire, Wiltshire and Bath/Bristol area',
       'Greater Manchester', 'Hampshire and Isle of Wight',
       'Herefordshire, Worcestershire and Warwickshire',
       'Highlands and Islands', 'Inner London', 'Kent', 'Lancashire',
       'Leicestershire, Rutland and Northamptonshire', 'Lincolnshire',
       'Merseyside', 'North Eastern Scotland', 'North Yorkshire',
       'Northern Ireland', 'Northumberland and Tyne and Wear',
       'Outer London', 'Shropshire and Staffordshire',
       'South Western Scotland', 'South Yorkshire',
       'Surrey, East and West Sussex', 'Tees Valley and Durham',
       'West Midlands', 'West Wales a