In [None]:
# Standard library
import json
import math
import os
from datetime import datetime
from math import pi

# Third-party
import geopandas as gpd
import markdown
import numpy as np
import pandas as pd
import requests
import sqlalchemy
from bokeh.io import output_notebook
from bokeh.models import (Span, TabPanel, Tabs, ColumnDataSource, DataCube,
                          GroupingInfo, StringFormatter, SumAggregator,
                          TableColumn, HoverTool, LinearColorMapper, ColorBar)
from bokeh.palettes import Category20c, Cividis256, Viridis256
from bokeh.plotting import figure, show
from bokeh.transform import cumsum
from IPython.display import display, Markdown

# Project-local helpers
import datacite_api_functions as dcf
import data_doc_helper as dh
import mdapi_functions as md

output_notebook(hide_banner=True)

In [None]:
# Route Bokeh output to the notebook (banner suppressed).
output_notebook(hide_banner=True)

# Hard-coded example counts keyed by country.
x = {
    'United States': 157,
    'United Kingdom': 93,
    'Japan': 89,
    'China': 63,
    'Germany': 44,
    'India': 42,
    'Italy': 40,
    'Australia': 35,
    'Brazil': 32,
    'France': 31,
    'Taiwan': 31,
    'Spain': 29,
}

# One row per country; 'angle' is the wedge size in radians (share of the
# total times 2*pi) and 'color' takes one Category20c entry per country.
data = (
    pd.Series(x)
    .reset_index(name='value')
    .rename(columns={'index': 'country'})
    .assign(
        angle=lambda frame: frame['value'] / frame['value'].sum() * 2 * pi,
        color=Category20c[len(x)],
    )
)

p = figure(
    height=350,
    title="Pie Chart",
    toolbar_location=None,
    tools="hover",
    tooltips="@country: @value",
    x_range=(-0.5, 1.0),
)

# Each wedge starts where the running sum of the previous angles ends.
p.wedge(
    x=0,
    y=1,
    radius=0.4,
    start_angle=cumsum('angle', include_zero=True),
    end_angle=cumsum('angle'),
    line_color="white",
    fill_color='color',
    legend_field='country',
    source=data,
)

# A pie chart needs neither axes nor grid lines.
p.axis.axis_label = None
p.axis.visible = False
p.grid.grid_line_color = None

show(p)

In [None]:
from bokeh.plotting import figure, show, output_file

# Base map: a figure with mercator-typed axes and an OpenStreetMap tile layer.
# NOTE(review): the x/y ranges look like Web Mercator extents framing the
# British Isles — confirm against the intended view.
p = figure(x_range=(-1400000, 400000), y_range=(6300000, 8800000),
           x_axis_type="mercator", y_axis_type="mercator")
p.add_tile("OpenStreetMap Mapnik")


def latlon_to_mercator(lat, lon):
    """Project a latitude/longitude pair (degrees) onto spherical Mercator.

    Uses a sphere of radius 6378137 m:
        x = R * lon_rad
        y = R * ln(tan(pi/4 + lat_rad/2))

    Args:
        lat: Latitude in degrees. Values at +/-90 are outside the domain of
            the projection (the tangent/log term diverges).
        lon: Longitude in degrees.

    Returns:
        tuple: (x, y) in the same units as the radius (metres).
    """
    r_major = 6378137.000
    x = r_major * math.radians(lon)
    # The previous form derived a scale factor as x / lon, which is just
    # r_major * pi / 180 but raised ZeroDivisionError on the prime meridian
    # (lon == 0). Multiplying by the radius directly is equivalent and safe.
    y = r_major * math.log(math.tan(math.pi / 4.0 + math.radians(lat) / 2.0))
    return (x, y)

# --- Northern Ireland polygon (rough lat/lon) ---
# A simplified, hand-entered outline; the two lists are paired element-wise.
ni_lats = [55.3, 55.2, 54.8, 54.6, 54.1, 54.1, 54.2, 54.5, 54.8] 
ni_lons = [-6.8, -6.2, -5.8, -5.5, -6.0, -7.3, -7.8, -8.1, -7.5]

# Convert each (lat, lon) vertex to Mercator so it lines up with the
# mercator-typed figure `p` created earlier in this cell.
# NOTE: the loop rebinds the notebook-level names x, y, lat and lon.
mercator_x = []
mercator_y = []
for lat, lon in zip(ni_lats, ni_lons):
    x, y = latlon_to_mercator(lat, lon)
    mercator_x.append(x)
    mercator_y.append(y)

# `patches` takes one list of coordinates per polygon, hence the extra
# list wrapper around the single outline.
source = ColumnDataSource(data=dict(
    xs=[mercator_x], 
    ys=[mercator_y],
    name=["Northern Ireland"]
))

# Semi-transparent green fill with a darker outline on top of the tiles.
p.patches('xs', 'ys', source=source,
          fill_color="green", fill_alpha=0.2, 
          line_color="darkgreen", line_width=2)


p.grid.visible = False

show(p)

In [None]:
# Load the region polygons from the local GeoJSON file.
geo_polys = gpd.read_file("regions.geojson")

In [None]:
# Read the counts CSV, discard the bookkeeping columns, then pivot so that
# each source becomes a column and the former column labels land in "index".
geo_counts = (
    pd.read_csv("geo_locations.csv")
    .drop(columns=["index", "source_stem"])
    .set_index("source")
    .transpose()
    .reset_index()
)
geo_counts

In [None]:
# Join polygons to counts: the left key is the polygon "id"; the right key is
# the "index" column of geo_counts, which holds the former column labels of
# the counts table after it was transposed (presumably region names — verify).
geo_data = geo_polys.merge(geo_counts, left_on="id", right_on="index")

In [None]:
# Reproject to EPSG:3857 (Web Mercator) for use with mercator-typed Bokeh axes.
geo_data2 = geo_data.to_crs(epsg=3857)

In [None]:
# Display the reprojected frame as the cell output.
geo_data2

In [None]:
def get_bokeh_coords(geometry):
    """Collect exterior-ring coordinates from a shapely-style geometry.

    Only exterior rings are read; interior rings (holes) are ignored.

    Args:
        geometry: Object exposing ``geom_type`` and, depending on the type,
            ``exterior.xy`` (Polygon) or ``geoms`` (MultiPolygon).

    Returns:
        list: A two-element list ``[xs, ys]``. Each element is a list holding
        one coordinate list per polygon part (a single entry for a Polygon).
        Any other geometry type yields ``[[], []]``.
    """
    kind = geometry.geom_type

    if kind == 'Polygon':
        ring_x, ring_y = geometry.exterior.xy
        return [[list(ring_x)], [list(ring_y)]]

    if kind == 'MultiPolygon':
        rings = [part.exterior.xy for part in geometry.geoms]
        all_x = [list(ring_x) for ring_x, _ in rings]
        all_y = [list(ring_y) for _, ring_y in rings]
        return [all_x, all_y]

    return [[], []]

In [None]:
# Compute [xs, ys] once per geometry, then split the pair into two columns.
# NOTE: this adds columns to geo_data2 in place.
geo_data2['bokeh_coords'] = geo_data2['geometry'].apply(get_bokeh_coords)
geo_data2['xs'] = geo_data2['bokeh_coords'].apply(lambda x: x[0])
geo_data2['ys'] = geo_data2['bokeh_coords'].apply(lambda x: x[1])

In [None]:
# Build the Bokeh data source without the geometry objects and the
# intermediate coordinate pair; only the xs/ys and attribute columns remain.
source = ColumnDataSource(geo_data2.drop(columns=["geometry", "bokeh_coords"]))

In [None]:
# --- 3. Set up the Color Mapper ---
# 'low' and 'high' define the value range mapped onto the palette; they are
# fixed at 0 and 100 here rather than derived from the data.
color_mapper = LinearColorMapper(palette=Viridis256, low=0, high=100)

In [None]:
# Mercator-typed figure; the axis locations are set to None to hide the axes
# for a clean look, and the grid lines are switched off.
p = figure(title="UK Choropleth Map", 
           tools="pan,wheel_zoom,reset,save",
           x_axis_location=None, y_axis_location=None,
           x_axis_type="mercator", y_axis_type="mercator")
p.grid.grid_line_color = None

In [None]:
# Draw the region shapes. The data-driven fill is left commented out, so no
# fill_color is passed and the patches are not coloured by count.
p.patches('xs', 'ys', source=source,
         # fill_color={'field': 'TEDS', 'transform': color_mapper},  # would colour regions by the TEDS column
          fill_alpha=0.8,
          line_color="white", 
          line_width=0.5)

In [None]:
# --- 5. Add UI Elements (Hover & ColorBar) ---
# Hover tool: "@id" and "@TEDS" are read from the columns of that name in the
# data source.
hover = HoverTool(tooltips=[
    ("Region", "@id"),
    ("Count", "@TEDS")
])
p.add_tools(hover)

# Colour bar legend (currently disabled, matching the disabled fill_color).
# color_bar = ColorBar(color_mapper=color_mapper, label_standoff=12, border_line_color=None, location=(0,0))
# p.add_layout(color_bar, 'right')

In [None]:
# Render the figure assembled in the preceding cells.
show(p)

In [None]:
# Same map as above, but built in one cell from the unprojected frame
# (geo_data) rather than the EPSG:3857 copy.

# Coordinate extraction: one [xs, ys] pair per geometry, split into columns.
geo_data['bokeh_coords'] = geo_data['geometry'].apply(get_bokeh_coords)
geo_data['xs'] = geo_data['bokeh_coords'].apply(lambda pair: pair[0])
geo_data['ys'] = geo_data['bokeh_coords'].apply(lambda pair: pair[1])

source = ColumnDataSource(
    geo_data.drop(columns=["geometry", "bokeh_coords"])
)

# Colour mapper with a fixed 0-100 range (unused while fill_color is disabled).
color_mapper = LinearColorMapper(palette=Viridis256, low=0, high=100)

# Axis locations of None hide the axes for a clean look.
p = figure(
    title="UK Choropleth Map",
    tools="pan,wheel_zoom,reset,save",
    x_axis_location=None,
    y_axis_location=None,
)
p.grid.grid_line_color = None

p.patches(
    'xs', 'ys',
    source=source,
    # fill_color={'field': 'TEDS', 'transform': color_mapper},
    fill_alpha=0.8,
    line_color="black",
    line_width=0.5,
)

# Hover tool showing the region id and the TEDS count.
hover = HoverTool(tooltips=[("Region", "@id"), ("Count", "@TEDS")])
p.add_tools(hover)

# Colour bar legend (disabled)
# color_bar = ColorBar(color_mapper=color_mapper, label_standoff=12, border_line_color=None, location=(0,0))
# p.add_layout(color_bar, 'right')

show(p)

In [None]:
import pandas as pd
import geopandas as gpd
from bokeh.plotting import figure, show
from bokeh.models import ColumnDataSource, LinearColorMapper, HoverTool
from bokeh.palettes import Viridis256

# --- 1. Explode MultiPolygons into Polygons ---
# Each part of a multi-part geometry (e.g. a region with islands) becomes its
# own row, so every row can be drawn as a single patch.
# NOTE: this rebinds geo_data, replacing the merged frame used by earlier cells.
geo_data = geo_data.explode(index_parts=True).reset_index(drop=True)

# --- 2. Simplified Coordinate Extraction ---
def get_coords(geometry):
    """Return the exterior ring of a single Polygon as ``(xs, ys)`` lists.

    Any geometry whose ``geom_type`` is not 'Polygon' yields ``([], [])``.
    """
    if geometry.geom_type != 'Polygon':
        return [], []
    ring_x, ring_y = geometry.exterior.xy
    return list(ring_x), list(ring_y)

# Extract the exterior ring of every exploded polygon and split it into the
# xs / ys columns that `patches` reads.
coords = geo_data['geometry'].apply(get_coords)
geo_data['xs'] = coords.apply(lambda ring: ring[0])
geo_data['ys'] = coords.apply(lambda ring: ring[1])

# --- 3. Clean and Prepare Source ---
# The geometry objects are dropped to avoid serialization errors.
df_final = pd.DataFrame(geo_data.drop(columns=["geometry"]))
source = ColumnDataSource(df_final)

# --- 4. Setup the Plot ---
color_mapper = LinearColorMapper(palette=Viridis256, low=0, high=100)

# Default data ranges; match_aspect keeps the UK from looking 'stretched'.
p = figure(
    title="UK Choropleth Map",
    tools="pan,wheel_zoom,reset,save",
    x_axis_location=None,
    y_axis_location=None,
    match_aspect=True,
)
p.grid.grid_line_color = None

# Regions are filled according to the TEDS column via the colour mapper.
p.patches(
    'xs', 'ys',
    source=source,
    fill_color={'field': 'TEDS', 'transform': color_mapper},
    fill_alpha=0.8,
    line_color="black",
    line_width=0.5,
)

# Hover tool showing the region id and the TEDS count.
hover = HoverTool(tooltips=[("Region", "@id"), ("Count", "@TEDS")])
p.add_tools(hover)

show(p)

In [None]:
# Explode multi-part geometries so that each row holds a single polygon.
# NOTE: this rebinds geo_data2, replacing the reprojected frame from earlier cells.
geo_data2 = geo_data2.explode(index_parts=True).reset_index(drop=True)

# Coordinate extraction helper
def get_coords(geometry):
    """Return the exterior ring of a single Polygon as ``(xs, ys)`` lists.

    Any geometry whose ``geom_type`` is not 'Polygon' yields ``([], [])``.
    """
    if geometry.geom_type != 'Polygon':
        return [], []
    ring_x, ring_y = geometry.exterior.xy
    return list(ring_x), list(ring_y)

# Extract the exterior ring of every exploded polygon and split it into the
# xs / ys columns that `patches` reads.
coords = geo_data2['geometry'].apply(get_coords)
geo_data2['xs'] = coords.apply(lambda ring: ring[0])
geo_data2['ys'] = coords.apply(lambda ring: ring[1])

# Plain DataFrame without the geometry column for the Bokeh source.
df_final = pd.DataFrame(geo_data2.drop(columns=["geometry"]))
source = ColumnDataSource(df_final)

# --- 4. Setup the Plot ---
# The colour scale spans the observed range of the TEDS column.
color_mapper = LinearColorMapper(
    palette=Viridis256,
    low=df_final["TEDS"].min(),
    high=df_final["TEDS"].max(),
)

# Mercator-typed axes; match_aspect keeps the UK from looking 'stretched'.
p = figure(
    title="UK Choropleth Map",
    tools="pan,wheel_zoom,reset,save",
    x_axis_location=None,
    y_axis_location=None,
    x_axis_type="mercator",
    y_axis_type="mercator",
    match_aspect=True,
)
p.grid.grid_line_color = None

p.patches(
    'xs', 'ys',
    source=source,
    fill_color={'field': 'TEDS', 'transform': color_mapper},
    fill_alpha=0.8,
    line_color="black",
    line_width=0.5,
)

# Hover tool showing the region id and the TEDS count.
hover = HoverTool(tooltips=[("Region", "@id"), ("Count", "@TEDS")])
p.add_tools(hover)

# Colour bar legend on the right-hand side.
color_bar = ColorBar(
    color_mapper=color_mapper,
    label_standoff=12,
    border_line_color=None,
    location=(0, 0),
)
p.add_layout(color_bar, 'right')

show(p)

In [None]:
def study_choropleth(study: str):
    """Generates choropleth map showing regional coverage of participants.

    Regional counts come from ``md.get_geo_locations()`` and region polygons
    from the local ``regions.geojson`` file. Polygons are reprojected to
    EPSG:3857, multi-part geometries are exploded into single polygons, and
    the result is drawn on a mercator-typed Bokeh figure using a white to
    midnight-blue colour scale with a hover tool and a colour bar.

    Args:
        study (str): study (e.g. "TEDS")

    Returns:
        Nothing useful is returned; output is displayed as a side effect.
        A Markdown "unavailable" notice is shown when the study has no column
        in the counts table or any of its regional counts is null; otherwise
        the Bokeh choropleth is shown.
    """

    def get_coords(geometry):
        """Extract simple x and y lists from a single Polygon."""
        if geometry.geom_type == 'Polygon':
            x, y = geometry.exterior.xy
            return list(x), list(y)
        return [], []

    def generate_custom_palette(steps=256):
        """Build a list of hex colours from white to midnight blue."""
        start_rgb = (255, 255, 255)  # white
        end_rgb = (25, 25, 112)      # midnight blue

        palette = []
        for i in range(steps):
            # Linear interpolation between start and end for each channel
            r, g, b = (
                int(lo + (hi - lo) * i / (steps - 1))
                for lo, hi in zip(start_rgb, end_rgb)
            )
            palette.append(f"#{r:02x}{g:02x}{b:02x}")
        return palette

    # Read counts from the metadata API (a local "geo_locations.csv" was used
    # during development), then pivot so each source becomes a column and the
    # former column labels land in "index".
    geo_counts = (
        md.get_geo_locations()
        .drop(columns=["index", "source_stem"])
        .set_index("source")
        .transpose()
        .reset_index()
    )

    # A study that is absent from the table is treated the same as one with
    # missing regional counts, instead of surfacing a bare KeyError.
    if study not in geo_counts.columns or geo_counts[study].isnull().values.any():
        return display(
            Markdown(
                "Geographical Coverage is currently unavailable for {}.".format(study)
            ))

    # read polygons from UK geojson and attach the counts
    geo_polys = gpd.read_file("regions.geojson")
    geo_data = geo_polys.merge(geo_counts, left_on="id", right_on="index")

    # change to mercator projection
    geo_data2 = geo_data.to_crs(epsg=3857)

    # Explodes multi-polygons into polygons
    geo_data2 = geo_data2.explode(index_parts=True).reset_index(drop=True)

    coords = geo_data2['geometry'].apply(get_coords)
    geo_data2['xs'] = coords.apply(lambda ring: ring[0])
    geo_data2['ys'] = coords.apply(lambda ring: ring[1])

    # drop geometry before handing the frame to Bokeh
    df_final = pd.DataFrame(geo_data2.drop(columns=["geometry"]))
    source = ColumnDataSource(df_final)

    # Colour scale spans the observed range of this study's counts
    ukllc_palette = generate_custom_palette()
    color_mapper = LinearColorMapper(palette=ukllc_palette,
                                     low=df_final[study].min(),
                                     high=df_final[study].max())

    # match_aspect prevents the UK from looking 'stretched'
    p = figure(title="Geographical coverage of {} participants".format(study),
               tools="pan,wheel_zoom,reset,save",
               x_axis_location=None, y_axis_location=None,
               x_axis_type="mercator", y_axis_type="mercator",
               match_aspect=True)

    p.patches('xs', 'ys', source=source,
              fill_color={'field': study, 'transform': color_mapper},
              fill_alpha=0.8,
              line_color="black",
              line_width=0.5)

    # hovertool — the field name is wrapped in braces so that column names
    # containing spaces or special characters are still resolved
    hover = HoverTool(tooltips=[
        ("Region", "@id"),
        ("Participants", "@{" + study + "}")
    ])
    p.add_tools(hover)

    # color bar legend
    color_bar = ColorBar(color_mapper=color_mapper, label_standoff=12,
                         border_line_color=None, location=(0, 0))
    p.add_layout(color_bar, 'right')

    p.grid.grid_line_color = None
    show(p)

In [None]:
# Reload the counts CSV and pivot it so that each source becomes a column.
geo_counts = (
    pd.read_csv("geo_locations.csv")
    .drop(columns=["index", "source_stem"])
    .set_index("source")
    .transpose()
    .reset_index()
)

In [None]:
# Prints "True" when any NICOLA count is null, "False" otherwise.
print("True" if geo_counts["NICOLA"].isnull().values.any() else "False")

In [None]:
# Call the mdapi helper; its return value is shown as the cell output.
md.get_place_var_info()

In [None]:
# API key is read from the environment; a missing FASTAPI_KEY raises KeyError here.
API_KEY = os.environ['FASTAPI_KEY']

def get_geo_locations():
    """Fetch the geo-locations table from the metadata API as a DataFrame.

    Sends a GET request with the API key in the ``access_token`` header,
    flattens the JSON payload with ``pd.json_normalize`` and renames the
    snake_case region columns to their display names.

    Returns:
        pandas.DataFrame: the normalised payload with region columns renamed.

    Raises:
        requests.HTTPError: if the API responds with a 4xx/5xx status.
        requests.Timeout: if the API does not respond within the timeout.
    """
    url = 'https://metadata-api-4a09f2833a54.herokuapp.com/geo-locations/'
    # Without a timeout a stalled connection would hang the notebook forever.
    r = requests.get(url, headers={'access_token': API_KEY}, timeout=30)
    # Fail loudly on HTTP errors instead of normalising an error payload
    # into a DataFrame that only breaks further downstream.
    r.raise_for_status()

    col_rename = {
        "east_of_england": "East of England",
        "south_east": "South East",
        "north_west": "North West",
        "east_midlands": "East Midlands",
        "west_midlands": "West Midlands",
        "south_west": "South West",
        "london": "London",
        "yorkshire_and_the_humber": "Yorkshire and The Humber",
        "north_east": "North East",
        "wales": "Wales",
        "scotland": "Scotland",
        "northern_ireland": "Northern Ireland"
        }

    return pd.json_normalize(r.json()).rename(columns=col_rename)