In [159]:
import pandas as pd
import altair as alt
import numpy as np
from tqdm import tqdm
import sqlite3

import re
pd.options.display.max_columns = 15
pd.options.display.max_rows = 50
pd.set_option('display.max_colwidth', None)

alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

### Load data

In [160]:
with sqlite3.connect("../neuromancer/wintermute.db") as conn:
    df = pd.read_sql_query("SELECT * from obesity_hospitals", conn)
df.year.unique()

array([2015, 2016, 2017, 2018, 2019])

In [161]:
df = pd.read_csv("obesity.csv")
df

Unnamed: 0.1,Unnamed: 0,region,admissions_total,admissions_count_men,admissions_count_women,adm_per_100_000_all,adm_per_male,adm_per_female,year
0,5,England,365577,123423,242118,678.6808784502424,465.15091654776563,885.84569912726,2015
1,7,Unknown,84,35,49,.,.,.,2015
2,9,North East,18824,6340,12484,721.0931625244543,495.9114680700283,937.218239570219,2015
3,10,Darlington,383,154,229,363.39140005313294,300.6872852233677,422.6651901070506,2015
4,11,Durham,2521,925,1596,488.60660869025907,365.28634500406747,607.4654304212293,2015
...,...,...,...,...,...,...,...,...,...
807,177,Somerset,7075,2275,4800,1290,786,1800,2019
808,178,South Gloucestershire,5140,1450,3695,1881,1106,2663,2019
809,179,Swindon,2805,885,1920,1337,887,1784,2019
810,180,Torbay,2485,775,1710,1865,1072,2659,2019


In [162]:
region_list = ["East Midlands", "East of England", "West Midlands",
               "North East", "North West", "North Yorkshire","London", "South East", "South West", "West Sussex", "Yorkshire and the Humber"]

def england_region_sub(s):
    ### Create a flag for region, major region or England
    if s == "England":
        return "England"
    if s in region_list:
        return "Major region"
    else:
        return "Region"

df["region_flag"] = df["region"].map(england_region_sub)
#drop unknown row
df = df.loc[df["region"] != "Unknown"]
df.head(30)

Unnamed: 0.1,Unnamed: 0,region,admissions_total,admissions_count_men,admissions_count_women,adm_per_100_000_all,adm_per_male,adm_per_female,year,region_flag
0,5,England,365577,123423,242118,678.6808784502424,465.1509165477656,885.84569912726,2015,England
2,9,North East,18824,6340,12484,721.0931625244543,495.9114680700283,937.218239570219,2015,Major region
3,10,Darlington,383,154,229,363.39140005313294,300.6872852233677,422.6651901070506,2015,Region
4,11,Durham,2521,925,1596,488.6066086902591,365.2863450040674,607.4654304212293,2015,Region
5,12,Gateshead,1363,440,923,681.5068150681507,448.5264885472839,905.7988792824268,2015,Region
6,13,Hartlepool,506,172,334,546.0529865645066,380.35426018885033,703.987859371048,2015,Region
7,14,Middlesbrough,505,165,340,363.46886043515497,241.89646831156267,480.7148512611695,2015,Region
8,15,Newcastle-Upon-Tyne,2391,786,1605,833.6209691758972,545.0233680502586,1125.4706991942892,2015,Region
9,16,North Tyneside,2922,924,1998,1445.4469903834738,946.2171793716462,1911.9617224880385,2015,Region
10,17,Northumberland,4463,1642,2821,1413.209375376022,1064.5815909076173,1746.0248689398206,2015,Region


In [163]:
region_indicator = None

def create_major_region(row):
    global region_indicator
    if row['region_flag'] == 'England':
        return 'England'
    elif row['region_flag'] == 'Major region':
        region_indicator = row['region']
        return region_indicator
    else: # row['region_flag'] == 'Region'
        return region_indicator

df['major_region'] = df.apply(create_major_region, axis=1)
df.head(30)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['major_region'] = df.apply(create_major_region, axis=1)


Unnamed: 0.1,Unnamed: 0,region,admissions_total,admissions_count_men,admissions_count_women,adm_per_100_000_all,adm_per_male,adm_per_female,year,region_flag,major_region
0,5,England,365577,123423,242118,678.6808784502424,465.1509165477656,885.84569912726,2015,England,England
2,9,North East,18824,6340,12484,721.0931625244543,495.9114680700283,937.218239570219,2015,Major region,North East
3,10,Darlington,383,154,229,363.39140005313294,300.6872852233677,422.6651901070506,2015,Region,North East
4,11,Durham,2521,925,1596,488.6066086902591,365.2863450040674,607.4654304212293,2015,Region,North East
5,12,Gateshead,1363,440,923,681.5068150681507,448.5264885472839,905.7988792824268,2015,Region,North East
6,13,Hartlepool,506,172,334,546.0529865645066,380.35426018885033,703.987859371048,2015,Region,North East
7,14,Middlesbrough,505,165,340,363.46886043515497,241.89646831156267,480.7148512611695,2015,Region,North East
8,15,Newcastle-Upon-Tyne,2391,786,1605,833.6209691758972,545.0233680502586,1125.4706991942892,2015,Region,North East
9,16,North Tyneside,2922,924,1998,1445.4469903834738,946.2171793716462,1911.9617224880385,2015,Region,North East
10,17,Northumberland,4463,1642,2821,1413.209375376022,1064.5815909076173,1746.0248689398206,2015,Region,North East


### Create subsets and plot

In [164]:
england = df.loc[df["region_flag"] == "England"]
major_regions = df.loc[df["region_flag"] == "Major region"]


In [165]:
england_plot = alt.Chart(england).mark_bar().encode( 
    color=alt.value("orange"),
    x=alt.X("year:O", title="Year"),
    y=alt.Y("admissions_total:Q", title="Obesity hospital admissions"),
    tooltip=[alt.Tooltip("year:O", title="Year"), alt.Tooltip("admissions_total:Q", title="Total admissions", format=',')]
).properties(
    title="Obesity hospital admissions in England by year",
    width=150,
    height=400
).configure_axis(
    labelFontSize=12,
    titleFontSize=12
).configure_legend(
    labelFontSize=12,
    titleFontSize=12
)
england_plot

#### Admissions by sex


In [166]:
england['']

KeyError: ''

In [None]:

cols_to_convert = ['admissions_count_men', 'admissions_count_women', 'adm_per_100_000_all', 'adm_per_male', 'adm_per_female']

for col in cols_to_convert:
    england.loc[:, col] = england[col].astype(float)

england.loc[:, 'Percentage male'] = england['adm_per_male'] / england['adm_per_100_000_all']
england


Unnamed: 0.1,Unnamed: 0,region,admissions_total,admissions_count_men,admissions_count_women,adm_per_100_000_all,adm_per_male,adm_per_female,year,region_flag,major_region,Percentage male
0,5,England,365577,123423.0,242118.0,678.680878,465.150917,885.845699,2015,England,England,0.685375
163,6,England,440288,149490.0,290747.0,810.595387,558.356948,1055.595053,2016,England,England,0.688823
326,9,England,524725,175401.0,349279.0,995.0,710.0,1281.0,2017,England,England,0.713568
488,11,England,616961,208290.0,408630.0,1159.0,833.0,1486.0,2018,England,England,0.718723
650,11,England,710562,243565.0,466978.0,1322.8,963.1,1685.2,2019,England,England,0.728077


In [None]:
total_male = england['admissions_count_men'].sum()
total_female = england['admissions_count_women'].sum()
total = total_male + total_female

data = pd.DataFrame({
    'Gender': ['Male', 'Female'],
    'Admissions': [total_male, total_female],
    'Percentage': [total_male / total, total_female / total]
})

donut_chart_sexes = alt.Chart(data).mark_arc(innerRadius=50).encode(
    alt.Theta('Percentage:Q', stack=True),
    alt.Color('Gender:N', scale=alt.Scale(range=['#1f77b4', '#ff7f0e'])),
    tooltip=[alt.Tooltip('Gender:N'), alt.Tooltip('Percentage:Q', format='.2%')]
).properties(
    title='Total Proportion of Male vs Female Admissions over from 2015-19',
    width=150,
    height=150
)
donut_chart_sexes

In [None]:
line_chart_male = alt.Chart(england).mark_line(color='blue').encode(
    x=alt.X('year:O', title='Year'),
    y=alt.Y('adm_per_male:Q', title='Admissions per 100,000'),
    tooltip=[alt.Tooltip('year:O', title='Year'), alt.Tooltip('adm_per_male:Q', title='Admissions per 100,000 Male', format=',')]
)

line_chart_female = alt.Chart(england).mark_line(color='orange').encode(
    x=alt.X('year:O', title='Year'),
    y=alt.Y('adm_per_female:Q', title='Admissions per 100,000'),
    tooltip=[alt.Tooltip('year:O', title='Year'), alt.Tooltip('adm_per_female:Q', title='Admissions per 100,000 Female', format=',')]
)

line_chart = alt.layer(line_chart_male, line_chart_female).properties(title='Admissions per 100,000 by Gender from 2015 to 2019')
line_chart

In [None]:
# Calculate the base year values
base_year = england['year'].min()
base_male = england.loc[england['year'] == base_year, 'adm_per_male'].values[0]
base_female = england.loc[england['year'] == base_year, 'adm_per_female'].values[0]

# Create new columns for the rebased values
england['adm_per_male_rebased'] = england['adm_per_male'] / base_male * 100
england['adm_per_female_rebased'] = england['adm_per_female'] / base_female * 100

# Create the line charts
line_chart_male_rebased = alt.Chart(england).mark_line(color='blue', point=True).encode(
    x=alt.X('year:O', title='Year'),
    y=alt.Y('adm_per_male_rebased:Q', title='Rebased Admissions per 100,000 Male'),
    tooltip=[alt.Tooltip('year:O', title='Year'), alt.Tooltip('adm_per_male_rebased:Q', title='Rebased Admissions per 100,000 Male', format=',')]
)

line_chart_female_rebased = alt.Chart(england).mark_line(color='orange', point=True).encode(
    x=alt.X('year:O', title='Year'),
    y=alt.Y('adm_per_female_rebased:Q', title='Rebased Admissions per 100,000 Female'),
    tooltip=[alt.Tooltip('year:O', title='Year'), alt.Tooltip('adm_per_female_rebased:Q', title='Rebased Admissions per 100,000 Female', format=',')]
)

# Layer the charts
line_chart_rebased = alt.layer(line_chart_male_rebased, line_chart_female_rebased).properties(title='Rebased Admissions per 100,000 by Gender from 2015 to 2019')
line_chart_rebased

TypeError: unsupported operand type(s) for /: 'str' and 'str'

### Regional breakdown

In [None]:
major_regions_plot = alt.Chart(major_regions).mark_line(point=True).encode(
    x=alt.X("year:O", title="Year"),
    y=alt.Y("admissions_total:Q", title="Obesity hospital admissions"),
    color=alt.Color("major_region:N", title="Major region"),
    tooltip=[alt.Tooltip("year:O", title="Year"), 
             alt.Tooltip("admissions_total:Q", title="Total admissions", format=','),
             alt.Tooltip("major_region:N", title="Region")]
).properties(
    title="Obesity hospital admissions in major regions by year",
    width=150,
    height=400
).configure_axis(
    labelFontSize=12,
    titleFontSize=12
).configure_legend(
    labelFontSize=12,
    titleFontSize=12
)
major_regions_plot

### Let's look at the regional stats

In [None]:
# Create a dropdown selection for the major_region
major_region_dropdown = alt.binding_select(options=sub_regions['major_region'].unique().tolist())
major_region_selection = alt.selection_single(fields=['major_region'], bind=major_region_dropdown, name="Major region")

# Define the base line chart
base = alt.Chart(sub_regions).mark_line().encode(
    x=alt.X('year:Q', title='Year', axis=alt.Axis(format='d')),
    y=alt.Y('adm_per_100_000_all:Q', title='Admissions per 100,000 population', axis=alt.Axis(format='~s')),
    color=alt.condition(highlight, 'region:N', alt.value('lightgray'), legend=alt.Legend(title="Region")),
    opacity=alt.condition(highlight, alt.value(1), alt.value(0.4))
).add_params(
    highlight
)

# Add a layer to display tooltips when hovering over the line
tooltip = base.mark_circle().encode(
    tooltip=[alt.Tooltip('region:N', title='Region'),
             alt.Tooltip('year:Q', title='Year', format='d'),
             alt.Tooltip('adm_per_100_000_all:Q', title='Admissions per 100,000 residents', format=',')],
    color=alt.condition(highlight, 'region:N', alt.value('lightgray'))
)

# Combine the base chart with the tooltip layer and add the dropdown selection
line = alt.layer(
    base, tooltip
).add_params(
    major_region_selection
).transform_filter(
    major_region_selection
).properties(
    title="Obesity-related hospital admissions between 2015 and 2019"
)

# Put the five layers into a chart and bind the data
line

In [None]:
(england_plot | donut_chart_sexes | line_chart | line_chart_rebased | major_regions_plot) & (line_chart)

ValueError: Objects with "config" attribute cannot be used within HConcatChart. Consider defining the config attribute in the HConcatChart object instead.

In [231]:
#PLOT 1 - England Plot

england_plot = alt.Chart(england).mark_bar().encode( 
    color=alt.value("orange"),
    x=alt.X("year:O", title="Year"),
    y=alt.Y("admissions_total:Q", title="Obesity hospital admissions"),
    tooltip=[alt.Tooltip("year:O", title="Year"), alt.Tooltip("admissions_total:Q", title="Total admissions", format=',')]
).properties(
    title="Obesity hospital admissions in England by year",
    width=300
)
#PLOT 2 - regional chart

# Create a new column 'Region' with the same values as 'major_region'
sub_regions['Region'] = sub_regions['major_region']

# Bind the dropdown to the new 'Region' column
major_region_dropdown = alt.binding_select(options=sub_regions['Region'].unique().tolist())
major_region_selection = alt.selection_single(fields=['Region'], bind=major_region_dropdown, name=" ")


# Define the base line chart
base = alt.Chart(sub_regions).mark_line().encode(
    x=alt.X('year:Q', title='Year', axis=alt.Axis(format='d')),
    y=alt.Y('adm_per_100_000_all:Q', title='Admissions per 100,000 population', axis=alt.Axis(format='~s')),
    color=alt.condition(highlight, 'region:N', alt.value('lightgray'), legend=alt.Legend(title="Region")),
    opacity=alt.condition(highlight, alt.value(1), alt.value(0.4))
).add_params(
    highlight
)

# Add a layer to display tooltips when hovering over the line
tooltip = base.mark_circle().encode(
    tooltip=[alt.Tooltip('region:N', title='Region'),
             alt.Tooltip('year:Q', title='Year', format='d'),
             alt.Tooltip('adm_per_100_000_all:Q', title='Admissions per 100,000 residents', format=',')],
    color=alt.condition(highlight, 'region:N', alt.value('lightgray'))
)

# Combine the base chart with the tooltip layer and add the dropdown selection
line = alt.layer(
    base, tooltip
).add_params(
    major_region_selection
).transform_filter(
    major_region_selection
).properties(
    title="Obesity-related hospital admissions between 2015 and 2019",
    width=600,
    height = 500
)

#PLOT 3 - major regions plot

major_regions_plot = alt.Chart(major_regions).mark_line(point=True).encode(
    x=alt.X("year:O", title="Year"),
    y=alt.Y("admissions_total:Q", title="Obesity hospital admissions"),
    color=alt.Color("major_region:N", title="Major region"),
    tooltip=[alt.Tooltip("year:O", title="Year"), 
             alt.Tooltip("admissions_total:Q", title="Total admissions", format=','),
             alt.Tooltip("major_region:N", title="Region")]
).properties(
    title="Obesity hospital admissions in major regions by year",
    width=150
)

#PLOT 4 - donut chart of the sexes  

donut_chart_sexes = alt.Chart(data).mark_arc(innerRadius=50).encode(
    alt.Theta('Percentage:Q', stack=True),
    alt.Color('Gender:N', scale=alt.Scale(range=['#1f77b4', '#ff7f0e'])),
    tooltip=[alt.Tooltip('Gender:N'), alt.Tooltip('Percentage:Q', format='.2%')]
).properties(
    title='Total Proportion of Male vs Female Admissions from 2015-19',
    width = 300, 
    height=150
)

#PLOT 5 - male vs female line chart

line_chart_male = alt.Chart(england).mark_line(color='blue').encode(
    x=alt.X('year:O', title='Year'),
    y=alt.Y('adm_per_male:Q', title='Admissions per 100,000 population'),
    tooltip=[alt.Tooltip('year:O', title='Year'), alt.Tooltip('adm_per_male:Q', title='Admissions per 100,000 Male', format=',')]
)

line_chart_female = alt.Chart(england).mark_line(color='orange').encode(
    x=alt.X('year:O', title='Year'),
    y=alt.Y('adm_per_female:Q'),
    tooltip=[alt.Tooltip('year:O', title='Year'), alt.Tooltip('adm_per_female:Q', format=',')]
)

line_chart = alt.layer(line_chart_male, line_chart_female).properties(title='Admissions per 100,000 by Gender from 2015 to 2019',
                                                                      width=200)


#PLOT 6 - rebased line chart





# Calculate the base year values
base_year = england['year'].min()
base_male = england.loc[england['year'] == base_year, 'adm_per_male'].values[0]
base_female = england.loc[england['year'] == base_year, 'adm_per_female'].values[0]

# Create new columns for the rebased values
base_male = float(base_male)
base_female = float(base_female)

england['adm_per_male_rebased'] = england['adm_per_male'] / base_male * 100
england['adm_per_female_rebased'] = england['adm_per_female'] / base_female * 100

# Create the line charts
line_chart_male_rebased = alt.Chart(england).mark_line(color='blue', point=True).encode(
    x=alt.X('year:O', title='Year'),
    y=alt.Y('adm_per_male_rebased:Q', title='Rebased Admissions per 100,000 population'),
    tooltip=[alt.Tooltip('year:O', title='Year'), alt.Tooltip('adm_per_male_rebased:Q', format=',')]
)

line_chart_female_rebased = alt.Chart(england).mark_line(color='orange', point=True).encode(
    x=alt.X('year:O', title='Year'),
    y=alt.Y('adm_per_female_rebased:Q'),
    tooltip=[alt.Tooltip('year:O', title='Year'), alt.Tooltip('adm_per_female_rebased:Q', title='Rebased Admissions per 100,000 Female', format=',')]
)

# Layer the charts
line_chart_rebased = alt.layer(line_chart_male_rebased, line_chart_female_rebased).properties(title='Rebased Admissions per 100,000 by gender from 2015 to 2019',
                                                                                              width=200)




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  england['adm_per_male_rebased'] = england['adm_per_male'] / base_male * 100
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  england['adm_per_female_rebased'] = england['adm_per_female'] / base_female * 100


In [232]:
donut_chart_sexes.save("donut_sexes.html")

In [233]:
line.save("regions_dropdown.html")

In [234]:
(england_plot | major_regions_plot) .save("england_major_regions.html")

In [236]:
(line_chart | line_chart_rebased).save("line_charts_sexes.html")