# Chapter 3: Measures of location
For additional context see chapter 3 of *The Virus of Variation*. Questions? Email James.Lehner@gmail.com or QualityIsBroken@gmail.com.

In [1]:
# Import libraries
from matplotlib import pyplot as plt
from process_improvement import comparison_charts as cc
import numpy as np
import seaborn as sns
import pandas as pd

%matplotlib inline

## Get the data
Note that this chapter uses two datasets. The first dataset is the death-to-birth rates at Vienna General by clinic (vienna-general-death-to-birth-rates-by-clinic.csv). The second dataset is the annual salaries for the obstetrics department at Vienna General (obstetrics-annual-salaries-in-1845.csv). Both datasets can be found at https://github.com/jimlehner/the-virus-of-variation/tree/main/data.

### Death-to-birth rates by clinic 

In [23]:
# Raw GitHub location of the Vienna General death-to-birth rates CSV
rate_url = "https://raw.githubusercontent.com/jimlehner/the-virus-of-variation/refs/heads/main/data/vienna-general-death-to-birth-rates-by-clinic.csv"

# Helper used to load each CSV dataset into a dataframe
def get_data(url) -> pd.DataFrame:
    """Read the CSV at ``url`` and return its contents as a DataFrame.

    Thin wrapper around ``pd.read_csv``: ``url`` is passed through unchanged
    and all parsing options are left at the pandas defaults.
    """
    frame = pd.read_csv(url)
    return frame

# Load the death-to-birth rates, store 'Year' as a pandas string column
# and discard the 'Note' column
rate_df = (
    get_data(rate_url)
    .astype({'Year': 'string'})
    .drop(columns='Note')
)

# Preview the first rows
rate_df.head()

Unnamed: 0,Hospital,Clinic,Employee,Year,Births,Deaths,Rate
0,Vienna General Hospital,1st Clinic,Students & Doctors,1833,3737,197,5.3
1,Vienna General Hospital,1st Clinic,Students & Doctors,1834,2657,205,7.7
2,Vienna General Hospital,1st Clinic,Students & Doctors,1835,2573,143,5.6
3,Vienna General Hospital,1st Clinic,Students & Doctors,1836,2677,200,7.5
4,Vienna General Hospital,1st Clinic,Students & Doctors,1837,2765,251,9.1


### Obstetrics department annual salaries data

In [None]:
# URL of the 1845 obstetrics department annual salaries dataset
salary_url = r'https://raw.githubusercontent.com/jimlehner/the-virus-of-variation/refs/heads/main/data/obstetrics-annual-salaries-in-1845.csv'

# Load the annual salaries dataset. get_data() is the CSV helper defined in
# the death-to-birth rates cell earlier in this notebook; it is reused here
# rather than redefined so the notebook has a single definition of it.
salary_df = get_data(salary_url)

# Store 'Position' with the pandas string dtype
salary_df['Position'] = salary_df['Position'].astype('string')

# Show dataframe
salary_df

## Table 3.1: Mean number of births from 1844 to 1848

In [31]:
# Births for 1844 to 1848 (inclusive), indexed by year.
# NOTE(review): the rows are taken by position (11 through 15), which relies
# on the row order of the source CSV - confirm if the dataset ever changes.
vienna_births_df = (
    rate_df
    .loc[:, ['Year', 'Births']]
    .iloc[11:16]
    .set_index('Year')
)

# Show dataframe
vienna_births_df

Unnamed: 0_level_0,Births
Year,Unnamed: 1_level_1
1844,3157
1845,3492
1846,4010
1847,3490
1848,3556


In [75]:
# Summary rows for the births column: total, number of years, and mean.
# str.capitalize turns the aggregation labels into 'Sum', 'Count', 'Mean'.
agg_stats_df = (
    vienna_births_df
    .agg(['sum', 'count', 'mean'])
    .rename(index=str.capitalize)
    .round()
)

# Table 3.1: yearly births followed by the summary rows, with the row
# labels exposed as a regular 'Year' column
table_3_1 = (
    pd.concat([vienna_births_df, agg_stats_df])
    .rename_axis('Year')
    .reset_index()
)
table_3_1

Unnamed: 0,Year,Births
0,1844,3157.0
1,1845,3492.0
2,1846,4010.0
3,1847,3490.0
4,1848,3556.0
5,Sum,17705.0
6,Count,5.0
7,Mean,3541.0


## Table 3.2:  Mean salary in obstetrics department at Vienna General in 1845

In [69]:
# Order the salaries from highest to lowest and index the rows by position title
sorted_salary_df = (
    salary_df
    .sort_values('Annual Salary', ascending=False)
    .set_index('Position')
)

# Show sorted dataframe
sorted_salary_df

Unnamed: 0_level_0,Annual Salary
Position,Unnamed: 1_level_1
Chief Physician,2000
Head of Obstetrics Clinic,1500
Assistant Physician,400
Medical Intern,125
Medical Intern,100
Nurse,65
Nurse,65
Midwife,50
Orderlie,35
Orderlie,30


In [70]:
# Mean annual salary as a one-row dataframe labelled 'Mean'
mean_df = (
    sorted_salary_df['Annual Salary']
    .agg(['mean'])
    .to_frame()
    .rename(index={'mean': 'Mean'})
)

# Show dataframe
mean_df

Unnamed: 0,Annual Salary
Mean,437.0


In [72]:
# Table 3.2: individual salaries followed by the mean row, with the row
# labels exposed as a regular 'Position' column
table_3_2 = (
    pd.concat([sorted_salary_df, mean_df])
    .rename_axis('Position')
    .reset_index()
)

# Show dataframe
table_3_2

Unnamed: 0,Position,Annual Salary
0,Chief Physician,2000.0
1,Head of Obstetrics Clinic,1500.0
2,Assistant Physician,400.0
3,Medical Intern,125.0
4,Medical Intern,100.0
5,Nurse,65.0
6,Nurse,65.0
7,Midwife,50.0
8,Orderlie,35.0
9,Orderlie,30.0


## Table 3.3: Median salary in obstetrics department at Vienna General in 1845

In [56]:
# Median annual salary as a one-row dataframe labelled 'Median'.
# The aggregated frame keeps the 'Annual Salary' column of sorted_salary_df,
# so only the row label needs renaming (the former 'Births' column rename
# matched nothing in this frame and has been dropped).
median_df = (sorted_salary_df
             .agg(['median'])
             .round(1)
             .rename(index={'median': 'Median'}))

# Show dataframe
median_df

Unnamed: 0,Annual Salary
Median,82.5


In [73]:
# Table 3.3: individual salaries followed by the median row, with the row
# labels exposed as a regular 'Position' column
table_3_3 = (
    pd.concat([sorted_salary_df, median_df])
    .rename_axis(index='Position')
    .reset_index()
)
table_3_3

Unnamed: 0,Position,Annual Salary
0,Chief Physician,2000.0
1,Head of Obstetrics Clinic,1500.0
2,Assistant Physician,400.0
3,Medical Intern,125.0
4,Medical Intern,100.0
5,Nurse,65.0
6,Nurse,65.0
7,Midwife,50.0
8,Orderlie,35.0
9,Orderlie,30.0
