# Chapter 6: Measures of location

For additional context see chapter 6 of [The Virus of Variation & Process Behavior Charts: A Guide for the Perplexed](https://www.brokenquality.com/book).

In [92]:
# Import libraries
from matplotlib import pyplot as plt
import numpy as np
import seaborn as sns
import pandas as pd

%matplotlib inline

## Get data

This chapter uses two datasets. The first contains the death-to-birth ratios for Vienna General and is stored in `01-vienna-general-yearly-births-deaths-ratio-by-clinic-data.csv`. The second contains the annual salaries at Vienna General and is stored in `06-annual-salaries-at-vienna-general-data.csv`.

In [62]:
# Dataset URLs (raw CSV files hosted on GitHub)
# Death-to-birth ratio data for Vienna General
vienna_ratio_data_url = r'https://raw.githubusercontent.com/jimlehner/the-virus-of-variation-and-pbcs/refs/heads/main/data/01-vienna-general-yearly-births-deaths-ratio-by-clinic-data.csv'
# Annual salaries at Vienna General
salary_url = r'https://raw.githubusercontent.com/jimlehner/the-virus-of-variation-and-pbcs/refs/heads/main/data/06-annual-salaries-at-vienna-general-data.csv'

# Helper for loading a CSV dataset
def get_data(url) -> pd.DataFrame:
    """Read the CSV found at ``url`` and return it as a DataFrame.

    Parameters
    ----------
    url
        Location of the CSV; passed unchanged to ``pandas.read_csv``.

    Returns
    -------
    pd.DataFrame
        The parsed contents of the CSV.
    """
    dataset = pd.read_csv(url)
    return dataset

# Load the death-to-birth ratio data, store 'Year' as a string
# and discard the 'Note' column
ratio_df = (
    get_data(vienna_ratio_data_url)
    .astype({'Year': 'string'})
    .drop(columns='Note')
)

# Load the annual salary data and store 'Position' as a string
salary_df = get_data(salary_url).astype({'Position': 'string'})

In [63]:
# Keep only the year and births columns for positional rows 11 through 15,
# which hold the Vienna General data for 1844 to 1848, indexed by year
vienna_births = (
    ratio_df
    .loc[:, ['Year', 'Births']]
    .iloc[11:16]
    .set_index('Year')
)
vienna_births

Unnamed: 0_level_0,Births
Year,Unnamed: 1_level_1
1844,3157
1845,3492
1846,4010
1847,3490
1848,3556


## Table 6.1: Mean births at Vienna General from 1844 to 1848

In [64]:
# Summarize the births column: total, number of years and mean
mean_df = vienna_births.agg(['sum', 'count', 'mean']).round(1)

# Capitalize the summary row labels. The keys must match the lowercase
# labels produced by agg(); rename() silently ignores keys it cannot find,
# so a mismatched key leaves the row label unchanged.
mean_df = mean_df.rename(index={'sum': 'Sum',
                                'count': 'Count',
                                'mean': 'Mean'})

# Create table 6.1 by appending the summary rows to the yearly births
table_6_1 = pd.concat([vienna_births, mean_df])
table_6_1

Unnamed: 0,Births
1844,3157.0
1845,3492.0
1846,4010.0
1847,3490.0
1848,3556.0
Sum,17705.0
count,5.0
Mean,3541.0


## Table 6.2: Table of annual salaries at Vienna General

In [65]:
# Order the positions from highest to lowest annual salary and use
# 'Position' as the index.
# Note: salary_df is rebound here, so re-running this cell without first
# reloading the data raises a KeyError ('Position' is no longer a column).
salary_df = salary_df.sort_values('Annual Salary', ascending=False).set_index('Position')

# Show results
salary_df

Unnamed: 0_level_0,Annual Salary
Position,Unnamed: 1_level_1
Chief Physician,2000
Head of Obstetrics Clinic,1500
Assistant Physician,400
Medical Intern,125
Medical Intern,100
Nurse,65
Nurse,65
Midwife,50
Orderlie,35
Orderlie,30


In [66]:
# Compute the mean annual salary as a one-row frame labelled 'Mean'
mean_df = (
    salary_df
    .agg(['mean'])
    .rename(index={'mean': 'Mean'})
)

# Show results
mean_df

Unnamed: 0,Annual Salary
Mean,437.0


In [67]:
# Build table 6.2: the salary rows followed by the mean row,
# with the index labelled 'Position'
table_6_2 = (
    pd.concat([salary_df, mean_df])
    .rename_axis('Position')
)

# Show results
table_6_2

Unnamed: 0_level_0,Annual Salary
Position,Unnamed: 1_level_1
Chief Physician,2000.0
Head of Obstetrics Clinic,1500.0
Assistant Physician,400.0
Medical Intern,125.0
Medical Intern,100.0
Nurse,65.0
Nurse,65.0
Midwife,50.0
Orderlie,35.0
Orderlie,30.0


## Table 6.3: Table of annual salaries at Vienna General with median

In [90]:
# Compute the median annual salary as a one-row frame
median_df = salary_df.agg(['median']).round(1)

# Capitalize the row label. The column is already named 'Annual Salary',
# so no column rename is needed.
median_df = median_df.rename(index={'median': 'Median'})
median_df

Unnamed: 0,Annual Salary
Median,82.5


In [91]:
# Build table 6.3: the salary rows followed by the median row,
# with the index labelled 'Position'
table_6_3 = (
    pd.concat([salary_df, median_df])
    .rename_axis('Position')
)

# Show results
table_6_3

Unnamed: 0_level_0,Annual Salary
Position,Unnamed: 1_level_1
Chief Physician,2000.0
Head of Obstetrics Clinic,1500.0
Assistant Physician,400.0
Medical Intern,125.0
Medical Intern,100.0
Nurse,65.0
Nurse,65.0
Midwife,50.0
Orderlie,35.0
Orderlie,30.0
