Skip to content

centenarian and supercentenarian dataset (verified oldest people)

Notifications You must be signed in to change notification settings

frankiethull/centenarians

Folders and files

Name
Last commit message
Last commit date

Latest commit

 

History

2 Commits
 
 
 
 
 
 
 
 
 
 

Repository files navigation

Centenarians

fth

loading packages

library(dplyr)
library(tidyr)
library(ggplot2)
library(stringr)
library(lubridate)
library(gt)
library(rvest)

get data

# Scrape every <table> element from the Wikipedia list of verified oldest
# people, then read the first two tables as data frames.
# data loc:
root <- "https://en.wikipedia.org/wiki/List_of_verified_oldest_people"
tables <- read_html(root) |> html_elements("table")

# The tables are picked by position below, so fail early with a clear message
# if the page did not yield at least two of them.
if (length(tables) < 2L) {
  stop(
    "Expected at least 2 tables at ", root, " but found ", length(tables), ".",
    call. = FALSE
  )
}

# get data by gender:
# NOTE(review): assumes the first table lists women and the second lists men;
# confirm against the live page, since the layout can change.
old_women <- tables[[1]] |>
  html_table(header = TRUE) |>
  mutate(gender = "female")

old_men <- tables[[2]] |>
  html_table(header = TRUE) |>
  mutate(gender = "male")

wrangling

# Give both scraped tables the same snake_case column names (the last one is
# the gender column), then stack them into one data frame.
cols <- c(
  "rank", "name", "birth_date", "death_date", "age",
  "place_of_death_or_residence", "gender"
)

names(old_women) <- cols
names(old_men) <- cols

# all people, women first and men after:
old_ppl <- rbind(old_women, old_men)


# Clean the scraped text columns, parse the dates, and recompute age as a
# fractional number of years.
#
# Reference markers such as "[1]" are removed one bracket pair at a time. The
# pattern must not be greedy: "\\[.*]" would delete everything between the
# first "[" and the last "]" in a cell, including real text.
old_ppl <- old_ppl |>
  mutate(
    across(
      c(name, birth_date, death_date, place_of_death_or_residence),
      \(x) str_trim(str_remove_all(x, "\\[[^\\]]*\\]"))
    )
  ) |>
  mutate(
    birth_date = dmy(birth_date),
    # Anything dmy() cannot parse becomes NA.
    # NOTE(review): presumably those are living people - verify against the
    # table, because a malformed death date would also be labelled "alive".
    death_date = dmy(death_date),
    still_alive = if_else(is.na(death_date), "alive", "deceased"),
    # Measure to the death date, or to today when there is no death date.
    age = as.numeric(
      interval(birth_date, coalesce(death_date, Sys.Date())),
      "years"
    )
  )

eda

# Scatter plot of birth date against age, coloured by place of death or
# residence, with one facet per gender.
old_ppl |>
  ggplot() +
  geom_point(
    # optional extra aesthetic: shape = still_alive
    aes(x = birth_date, y = age, color = place_of_death_or_residence),
    size = 5,
    alpha = .7
  ) +
  scale_color_viridis_d(option = "A", begin = .2) +
  facet_wrap(~gender) +
  labs(
    title = "Oldest Verified People",
    subtitle = "a breakdown by gender & country",
    x = "Birth Date",
    y = "Age"
  ) +
  # Pass theme_minimal() into dark_mode() so the minimal theme is the one that
  # gets inverted. Adding dark_mode() with no argument inverts the session's
  # current theme instead and replaces theme_minimal().
  ggdark::dark_mode(theme_minimal()) +
  theme(
    panel.background = element_rect(fill = "grey5", color = "grey5"),
    legend.title = element_blank()
    # optional: legend.position = "bottom"
  )

# Render the 20 oldest people as a gt table, with ages rounded to whole years
# and display-friendly column labels.
old_ppl |>
  arrange(desc(age)) |>
  slice_head(n = 20) |>
  select(
    rank, name, gender, age,
    "place of residence" = place_of_death_or_residence,
    "birth date" = birth_date
  ) |>
  mutate(age = round(age)) |>
  gt() |>
  tab_header(
    title = md("**Oldest Verified People Dataset**"),
    subtitle = md("top 20 oldest people")
  ) |>
  tab_footnote(footnote = md("*top 20 oldest people are all female*"))
<style>html { font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, Oxygen, Ubuntu, Cantarell, 'Helvetica Neue', 'Fira Sans', 'Droid Sans', Arial, sans-serif; }

#oywycgjgjv .gt_table { display: table; border-collapse: collapse; margin-left: auto; margin-right: auto; color: #333333; font-size: 16px; font-weight: normal; font-style: normal; background-color: #FFFFFF; width: auto; border-top-style: solid; border-top-width: 2px; border-top-color: #A8A8A8; border-right-style: none; border-right-width: 2px; border-right-color: #D3D3D3; border-bottom-style: solid; border-bottom-width: 2px; border-bottom-color: #A8A8A8; border-left-style: none; border-left-width: 2px; border-left-color: #D3D3D3; }

#oywycgjgjv .gt_heading { background-color: #FFFFFF; text-align: center; border-bottom-color: #FFFFFF; border-left-style: none; border-left-width: 1px; border-left-color: #D3D3D3; border-right-style: none; border-right-width: 1px; border-right-color: #D3D3D3; }

#oywycgjgjv .gt_caption { padding-top: 4px; padding-bottom: 4px; }

#oywycgjgjv .gt_title { color: #333333; font-size: 125%; font-weight: initial; padding-top: 4px; padding-bottom: 4px; padding-left: 5px; padding-right: 5px; border-bottom-color: #FFFFFF; border-bottom-width: 0; }

#oywycgjgjv .gt_subtitle { color: #333333; font-size: 85%; font-weight: initial; padding-top: 0; padding-bottom: 6px; padding-left: 5px; padding-right: 5px; border-top-color: #FFFFFF; border-top-width: 0; }

#oywycgjgjv .gt_bottom_border { border-bottom-style: solid; border-bottom-width: 2px; border-bottom-color: #D3D3D3; }

#oywycgjgjv .gt_col_headings { border-top-style: solid; border-top-width: 2px; border-top-color: #D3D3D3; border-bottom-style: solid; border-bottom-width: 2px; border-bottom-color: #D3D3D3; border-left-style: none; border-left-width: 1px; border-left-color: #D3D3D3; border-right-style: none; border-right-width: 1px; border-right-color: #D3D3D3; }

#oywycgjgjv .gt_col_heading { color: #333333; background-color: #FFFFFF; font-size: 100%; font-weight: normal; text-transform: inherit; border-left-style: none; border-left-width: 1px; border-left-color: #D3D3D3; border-right-style: none; border-right-width: 1px; border-right-color: #D3D3D3; vertical-align: bottom; padding-top: 5px; padding-bottom: 6px; padding-left: 5px; padding-right: 5px; overflow-x: hidden; }

#oywycgjgjv .gt_column_spanner_outer { color: #333333; background-color: #FFFFFF; font-size: 100%; font-weight: normal; text-transform: inherit; padding-top: 0; padding-bottom: 0; padding-left: 4px; padding-right: 4px; }

#oywycgjgjv .gt_column_spanner_outer:first-child { padding-left: 0; }

#oywycgjgjv .gt_column_spanner_outer:last-child { padding-right: 0; }

#oywycgjgjv .gt_column_spanner { border-bottom-style: solid; border-bottom-width: 2px; border-bottom-color: #D3D3D3; vertical-align: bottom; padding-top: 5px; padding-bottom: 5px; overflow-x: hidden; display: inline-block; width: 100%; }

#oywycgjgjv .gt_group_heading { padding-top: 8px; padding-bottom: 8px; padding-left: 5px; padding-right: 5px; color: #333333; background-color: #FFFFFF; font-size: 100%; font-weight: initial; text-transform: inherit; border-top-style: solid; border-top-width: 2px; border-top-color: #D3D3D3; border-bottom-style: solid; border-bottom-width: 2px; border-bottom-color: #D3D3D3; border-left-style: none; border-left-width: 1px; border-left-color: #D3D3D3; border-right-style: none; border-right-width: 1px; border-right-color: #D3D3D3; vertical-align: middle; text-align: left; }

#oywycgjgjv .gt_empty_group_heading { padding: 0.5px; color: #333333; background-color: #FFFFFF; font-size: 100%; font-weight: initial; border-top-style: solid; border-top-width: 2px; border-top-color: #D3D3D3; border-bottom-style: solid; border-bottom-width: 2px; border-bottom-color: #D3D3D3; vertical-align: middle; }

#oywycgjgjv .gt_from_md > :first-child { margin-top: 0; }

#oywycgjgjv .gt_from_md > :last-child { margin-bottom: 0; }

#oywycgjgjv .gt_row { padding-top: 8px; padding-bottom: 8px; padding-left: 5px; padding-right: 5px; margin: 10px; border-top-style: solid; border-top-width: 1px; border-top-color: #D3D3D3; border-left-style: none; border-left-width: 1px; border-left-color: #D3D3D3; border-right-style: none; border-right-width: 1px; border-right-color: #D3D3D3; vertical-align: middle; overflow-x: hidden; }

#oywycgjgjv .gt_stub { color: #333333; background-color: #FFFFFF; font-size: 100%; font-weight: initial; text-transform: inherit; border-right-style: solid; border-right-width: 2px; border-right-color: #D3D3D3; padding-left: 5px; padding-right: 5px; }

#oywycgjgjv .gt_stub_row_group { color: #333333; background-color: #FFFFFF; font-size: 100%; font-weight: initial; text-transform: inherit; border-right-style: solid; border-right-width: 2px; border-right-color: #D3D3D3; padding-left: 5px; padding-right: 5px; vertical-align: top; }

#oywycgjgjv .gt_row_group_first td { border-top-width: 2px; }

#oywycgjgjv .gt_summary_row { color: #333333; background-color: #FFFFFF; text-transform: inherit; padding-top: 8px; padding-bottom: 8px; padding-left: 5px; padding-right: 5px; }

#oywycgjgjv .gt_first_summary_row { border-top-style: solid; border-top-color: #D3D3D3; }

#oywycgjgjv .gt_first_summary_row.thick { border-top-width: 2px; }

#oywycgjgjv .gt_last_summary_row { padding-top: 8px; padding-bottom: 8px; padding-left: 5px; padding-right: 5px; border-bottom-style: solid; border-bottom-width: 2px; border-bottom-color: #D3D3D3; }

#oywycgjgjv .gt_grand_summary_row { color: #333333; background-color: #FFFFFF; text-transform: inherit; padding-top: 8px; padding-bottom: 8px; padding-left: 5px; padding-right: 5px; }

#oywycgjgjv .gt_first_grand_summary_row { padding-top: 8px; padding-bottom: 8px; padding-left: 5px; padding-right: 5px; border-top-style: double; border-top-width: 6px; border-top-color: #D3D3D3; }

#oywycgjgjv .gt_striped { background-color: rgba(128, 128, 128, 0.05); }

#oywycgjgjv .gt_table_body { border-top-style: solid; border-top-width: 2px; border-top-color: #D3D3D3; border-bottom-style: solid; border-bottom-width: 2px; border-bottom-color: #D3D3D3; }

#oywycgjgjv .gt_footnotes { color: #333333; background-color: #FFFFFF; border-bottom-style: none; border-bottom-width: 2px; border-bottom-color: #D3D3D3; border-left-style: none; border-left-width: 2px; border-left-color: #D3D3D3; border-right-style: none; border-right-width: 2px; border-right-color: #D3D3D3; }

#oywycgjgjv .gt_footnote { margin: 0px; font-size: 90%; padding-left: 4px; padding-right: 4px; padding-left: 5px; padding-right: 5px; }

#oywycgjgjv .gt_sourcenotes { color: #333333; background-color: #FFFFFF; border-bottom-style: none; border-bottom-width: 2px; border-bottom-color: #D3D3D3; border-left-style: none; border-left-width: 2px; border-left-color: #D3D3D3; border-right-style: none; border-right-width: 2px; border-right-color: #D3D3D3; }

#oywycgjgjv .gt_sourcenote { font-size: 90%; padding-top: 4px; padding-bottom: 4px; padding-left: 5px; padding-right: 5px; }

#oywycgjgjv .gt_left { text-align: left; }

#oywycgjgjv .gt_center { text-align: center; }

#oywycgjgjv .gt_right { text-align: right; font-variant-numeric: tabular-nums; }

#oywycgjgjv .gt_font_normal { font-weight: normal; }

#oywycgjgjv .gt_font_bold { font-weight: bold; }

#oywycgjgjv .gt_font_italic { font-style: italic; }

#oywycgjgjv .gt_super { font-size: 65%; }

#oywycgjgjv .gt_footnote_marks { font-style: italic; font-weight: normal; font-size: 75%; vertical-align: 0.4em; }

#oywycgjgjv .gt_asterisk { font-size: 100%; vertical-align: 0; }

#oywycgjgjv .gt_indent_1 { text-indent: 5px; }

#oywycgjgjv .gt_indent_2 { text-indent: 10px; }

#oywycgjgjv .gt_indent_3 { text-indent: 15px; }

#oywycgjgjv .gt_indent_4 { text-indent: 20px; }

#oywycgjgjv .gt_indent_5 { text-indent: 25px; } </style>

Oldest Verified People Dataset
top 20 oldest people
rank name gender age place of residence birth date
1 Jeanne Calment female 122 France 1875-02-21
2 Kane Tanaka female 119 Japan 1903-01-02
3 Sarah Knauss female 119 United States 1880-09-24
4 Lucile Randon female 119 France 1904-02-11
5 Nabi Tajima female 118 Japan 1900-08-04
6 Marie-Louise Meilleur female 118 Canada 1880-08-29
7 Violet Brown female 118 Jamaica 1900-03-10
8 Emma Morano female 117 Italy 1899-11-29
9 Chiyo Miyako female 117 Japan 1901-05-02
10 Delphia Welford female 117 United States 1875-09-09
11 Misao Okawa female 117 Japan 1898-03-05
12 Francisca Celsa dos Santos female 117 Brazil 1904-10-21
13 María Capovilla female 117 Ecuador 1889-09-14
14 Susannah Mushatt Jones female 117 United States 1899-07-06
15 Gertrude Weaver female 117 United States 1898-07-04
16 Antonia da Santa Cruz female 117 Brazil 1905-06-13
17 Tane Ikai female 116 Japan 1879-01-18
18 Jeanne Bot female 116 France 1905-01-14
19 Elizabeth Bolden female 116 United States 1890-08-15
20 Besse Cooper female 116 United States 1896-08-26
top 20 oldest people are all female

About

centenarian and supercentenarian dataset (verified oldest people)

Resources

Stars

Watchers

Forks

Releases

No releases published

Packages

No packages published

Languages