In [1]:
import pandas as pd
from indralib.indra_time import IndraTime
import re

In [2]:

url = "https://en.wikipedia.org/wiki/Timeline_of_human_evolution"
tables = pd.read_html(url)

In [3]:
useful = list(range(39,48))

In [4]:
useful

[39, 40, 41, 42, 43, 44, 45, 46, 47]

In [5]:
tables[47]

Unnamed: 0,Date,Event
0,300–130 ka,Reconstruction of early Homo sapiens from Jebe...
1,130–80 ka,Marine Isotope Stage 5 (Eemian). Modern human ...
2,80–50 ka,"MIS 4, beginning of the Upper Paleolithic. Ear..."
3,50–25 ka,Reconstruction of Oase 2 (c. 40 ka) Behavioral...
4,after 25 ka,Reconstruction of a Neolithic farmer from Euro...


In [6]:
def date_clean(date, default_scale = None):
    remarks = ""
    if isinstance(date, str):
        date = date.replace("–", "-").replace(",", "").replace("\xa0", " ").strip()
        date_sub = date.split("-")
        if len(date_sub)==3 or (len(date_sub)==2 and " - " not in date):
            return date, remarks
        date = date.split("[")[0]
        if default_scale is not None:
            date = date.split("-")
            if len(date) == 1:
                date = date[0].strip()+f" {default_scale}"
            else:
                date = f"{date[0].strip()} {default_scale} - {date[1].strip()} {default_scale}"
        else:
            date = date.replace("-", " - ")
            dates = date.split(" - ")
            if len(dates) > 1:
                sub_dates1 = dates[1].split(" ")
                sub_dates2 = dates[0].split(" ")
                if len(sub_dates1)>1 and len(sub_dates2)==1:
                    date = f"{dates[0]} {sub_dates1[1]} - {dates[1]}"
    else:
        if default_scale is not None:
            date = f"{date} {default_scale}"
        else:
            date = str(date)
    approxies = ["ca.", "c.", "circa", "around", "approximately", "~", ">", "<", "≈", "≥", "≤", "?"]
    for ap in approxies:
        if ap in date:
            date = date.replace(ap, "").strip()
            if date != "":
                remarks = f"{ap}"
    if "±" in date:
        dates = date.split("±")
        date = dates[0].strip()
        append = dates[1].strip().split(" ")
        if len(append) > 1:
            date = date + " " + append[1]
            remarks = "±" + append[0]
        else:
            remark = "±" + dates[1].strip()
                 
    jd_dates = IndraTime.string_time_2_julian(date)
    if len(jd_dates) > 1 and jd_dates[1] is not None:
        date_start = IndraTime.julian_2_string_time(jd_dates[0])
        date_end = IndraTime.julian_2_string_time(jd_dates[1])
        date = f"{date_start} - {date_end}"
    else:
        date = IndraTime.julian_2_string_time(jd_dates[0])

    return date, remarks

In [7]:
def date_merge(year, rest):
    day = None
    month = None
    rest = rest.strip()
    year = year.strip()
    if rest is not None and len(rest) > 0:
        rparts = rest.split(" ")
        if len(rparts) == 1:
            month = rparts[0].strip().lower()
        else:
            try:
                month = rparts[1].strip().lower()
                day = int(rparts[0].strip())
            except ValueError:
                month = None
                day = None
            if month is None and day is None:
                try:
                    month = rparts[0].strip().lower()
                    day = int(rparts[1].strip())
                except ValueError:
                    month = None
                    day = None
    else:
        month = None
        day = None
    if month is not None:
        val_months = ["january", "february", "march", "april", "may", "june", "july", "august", "september", "october", "november", "december"]
        try:
            month_id = val_months.index(month) + 1
        except ValueError:
            month_id = None
            return f"{year}"
        year_parts = str(year).replace("\xa0"," ").split(" ", 1)
        if len(year_parts) > 1:
            year = year_parts[0]
            appendix = year_parts[1]
        else:
            appendix = ""
        if day is not None:
            date = f"{year}-{month_id:02d}-{day:02d}"
            if appendix != "":
                date = date + f" {appendix}"
        else:
            date = f"{year}-{month_id:02d}"
            if appendix != "":
                date = date + f" {appendix}"
        return date
    else:
        return f"{year}"

In [8]:
def remove_footnotes(text, numeric_only=True, single_letter_alpha=True):
    if numeric_only is False:
        text = re.sub(r"\[.*?\]", "", text)
    else:
        text = re.sub(r"\[\d+\]", "", text)
        if single_letter_alpha is True:
            text = re.sub(r"\[\w\]", "", text)
    return text

In [41]:
print(f"| Date | Common name, taxonomy ranking of Homo Sapiens (first appearance) | Rank | Name |")
print("| ---- | ---- | ---- | ---- |")
for i, row in enumerate(tables[0].iterrows()):
    date = row[1].iloc[3]
    # check for nan:
    if pd.isna(date):
        continue
    date, remarks = date_clean(date, "Ma BP")
    rank = row[1].iloc[0]
    if pd.isna(rank):
        rank = ""
    name = row[1].iloc[1]
    if pd.isna(name):
        name = ""
    common = row[1].iloc[2]
    if pd.isna(common):
        common = ""
    if remarks != "":
        common = f"Date: {remarks}, " + common
    print(f"| {date} | {common} | {rank} | {name} |")

| Date | Common name, taxonomy ranking of Homo Sapiens (first appearance) | Rank | Name |
| ---- | ---- | ---- | ---- |
| 4200.0 Ma BP |  |  | Life |
| 3700.0 Ma BP |  |  | Archaea |
| 2100.0 Ma BP | Eukaryotes | Domain | Eukaryota |
| 1540.0 Ma BP | Excludes Plants and their relatives |  | Opimoda |
| 1300.0 Ma BP | Holozoa + Holomycota (Cristidicoidea and Fungi) |  | Opisthokonts |
| 1100.0 Ma BP | Excludes Holomycota |  | Holozoa |
| 900.0 Ma BP | Choanoflagellates + Animals |  | Choanozoa |
| 610.0 Ma BP | Animals | Kingdom | Animalia |
| 560.0 Ma BP | Triploblasts / Worms |  | Bilateria |
| 530.0 Ma BP | Chordates (Vertebrates and closely related invertebrates) | Phylum | Chordata |
| 505.0 Ma BP | Fish / Vertebrates | Subphylum | Vertebrata |
| 460.0 Ma BP | Jawed fish | Infraphylum | Gnathostomata |
| 420.0 Ma BP | Bony fish |  | Teleostomi |
| 395.0 Ma BP | Tetrapods (animals with four limbs) | Superclass | Tetrapoda |
| 340.0 Ma BP | Amniotes (fully terrestrial tetrapods whose

In [42]:
print("| Date | Human evolution |")
print("| ---- | ---- |")
for index in useful:
    for i, row in enumerate(tables[index].iterrows()):
        date = row[1].iloc[0]
        date = date.replace("after ", "")
        desc = row[1].iloc[1]
        date, remarks = date_clean(date)
        if remarks != "":
            desc = f"Date: {remarks}, " + desc
        print(f"| {date} | {desc} |")

    

| Date | Human evolution |
| ---- | ---- |
| 4.3-4.1 Ga | The earliest life appears, possibly as protocells. Their genetic material was probably composed of RNA, capable of both self replication and enzymatic activity; their membranes were composed of lipids. The genes were separate strands, translated into proteins and often exchanged between the protocells. Further information: Abiogenesis, RNA world, and Earliest known life forms |
| 4.0-3.8 Ga | Prokaryotic cells appear; their genetic materials are composed of the more stable DNA and they use proteins for various reasons, primarily for aiding DNA to replicate itself by proteinaceous enzymes (RNA now acts as an intermediary in this central dogma of genetic information flow of cellular life); genes are now linked in sequences so all information passes to offsprings. They had cell walls & outer membranes and were probably initially thermophiles. Further information: Cell (biology) § Origins |
| 3500.0 Ma BP | This marks the first appe

In [49]:
url = "https://en.wikipedia.org/wiki/Timeline_of_German_history"
tables = pd.read_html(url)
# https://en.wikipedia.org/wiki/30th_century_BC
# https://en.wikipedia.org/wiki/3rd_millennium_BC

In [50]:
tables[20]

Unnamed: 0,Year,Date,Event,Source
0,2002,1 January,Physical Euro currency was introduced. The Deu...,
1,2005,19 April,Pope Benedict XVI was elected pope.,
2,2005,22 November,Angela Merkel of the CDU became chancellor in ...,
3,2006,9 June – 9 July,2006 FIFA World Cup: The 2006 FIFA World Cup w...,
4,2006,22 September,Lathen train collision,
5,2008,14 September,"Sebastian Vettel wins the Italian Grand Prix, ...",
6,2009,27 September,"German federal election, 2009: Elections were ...",[44]
7,2010,23 April,European debt crisis: Greece requested a loan ...,
8,2010,29 May,"Germany wins the Eurovision Song Contest 2010,...",
9,2010,14 November,Sebastian Vettel wins the Abu Dhabi Grand Prix...,


In [51]:
print("| Date | Event in German history |")
print("| ---- | ---- |")
for index in range(0, 21):
    for i, row in enumerate(tables[index].iterrows()):
        year = str(row[1].iloc[0])
        if pd.isna(year) or year == "nan":
            continue
        year = year.replace(" B.C.", " BC").replace("A.D.", "").replace(" BCE", " BC").replace(" CE", "").strip()
        year = year.replace("Late 3rd Millennium BC", "2200 BC - 2001 BC")
        year = year.replace("3rd Millennium BC", "300 BC - 2001 BC")
        year = year.replace("5th century BC", "500 BC - 401 BC")
        year = year.split(".")[0]  # remove .0, pandas artifact?
        date_rest = row[1].iloc[1]
        if pd.isna(date_rest):
            date_rest = ""
        event = row[1].iloc[2]
        if pd.isna(event):
            continue

        if date_rest != "":
            date = date_merge(year, date_rest)
        else:
            date = year
        date, remarks = date_clean(date)
        if remarks != "":
            event = f"Date: {remarks}, {event}"
        print(f"| {date} | {event} |")

| Date | Event in German history |
| ---- | ---- |
| 609.0 kya BP | Date: ±40000, The hominid to whom the Mauer 1 mandible (discovered in 1907 in Mauer) belonged, the type specimen of Homo heidelbergensis, dies. |
| 225.0 kya BP | Date: approximate, The hominid to whom the Steinheim skull (discovered in 1933 in Steinheim an der Murr) belonged (previously sometimes dubbed Homo steinheimensis) dies. |
| 130.0 kya BP | Date: approximate, The Neanderthal (named after its initial site of discovery, the Neandertal valley) emerges in Europe. |
| 45000 BP | Date: approximate, Homo sapiens first appears in Europe (sometimes called EEMH or Cro-Magnon). |
| 35000-45000 BP | The Venus of Hohle Fels is made. |
| 32000 BP | Date: approximate, The Löwenmensch figurine is made. |
| 23000 BP | Date: approximate, The Venus of Willendorf is made. |
| 9551 BC | Date: approximate, The Pleistocene (Last Glacial Period (LGP)) ends. |
| 10000 BC | Date: approximate, The Ahrensburg culture prospers in northern

In [46]:
# url = "https://en.wikipedia.org/wiki/Timeline_of_United_States_history"
url = "https://en.wikipedia.org/wiki/Timeline_of_Italian_history"
tables = pd.read_html(url)

In [47]:
tables[35]

Unnamed: 0,Year,Date,Event
0,2001,11 June,Berlusconi's second term as prime minister beg...
1,2001,20 July,Violence erupts at the G8 demonstrations in Ge...
2,2001,October,Italy takes part in the Afghanistan War.
3,2001,November,Former President Giovanni Leone dies.[26]
4,2002,1 January,The euro begins circulating as new official cu...
5,2003,March,"Italy takes part in the Iraq War, although pop..."
6,2004,30 March,It is established the National Memorial Day of...
7,2005,4 March,"Nicola Calipari, Italian secret agent, is shot..."
8,2006,10 February,The 2006 Winter Olympics are held in Turin (to...
9,2006,17 May,Prodi's second term as prime minister begins.


In [48]:
print("| Date | Event in Italian history |")
print("| ---- | ---- |")
for index in range(2, 36):
    for i, row in enumerate(tables[index].iterrows()):
        year = str(row[1].iloc[0])
        if pd.isna(year) or year == "nan":
            continue
        year = year.replace(" B.C.", " BC").replace("A.D.", "").replace("ad", "").replace("AD", "").replace(" BCE", " BC").replace(" CE", "").strip()
        year = year.split(".")[0]  # remove .0, pandas artifact?
        date_rest = row[1].iloc[1]
        if pd.isna(date_rest):
            date_rest = ""
        event = row[1].iloc[2]
        if pd.isna(event):
            continue

        if date_rest != "":
            date = date_merge(year, date_rest)
        else:
            date = year
        date, remarks = date_clean(date)
        if remarks != "":
            event = f"Date: {remarks}, {event}"
        print(f"| {date} | {event} |")

| Date | Event in Italian history |
| ---- | ---- |
| 86949 BP | Oldest human habitation is discovered in Italy at Monte Poggiolo. |
| 51949 BP | Neanderthal presence in Italy. |
| 34949 BP | Paglicci 33 is discovered in Italy, the earliest evidence of Haplogroup I-M170. |
| 12000 BC | Villabruna 1 is discovered in Italy, the earliest evidence of Haplogroup R1b. |
| 6000 BC | Neolithic Italy begins with the spread of Cardium pottery. |
| 3345 BC | Otzi is born. |
| 3300 BC | Otzi is killed. |
| 3000 BC | Remedello culture. This is the first evidence of copper use in Italy. |
| 3000 BC | The Rinaldone culture appears. |
| 1800 BC | Nuragic civilization in Sardinia. |
| 1700 BC | Terramare culture. Recent archaeology, along with ancient Greek accounts, links this culture to the Etruscans. |
| 1500 BC | Apennine culture. |
| 1300 BC | Canegrate culture. |
| 1200 BC | Proto-Villanovan culture appears in Italy. It is likely a southern extension of the Urnfield culture. This is possibly the 

In [52]:
url = "https://en.wikipedia.org/wiki/List_of_presidents_of_the_United_States"
tables = pd.read_html(url)

In [54]:
tables[0]

Unnamed: 0,No.[a],Portrait,Name (Birth–Death),Term[14],Party[b][15],Party[b][15].1,Election,Vice President[16]
0,1,,George Washington (1732–1799) [17],"April 30, 1789 – March 4, 1797",,Unaffiliated,1788–1789 1792,John Adams[c]
1,2,,John Adams (1735–1826) [19],"March 4, 1797 – March 4, 1801",,Federalist,1796,Thomas Jefferson[d]
2,3,,Thomas Jefferson (1743–1826) [21],"March 4, 1801 – March 4, 1809",,Democratic- Republican,1800 1804,Aaron Burr George Clinton
3,4,,James Madison (1751–1836) [22],"March 4, 1809 – March 4, 1817",,Democratic- Republican,1808 1812,"George Clinton[e] Vacant after April 20, 1812 ..."
4,5,,James Monroe (1758–1831) [24],"March 4, 1817 – March 4, 1825",,Democratic- Republican,1816 1820,Daniel D. Tompkins
5,6,,John Quincy Adams (1767–1848) [25],"March 4, 1825 – March 4, 1829",,Democratic- Republican[f] National Republican,1824,John C. Calhoun[g]
6,7,,Andrew Jackson (1767–1845) [28],"March 4, 1829 – March 4, 1837",,Democratic,1828 1832,"John C. Calhoun[h] Vacant after December 28, 1..."
7,8,,Martin Van Buren (1782–1862) [29],"March 4, 1837 – March 4, 1841",,Democratic,1836,Richard Mentor Johnson
8,9,,William Henry Harrison (1773–1841) [30],"March 4, 1841 – April 4, 1841[e]",,Whig,1840,John Tyler
9,10,,John Tyler (1790–1862) [31],"April 4, 1841[i] – March 4, 1845",,Whig[j] Unaffiliated,–,Vacant throughout presidency


In [86]:
presidents = []
for i, row in enumerate(tables[0].iterrows()):
    president_number = row[1].iloc[0]
    name_birth = row[1].iloc[2]
    if pd.isna(name_birth):
        print("bad name_birth")
        continue
    term = row[1].iloc[3]
    if pd.isna(term):
        print("bad term")
        continue
    party = row[1].iloc[5]
    election = row[1].iloc[6]
    vice_president = row[1].iloc[7]

    name_birth = remove_footnotes(name_birth).replace("b. ", "").strip()
    nb_parts = name_birth.split("(")
    name = nb_parts[0].strip()
    birth = nb_parts[1].replace(")", "").strip().replace("–", " - ")
    b_parts = birth.split(",")
    if len(b_parts) == 2:
        date = date_merge(b_parts[1], b_parts[0])
    else:
        date = birth
    term = remove_footnotes(term).strip()
    term = term.replace(" – ", " - ").replace("–", " - ").replace("  ", " ").replace(" - Incumbent", "").strip()
    term_parts = term.split(" - ")
    if len(term_parts) == 1:
        term_start_parts = term_parts[0].split(",")
        term = date_merge(term_start_parts[1], term_start_parts[0])
    else:
        term_start_parts = term_parts[0].split(",")
        term_end_parts = term_parts[1].split(",")
        term = date_merge(term_start_parts[1], term_start_parts[0]) + " - " + date_merge(term_end_parts[1], term_end_parts[0])
    party = remove_footnotes(party).strip()
    election = remove_footnotes(election).strip().replace("–", " - ")
    if election == " - ":
        election = ""
    vice_president = remove_footnotes(vice_president).strip()
    if "Vacant throughout" in vice_president:
        vice_president = ""

    presidents.append((name, date, term, party, election, vice_president))


print("| Term | President | Party | Election(s) | Vice President |")
print("| ---- | ---- | ---- | ---- | ---- |")
for president in presidents:
    print(f"| {president[2]} | {president[0]} | {president[3]} | {president[4]} | {president[5]} |")

print()

print("| Date | US President biography (birth/death) |")
print("| ---- | ---- |")
for president in presidents:
    print(f"| {president[1]} | {president[0]} |")
    


| Term | President | Party | Election(s) | Vice President |
| ---- | ---- | ---- | ---- | ---- |
| 1789-04-30 - 1797-03-04 | George Washington | Unaffiliated | 1788 - 1789 1792 | John Adams |
| 1797-03-04 - 1801-03-04 | John Adams | Federalist | 1796 | Thomas Jefferson |
| 1801-03-04 - 1809-03-04 | Thomas Jefferson | Democratic- Republican | 1800  1804 | Aaron Burr George Clinton |
| 1809-03-04 - 1817-03-04 | James Madison | Democratic- Republican | 1808 1812 | George Clinton Vacant after April 20, 1812 Elbridge Gerry Vacant after November 23, 1814 |
| 1817-03-04 - 1825-03-04 | James Monroe | Democratic- Republican | 1816 1820 | Daniel D. Tompkins |
| 1825-03-04 - 1829-03-04 | John Quincy Adams | Democratic- Republican National Republican | 1824 | John C. Calhoun |
| 1829-03-04 - 1837-03-04 | Andrew Jackson | Democratic | 1828 1832 | John C. Calhoun Vacant after December 28, 1832 Martin Van Buren |
| 1837-03-04 - 1841-03-04 | Martin Van Buren | Democratic | 1836 | Richard Mentor Johnso

In [9]:
url="https://en.wikipedia.org/wiki/Timeline_of_Irish_history"
tables = pd.read_html(url)

In [10]:
print("| Date | Event in Irish history |")
print("| ---- | ---- |")
for index in range(0,23):
    for i, row in enumerate(tables[index].iterrows()):
        year = str(row[1].iloc[0])
        if pd.isna(year) or year == "nan":
            continue
        year = remove_footnotes(year)
        rest = row[1].iloc[1]
        if pd.isna(rest):
            rest = ""
        rest = remove_footnotes(rest)
        date = date_merge(year, rest)
        date, remarks = date_clean(date)
        event = row[1].iloc[2]
        event = remove_footnotes(event)
        if pd.isna(event):
            continue
        if remarks != "":
            event = f"Date: {remarks}, {event}"
        print(f"| {date} | {event} |")

| Date | Event in Irish history |
| ---- | ---- |
| 17949 BP | Date: c., During the Last Glacial Maximum, Ireland is covered in ice sheets |
| 12000 BC | Date: c., A narrow channel forms between Prehistoric Ireland and southwest Scotland |
| 10000 BC | Date: c., Carbon-dating on bear bones indicate the presence of Paleolithic people in County Clare. |
| 8000 BC | Date: c., Mesolithic hunter-gatherers migrate to Ireland |
| 6500 BC | Date: c., Mesolithic hunter-gatherers occupy sites such as that at Mount Sandel in Ulster |
| 4000 BC | Date: c., Agriculture (including the keeping of livestock, and crop farming) has its beginnings in Ireland, at sites such as the Céide Fields in Connacht |
| 3500 BC | Date: c., The Neolithic peoples of the Boyne Valley build a complex of chamber tombs, standing stones and enclosures over a period of hundreds of years. (Newgrange itself is dated to 3300–2900 BC). |
| 2000 BC | Date: c., Bronze Age technologies start to arrive in Ireland, including the mou

In [11]:
url = "https://en.wikipedia.org/wiki/Timeline_of_British_history"
tables = pd.read_html(url)

In [12]:
print("| Date | Event in British history |")
print("| ---- | ---- |")
for index in range(0,23):
    for i, row in enumerate(tables[index].iterrows()):
        year = str(row[1].iloc[0])
        if pd.isna(year) or year == "nan":
            continue
        year = remove_footnotes(year)
        rest = row[1].iloc[1]
        if pd.isna(rest):
            rest = ""
        rest = remove_footnotes(rest)
        date = date_merge(year, rest)
        date, remarks = date_clean(date)
        event = row[1].iloc[2]
        event = remove_footnotes(event)
        if pd.isna(event):
            continue
        if remarks != "":
            event = f"Date: {remarks}, {event}"
        print(f"| {date} | {event} |")

| Date | Event in British history |
| ---- | ---- |
| 55-01-01 BC | Roman General Julius Caesar invades Great Britain for the first time, gaining a beachhead on the coast of Kent. |
| 54 BC | Caesar invades for the second time, gaining a third of the country. These two invasions are known as Caesar's invasions of Britain. |
| 43 | Aulus Plautius leads an army of forty thousand to invade Great Britain. Emperor Claudius makes Britain a part of the Roman Empire. This is known as the Roman conquest of Britain. |
| 50 | London is founded. |
| 61 | Boudica's organised rebellion against the Romans is defeated. |
| 122 | Emperor Hadrian orders a wall to be built to mark Roman territory of Britain in the north. |
| 197 | Britain is divided into two parts - Britannia Superior and Britannia Inferior. |
| 206 | Governor Lucius Alfenus Senecio repairs Hadrian's Wall and appeals for help from the Emperor against the northern tribes. |
| 208 | Emperor Septimius Severus and his son Caracalla take pers

IndexError: list index out of range