In [1]:
import pandas as pd
from indralib.indra_time import IndraTime
import re

In [2]:
def remove_footnotes(text, numeric_only=True, single_letter_alpha=True):
    if numeric_only is False:
        text = re.sub(r"\[.*?\]", "", text)
    else:
        text = re.sub(r"\[\d+\]", "", text)
        if single_letter_alpha is True:
            text = re.sub(r"\[\w\]", "", text)
    return text

In [3]:
def extract_date_remarks(date):
    remarks = ""
    approxies = ["ca.", "c.", "circa", "around", "approximately", "~", ">", "<", "≈", "≥", "≤", "?"]
    for ap in approxies:
        if ap in date:
            date = date.replace(ap, "").strip()
            if date != "":
                remarks = f"{ap}"
    if "±" in date:
        dates = date.split("±")
        date = dates[0].strip()
        append = dates[1].strip().split(" ")
        if len(append) > 1:
            date = date + " " + append[1]
            remarks = "±" + append[0]
        else:
            remark = "±" + dates[1].strip()
    return date, remarks


In [4]:
def date_clean(date, default_scale = None):
    remarks = ""
    if isinstance(date, str):
        date = remove_footnotes(date)
        date = date.replace("–", " - ").replace(",", "").replace("\xa0", " ").replace("  ", " ").replace("BCE", "BC").strip()
        date_sub = date.split("-")
        if len(date_sub)==3 or (len(date_sub)==2 and " - " not in date):
            return date, remarks
        if default_scale is not None:
            date = date.split("-")
            if len(date) == 1:
                date = date[0].strip()+f" {default_scale}"
            else:
                date = f"{date[0].strip()} {default_scale} - {date[1].strip()} {default_scale}"
        else:
            dates = date.split(" - ")
            new_dates = []
            for di in dates:
                dj, rem = extract_date_remarks(di)
                new_dates.append(dj)
                if len(remarks) == 0:
                    remarks = rem
                else:
                    remarks += f", {rem}"
            date = " - ".join(new_dates)
            dates = date.split(" - ")
            if len(dates) == 2:
                sub_dates0 = dates[0].split(" ")
                sub_dates1 = dates[1].split(" ")
                if len(sub_dates0)==1 and len(sub_dates1)==2:
                    date = f"{sub_dates0[0]} {sub_dates1[1]} - {dates[1]}"
    else:
        if default_scale is not None:
            date = f"{date} {default_scale}"
        else:
            date = str(date)
    if "/" in date:
        date_parts = date.split("/")
        date = date_parts[0].strip()
        alt_date_stub = date_parts[1].strip()
        alt_date = date[:len(date)-len(alt_date_stub)] + alt_date_stub
        remarks = f"Alt.: {alt_date}"       
    jd_dates = IndraTime.string_time_2_julian(date)
    if len(jd_dates) > 1 and jd_dates[1] is not None:
        date_start = IndraTime.julian_2_string_time(jd_dates[0])
        date_end = IndraTime.julian_2_string_time(jd_dates[1])
        date = f"{date_start} - {date_end}"
    else:
        date = IndraTime.julian_2_string_time(jd_dates[0])

    return date, remarks

In [5]:
def date_merge(year, rest):
    day = None
    month = None
    rest = rest.strip()
    year = year.strip()
    if rest is not None and len(rest) > 0:
        rparts = rest.split(" ")
        if len(rparts) == 1:
            month = rparts[0].strip().lower()
        else:
            try:
                month = rparts[1].strip().lower()
                day = int(rparts[0].strip())
            except ValueError:
                month = None
                day = None
            if month is None and day is None:
                try:
                    month = rparts[0].strip().lower()
                    day = int(rparts[1].strip())
                except ValueError:
                    month = None
                    day = None
    else:
        month = None
        day = None
    if month is not None:
        val_months = ["january", "february", "march", "april", "may", "june", "july", "august", "september", "october", "november", "december"]
        try:
            month_id = val_months.index(month) + 1
        except ValueError:
            month_id = None
            return f"{year}"
        year_parts = str(year).replace("\xa0"," ").split(" ", 1)
        if len(year_parts) > 1:
            year = year_parts[0]
            appendix = year_parts[1]
        else:
            appendix = ""
        if day is not None:
            date = f"{year}-{month_id:02d}-{day:02d}"
            if appendix != "":
                date = date + f" {appendix}"
        else:
            date = f"{year}-{month_id:02d}"
            if appendix != "":
                date = date + f" {appendix}"
        return date
    else:
        return f"{year}"

In [16]:
url = "https://en.wikipedia.org/wiki/Timeline_of_Iranian_history"
tables = pd.read_html(url)

In [17]:
tables[0]

Unnamed: 0,Year,Date,Event
0,3200 BC,,Elam civilization in the far west and southwes...


In [18]:
tables[32]

Unnamed: 0,Year,Date,Event
0,2001,8 June,"Iranian presidential election, 2001: President..."
1,2003,December,"40,000 people are killed in an earthquake in s..."
2,2005,24 June,"Iranian presidential election, 2005: Ahmadinej..."
3,2009,12 June,"Iranian presidential election, 2009: Ahmadinej..."
4,2009,13 June,2009–10 Iranian election protests: Protests in...
5,2013,3 August,Hassan Rouhani replaces Ahmadinejad as President.
6,2014,,"My Stealthy Freedom, an online movement in whi..."
7,2015,14 July,Signing of Iran nuclear deal between Iran and ...
8,2018,8 May,United States withdraws from the Iran nuclear ...
9,2017–19,,Iranian woman protest against compulsory hijab...


In [19]:
print("| Date | Event in Persian (Iranian) history |")
print("| ---- | ---- |")
for index in range(0,33):
    for i, row in enumerate(tables[index].iterrows()):
        year = str(row[1].iloc[0])
        if pd.isna(year) or year == "nan":
            continue
        year = remove_footnotes(year)
        rest = row[1].iloc[1]
        if pd.isna(rest):
            rest = ""
        rest = remove_footnotes(rest)
        date = date_merge(year, rest)
        date, remarks = date_clean(date)
        event = row[1].iloc[2]
        event = remove_footnotes(event)
        if pd.isna(event):
            continue
        if remarks != "":
            event = f"Date: {remarks}, {event}"
        print(f"| {date} | {event} |")

| Date | Event in Persian (Iranian) history |
| ---- | ---- |
| 3200 BC | Elam civilization in the far west and southwest of modern-day Iran and modern-day southeast Iraq. |
| 1250 BC | Untash-Napirisha, king of Elam, builds the Chogha Zanbil ziggurat complex in present-day Khuzestan Province. |
| 1210 BC | Elamite Empire reaches the height of its power. |
| 770 BC | The Persians start driving the Elamites of Anshan towards Susa. |
| 727 BC | Deioces founds the Median government. |
| 705 BC | Birth of Achaemenes (died c. 675 BC), the eponymous ancestor of the Achaemenid dynasty. |
| 647 BC | Assyrian Empire defeats Elam Empire in the Battle of Susa, resulting in looting and total destruction of Susa. |
| 633 BC | The Scythians invade Media. |
| 624 BC | The Medians repel the Scythians. |
| 624 BC | Cyaxares the Great becomes the king of the Medes. |
| 612 BC | Together with the Babylonians, Cyaxares the Great captures the Assyrian capital Nineveh, which leads to the eventual collapse o

In [20]:
url = "https://en.wikipedia.org/wiki/Egyptian_pyramids"
tables = pd.read_html(url)

In [21]:
tables[12]

Unnamed: 0,Pyramid (Pharaoh),Reign,Field,Height
0,Pyramid of Djoser (Djoser),c. 2670 BCE,Saqqara,62 meters (203 feet)
1,Red Pyramid (Sneferu),c. 2612–2589 BCE,Dahshur,104 meters (341 feet)
2,Meidum Pyramid (Sneferu),c. 2612–2589 BCE,Meidum,65 meters (213 feet) (ruined) Would have been ...
3,Great Pyramid of Giza (Khufu),c. 2589–2566 BCE,Giza,146.7 meters (481 feet) or 280 Egyptian Royal ...
4,Pyramid of Djedefre (Djedefre),c. 2566–2558 BCE,Abu Rawash,60 meters (197 feet)
5,Pyramid of Khafre (Khafre),c. 2558–2532 BCE,Giza,136.4 meters (448 feet) Originally: 143.5 m (4...
6,Pyramid of Menkaure (Menkaure),c. 2532–2504 BCE,Giza,65 meters (213 feet) or 125 Egyptian Royal cubits
7,Pyramid of Userkaf (Userkaf),c. 2494–2487 BCE,Saqqara,48 meters (161 feet)
8,Pyramid of Sahure (Sahure),c. 2487–2477 BCE,Abusir,47 meters (155 feet)
9,Pyramid of Neferirkare (Neferirkare Kakai),c. 2477–2467 BCE,Abusir,72.8 meters (239 feet)


In [25]:
print("| Date | Pyramid | Pharaoh | Field | Height |")
print("| ---- | ---- | ---- | ---- | ---- |")
for i, row in enumerate(tables[12].iterrows()):
    date = row[1].iloc[1]
    if pd.isna(date) or date == "nan":
        continue
    date = remove_footnotes(str(date))
    date, remarks = date_clean(date)
    pyramid = row[1].iloc[0]
    pyramid = remove_footnotes(pyramid)
    pyr_phar = pyramid.split("(")
    pyramid = pyr_phar[0].strip()
    if len(pyr_phar) > 1:
        pharao = pyr_phar[1].split(")")[0]
    else:
        pharao = ""
    if pd.isna(pyramid):
        continue
    field = row[1].iloc[2]
    field = remove_footnotes(field)
    height = row[1].iloc[3]
    height = remove_footnotes(height)
    if remarks != "":
        pyramid = f"Date: {remarks}, {pyramid}"
    print(f"| {date} | {pyramid} | {pharao} | {field} | {height} |")

| Date | Pyramid | Pharaoh | Field | Height |
| ---- | ---- | ---- | ---- | ---- |
| 2670 BC | Date: c., Pyramid of Djoser | Djoser | Saqqara | 62 meters (203 feet) |
| 2612 BC - 2589 BC | Date: c., , Red Pyramid | Sneferu | Dahshur | 104 meters (341 feet) |
| 2612 BC - 2589 BC | Date: c., , Meidum Pyramid | Sneferu | Meidum | 65 meters (213 feet) (ruined) Would have been 91.65 meters (301 feet)[citation needed] or 175 Egyptian Royal cubits. |
| 2589 BC - 2566 BC | Date: c., , Great Pyramid of Giza | Khufu | Giza | 146.7 meters (481 feet) or 280 Egyptian Royal cubits |
| 2566 BC - 2558 BC | Date: c., , Pyramid of Djedefre | Djedefre | Abu Rawash | 60 meters (197 feet) |
| 2558 BC - 2532 BC | Date: c., , Pyramid of Khafre | Khafre | Giza | 136.4 meters (448 feet) Originally: 143.5 m (471 ft) or 274 Egyptian Royal cubits |
| 2532 BC - 2504 BC | Date: c., , Pyramid of Menkaure | Menkaure | Giza | 65 meters (213 feet) or 125 Egyptian Royal cubits |
| 2494 BC - 2487 BC | Date: c., , Pyramid

In [27]:
url= "https://en.wikipedia.org/wiki/Dynasties_of_ancient_Egypt"
tables = pd.read_html(url)

In [37]:
tables[12]

Unnamed: 0_level_0,Dynasty,Seat,Period of rule,Period of rule,Period of rule,Rulers,Rulers,Rulers
Unnamed: 0_level_1,Dynasty,Seat,Start,End,Term,First to rule,Last to rule,List / Family tree
Unnamed: 0_level_2,Early Dynastic Period,Early Dynastic Period,Early Dynastic Period,Early Dynastic Period,Early Dynastic Period,Early Dynastic Period,Early Dynastic Period,Early Dynastic Period
0,Dynasty I,Thinis,3150 BC,2900 BC,250 years,Narmer,Qa'a,(list)(tree)
1,Dynasty II,Thinis,2880 BC,2686 BC,204 years,Hotepsekhemwy,Khasekhemwy,(list)
2,Old Kingdom,Old Kingdom,Old Kingdom,Old Kingdom,Old Kingdom,Old Kingdom,Old Kingdom,Old Kingdom
3,Dynasty III,Memphis,2687 BC,2613 BC,73 years,Djoser,Huni,(list)
4,Dynasty IV,Memphis,2613 BC,2494 BC,112 years,Sneferu,Shepseskaf or Thamphthis[a],(list)(tree)
5,Dynasty V,Memphis,2494 BC,2345 BC,149 years,Userkaf,Unas,(list)
6,Dynasty VI,Memphis,2345 BC,2181 BC,164 years,Teti,Merenre Nemtyemsaf II or Netjerkare Siptah[b] ...,(list)
7,First Intermediate Period,First Intermediate Period,First Intermediate Period,First Intermediate Period,First Intermediate Period,First Intermediate Period,First Intermediate Period,First Intermediate Period
8,Dynasty VII[d],Memphis[1]: 396,Unknown,Unknown,Unknown,Unknown,Unknown,(list)
9,Dynasty VIII,Memphis[1]: 396,2181 BC,2160 BC,21 years,Netjerkare Siptah[b] or Menkare,Neferirkare II,(list)


In [41]:
print("| Date | Egyptian dynasty | Seat | First to rule | Last to rule | Duration (years) |")
print("| ---- | ---- | ---- | ---- | ---- | ---- |")  
for i, row in enumerate(tables[12].iterrows()):
    start = row[1].iloc[2]
    end = row[1].iloc[3]
    if start == end:
        continue  # skip the header
    date = f"{start} - {end}"
    date = remove_footnotes(str(date))
    date, remarks = date_clean(date)
    dynasty = remove_footnotes(row[1].iloc[0])
    seat = remove_footnotes(row[1].iloc[1])
    duration = row[1].iloc[4].split(" ")[0]
    first = remove_footnotes(row[1].iloc[5])
    last = remove_footnotes(row[1].iloc[6])
    if remarks != "":
        dynasty = f"Date: {remarks}, {dynasty}"
    print(f"| {date} | {dynasty} | {seat} | {first} | {last} | {duration} |")

| Date | Egyptian dynasty | Seat | First to rule | Last to rule | Duration (years) |
| ---- | ---- | ---- | ---- | ---- | ---- |
| 3150 BC - 2900 BC | Dynasty I | Thinis | Narmer | Qa'a | 250 |
| 2880 BC - 2686 BC | Dynasty II | Thinis | Hotepsekhemwy | Khasekhemwy | 204 |
| 2687 BC - 2613 BC | Dynasty III | Memphis | Djoser | Huni | 73 |
| 2613 BC - 2494 BC | Dynasty IV | Memphis | Sneferu | Shepseskaf or Thamphthis | 112 |
| 2494 BC - 2345 BC | Dynasty V | Memphis | Userkaf | Unas | 149 |
| 2345 BC - 2181 BC | Dynasty VI | Memphis | Teti | Merenre Nemtyemsaf II or Netjerkare Siptah or Nitocris | 164 |
| 2181 BC - 2160 BC | Dynasty VIII | Memphis: 396 | Netjerkare Siptah or Menkare | Neferirkare II | 21 |
| 2160 BC - 2130 BC | Dynasty IX | Heracleopolis Magna | Meryibre Khety | Unknown | 30 |
| 2130 BC - 2040 BC | Dynasty X | Heracleopolis Magna | Meryhathor | Unknown | 90 |
| 2130 BC - 1991 BC | Dynasty XI | Thebes | Intef | Mentuhotep IV | 139 |
| 1991 BC - 1802 BC | Dynasty XII | I

In [9]:
url = "https://en.wikipedia.org/wiki/Bond_event"
tables = pd.read_html(url)

In [10]:
len(tables)

15

In [11]:
tables[0]

Unnamed: 0,No,Time (BP),"Time (AD, BC)",Gap from previous event,Notes
0,0,≈ −0.5 ka,≈ 1500 AD,900 years,See Little Ice Age[12]
1,1,≈ −1.4 ka,≈ 600 AD,1400 years,See Migration Period[12] and Late Antique Litt...
2,2,≈ −2.8 ka,≈ 800 BC,1400 years,See Iron Age Cold Epoch
3,3,≈ −4.2 ka,≈ 2200 BC,1700 years,See 4.2-kiloyear event; collapse of the Akkadi...
4,4,≈ −5.9 ka,≈ 3900 BC,2300 years,"Sahara desert reforms by 3500–3000 BC, ending ..."
5,5,≈ −8.2 ka,≈ 6200 BC,1200 years,See 8.2-kiloyear event
6,6,≈ −9.4 ka,≈ 7400 BC,1100 years,"Erdalen event of glacier activity in Norway,[1..."
7,7,≈ −10.3 ka,≈ 8300 BC,800 years,
8,8,≈ −11.1 ka,≈ 9100 BC,—,Transition from the Younger Dryas to the Borea...


In [14]:
print("| Date | Bond event name | Id |")
print("| --- | --- | --- |")
for i, row in enumerate(tables[0].iterrows()):
    no = row[1].iloc[0]
    date = row[1].iloc[2]
    name = row[1].iloc[4]
    if pd.isna(date) or pd.isna(name):
        name = ""
    else:
       name = remove_footnotes(name)
       name = name.replace("See ", "")
    
    date, remark = date_clean(date)
    if remark != "":
        name = f"Date: {remark}, {name}"
    print(f"| {date} | {name} | {no} |")

| Date | Bond event name | Id |
| --- | --- | --- |
| 1500 | Date: ≈, Little Ice Age | 0 |
| 600 | Date: ≈, Migration Period and Late Antique Little Ice Age | 1 |
| 800 BC | Date: ≈, Iron Age Cold Epoch | 2 |
| 2200 BC | Date: ≈, 4.2-kiloyear event; collapse of the Akkadian Empire and the end of the Egyptian Old Kingdom. | 3 |
| 3900 BC | Date: ≈, Sahara desert reforms by 3500–3000 BC, ending Neolithic Subpluvial. Piora Oscillation. Early Bronze Age begins ~3300 BC. | 4 |
| 6200 BC | Date: ≈, 8.2-kiloyear event | 5 |
| 7400 BC | Date: ≈, Erdalen event of glacier activity in Norway, as well as a cold event in China. | 6 |
| 8300 BC | Date: ≈,  | 7 |
| 9100 BC | Date: ≈, Transition from the Younger Dryas to the Boreal. | 8 |
