# TimeTree 

In [1]:
import time
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

### Run the cell below to install or update ChromeDriver

In [2]:
import chromedriver_autoinstaller

# Download and install the ChromeDriver build that matches the Chrome browser
# currently installed on this machine.
chromedriver_autoinstaller.install()

# Show the detected Chrome *browser* version (the version the driver is matched
# against); note that this call does not report the ChromeDriver version itself.
detected_chrome_version = chromedriver_autoinstaller.get_chrome_version()
print(detected_chrome_version)

121.0.6167.85


In [3]:


# --- Configuration: everything a reader might want to tune -----------------
INPUT_CSV = "DNA data.csv"  # Replace with your dataset file name and path
OUTPUT_CSV = "Updated_DNA_data.csv"
TIMETREE_URL = "http://timetree.org"
REFERENCE_SPECIES = "Homo sapiens"  # First taxon of every pairwise query
MAX_ROWS = 15                       # Only the first MAX_ROWS rows are looked up
WAIT_SECONDS = 5                    # Upper bound for each explicit wait
MYA_XPATH = "//*[contains(text(), 'MYA')]"


def fetch_divergence_text(driver, taxon_a, taxon_b, timeout=WAIT_SECONDS):
    """Run one pairwise TimeTree search and return the scraped "MYA" text.

    Parameters
    ----------
    driver : selenium WebDriver
        An already-started browser session; it is not closed here.
    taxon_a, taxon_b : str
        Species names typed into the "taxon-a" and "taxon-b" inputs.
    timeout : float
        Maximum number of seconds each explicit wait may block.

    Returns
    -------
    str
        ``.text`` of the second element whose text contains "MYA".
        NOTE(review): presumably this is the pairwise divergence estimate
        (e.g. "319 MYA") -- confirm against the page markup.

    Raises
    ------
    selenium.common.exceptions.TimeoutException
        If a form control, or a second "MYA" element, does not appear
        within ``timeout`` seconds.
    """
    wait = WebDriverWait(driver, timeout)
    driver.get(TIMETREE_URL)

    # Wait for each input instead of assuming it exists as soon as get() returns.
    for field_id, taxon in (("taxon-a", taxon_a), ("taxon-b", taxon_b)):
        field = wait.until(EC.presence_of_element_located((By.ID, field_id)))
        field.clear()
        field.send_keys(taxon)

    search_button = wait.until(
        EC.presence_of_element_located((By.ID, "pairwise-search-button1"))
    )
    search_button.click()

    def second_match_ready(drv):
        # The second match is the one that gets read, so keep polling until at
        # least two are present; a single early match must not end the wait.
        matches = drv.find_elements(By.XPATH, MYA_XPATH)
        return matches if len(matches) >= 2 else False

    return wait.until(second_match_ready)[1].text


# Load the dataset from the CSV file
df = pd.read_csv(INPUT_CSV)
df["MYA"] = ""  # Add a new column for the MYA data; stays "" when a lookup fails

# Limit the loop to the first MAX_ROWS rows
for index, row in df.head(MAX_ROWS).iterrows():
    scientific_name = row["Scientific Name"]

    # Do not launch a browser for a missing/blank name; such a value would
    # otherwise be submitted to the site as a meaningless query.
    if pd.isna(scientific_name) or not str(scientific_name).strip():
        print(f"Skipping row {index}: no scientific name.")
        continue

    # A fresh WebDriver per row keeps every lookup isolated from the previous one.
    driver = webdriver.Chrome()  # Or use 'webdriver.Firefox()' for Firefox

    try:
        mya_text = fetch_divergence_text(driver, REFERENCE_SPECIES, scientific_name)
        df.at[index, "MYA"] = mya_text
        print(scientific_name, " -> ",  mya_text)
    except TimeoutException:
        print(f"Timeout occurred for {scientific_name}.")
    except Exception as e:
        # Best effort: report the failure and carry on with the remaining rows.
        print(f"An error occurred for {scientific_name}: {e}")
    finally:
        # Always close the browser, even when the lookup failed
        driver.quit()

# Save the updated DataFrame to a new CSV file
df.to_csv(OUTPUT_CSV, index=False)


Chelonia mydas  ->  319 MYA
Oncorhynchus clarkii seleniris  ->  429 MYA
Silene spaldingii  ->  1530 MYA
Gila purpurea  ->  429 MYA
Timeout occurred for Kinosternon sonoriense longifemorale.
Strix occidentalis lucida  ->  319 MYA
Orcuttia pilosa  ->  1530 MYA
Percina rex  ->  429 MYA
Anas wyvilliana  ->  319 MYA
Chasmistes brevirostris  ->  429 MYA
Timeout occurred for Idaholanx fresti.
Cicurina madla  ->  686 MYA
Timeout occurred for Puma yagouaroundi cacomitli.
Polyphylla barbata  ->  686 MYA
Pediocactus bradyi  ->  1530 MYA


In [4]:
# Preview the first 15 rows, i.e. the rows the lookup loop above processed.
df.iloc[:15]

Unnamed: 0,Human,Scientific Name,MYA
0,homo sapien,Chelonia mydas,319 MYA
1,homo sapien,Oncorhynchus clarkii seleniris,429 MYA
2,homo sapien,Silene spaldingii,1530 MYA
3,homo sapien,Gila purpurea,429 MYA
4,homo sapien,Kinosternon sonoriense longifemorale,
5,homo sapien,Strix occidentalis lucida,319 MYA
6,homo sapien,Orcuttia pilosa,1530 MYA
7,homo sapien,Percina rex,429 MYA
8,homo sapien,Anas wyvilliana,319 MYA
9,homo sapien,Chasmistes brevirostris,429 MYA
