In [1]:
from typing import List, Dict, Tuple, Union
import csv


Scientist = Dict[str, Union[str, int, List[str], None]]
QueryCondition = Tuple[str, str]


def load_db(file_path: str) -> List[Scientist]:
    """
    Load scientists' data from a TSV file.

    Each non-blank row must provide six tab-separated columns, in order:
    first name, surname, sex, year of birth, year of death (may be empty)
    and a ';'-separated list of occupations (optionally wrapped in quotes).

    Args:
        file_path (str): Path to the TSV file.

    Returns:
        List[Scientist]: A list of dictionaries, each representing a scientist.
            'year_of_death' is None when the column is empty.

    Raises:
        ValueError: If a non-blank row has fewer than six columns or a year
            column is not an integer.
    """
    scientists = []
    # newline="" is what the csv module asks for so that it can handle line
    # endings itself; the explicit encoding avoids depending on the locale.
    with open(file_path, "r", encoding="utf-8", newline="") as file:
        reader = csv.reader(file, delimiter="\t")
        for row in reader:
            # csv.reader yields an empty list for blank lines (e.g. a
            # trailing newline at the end of the file); skip those.
            if not any(cell.strip() for cell in row):
                continue
            if len(row) < 6:
                raise ValueError(
                    f"Line {reader.line_num}: expected 6 columns, "
                    f"got {len(row)}"
                )
            year_of_death = row[4].strip()
            # Tolerate separators with or without a following space and
            # drop empty entries.
            occupations = [
                occupation.strip()
                for occupation in row[5].strip('"').split(";")
                if occupation.strip()
            ]
            scientist = {
                "first_name": row[0],
                "surname": row[1],
                "sex": row[2],
                "year_of_birth": int(row[3]),
                "year_of_death": int(year_of_death) if year_of_death else None,
                "occupation": occupations,
            }
            scientists.append(scientist)
    return scientists


def parse_query(query: str) -> List[QueryCondition]:
    """
    Parse a query string into a list of query conditions.

    The query is lower-cased, split on ' and ', and every condition must
    have the form 'field:value' with a non-empty field and value.

    Args:
        query (str): A string containing one or more conditions separated by 'AND'.

    Returns:
        List[QueryCondition]: A list of tuples, each containing a field and a
            value, both lower-cased and stripped of surrounding whitespace.

    Raises:
        ValueError: If the query format is invalid, i.e. a condition does not
            contain exactly one ':' or its field or value is empty.
    """
    parsed_conditions: List[QueryCondition] = []

    # Lower-casing once up front makes both 'AND' and 'and' valid separators
    # and normalises fields and values in the same pass.
    for condition in query.lower().split(" and "):
        parts = condition.split(":")
        if len(parts) != 2:
            raise ValueError(f"Invalid condition format: {condition}")

        field, value = (part.strip() for part in parts)
        # Reject 'sex:' or ':f' here so consumers never see an empty
        # field or value.
        if not field or not value:
            raise ValueError(f"Invalid condition format: {condition}")

        parsed_conditions.append((field, value))

    return parsed_conditions


def filter_scientists(
    scientists: List[Scientist], query: str
) -> List[Scientist]:
    """
    Filter scientists based on the given query.

    Supported condition fields are 'occupation' (exact, case-insensitive
    match against any of the scientist's occupations), 'sex' (accepts
    'f'/'female'/'m'/'male') and 'alive-in' (a year). Conditions on any
    other field match no scientist.

    Args:
        scientists (List[Scientist]): List of scientists to filter.
        query (str): Query string to filter scientists.

    Returns:
        List[Scientist]: Filtered list of scientists matching the query.

    Raises:
        ValueError: If the query is malformed or an 'alive-in' value is not
            an integer year.
    """
    parsed_query = parse_query(query)

    # Accepted spellings for the 'sex' field, mapped to the initial that the
    # stored value is compared against.
    sex_initials = {"f": "f", "female": "f", "m": "m", "male": "m"}

    # Parse every 'alive-in' year once, before touching the data, so an
    # invalid year is reported consistently instead of only when evaluation
    # happens to reach that condition.
    alive_in_years = {}
    for field, value in parsed_query:
        if field == "alive-in":
            try:
                alive_in_years[value] = int(value)
            except ValueError:
                raise ValueError(
                    f"Invalid year for alive-in: {value}"
                ) from None

    def matches_condition(
        scientist: Scientist, condition: QueryCondition
    ) -> bool:
        """
        Check if a scientist matches a single query condition.

        Args:
            scientist (Scientist): The scientist to check.
            condition (QueryCondition): The condition to match against.

        Returns:
            bool: True if the scientist matches the condition, False otherwise.
        """
        field, value = condition
        if field == "occupation":
            return value in [occ.lower() for occ in scientist[field]]
        if field == "sex":
            # Unknown or empty values match nobody rather than being
            # matched on their first letter.
            initial = sex_initials.get(value)
            if initial is None:
                return False
            return scientist[field].lower().startswith(initial)
        if field == "alive-in":
            year = alive_in_years[value]
            # Alive in the given year: born on or before it and either
            # not dead yet or died on or after it.
            return scientist["year_of_birth"] <= year and (
                scientist["year_of_death"] is None
                or scientist["year_of_death"] >= year
            )
        # Implicitly reject queries for unsupported attributes
        return False

    # Return scientists that match all conditions in the query
    return [
        s
        for s in scientists
        if all(matches_condition(s, cond) for cond in parsed_query)
    ]


def display_scientist(scientist: Scientist) -> None:
    """
    Print a human-readable summary of one scientist to standard output.

    The summary lists name, sex, year of birth, year of death (or
    'Still alive' when no year of death is recorded) and the occupations,
    followed by a blank separator line.

    Args:
        scientist (Scientist): The scientist to display.
    """
    # A falsy year of death (e.g. None) is shown as 'Still alive'.
    died = scientist["year_of_death"] or "Still alive"
    occupations = ", ".join(scientist["occupation"])

    summary = [
        f"Name: {scientist['first_name']} {scientist['surname']}",
        f"Sex: {scientist['sex']}",
        f"Born: {scientist['year_of_birth']}",
        f"Died: {died}",
        f"Occupation(s): {occupations}",
        "",  # trailing blank line separating consecutive records
    ]
    print("\n".join(summary))

In [None]:
# Interactive query loop: load the database once, then answer queries until
# the user submits an empty line.
scientists = load_db("BiographicDB.tsv")
while True:
    print("How can I help you?")
    query = input(">")
    if query == "":
        break
    try:
        # Echo the parsed conditions so the user can see how the query
        # was interpreted.
        print(parse_query(query))
        results = filter_scientists(scientists, query)
    except ValueError as error:
        # A malformed query should not end the session; report it and
        # prompt again.
        print(f"Sorry, I could not understand that query: {error}")
        continue
    for row in results:
        display_scientist(row)
    print(f"Your query returned {len(results)} matches.")

How can I help you?


> sex:f


[('sex', 'f')]
Name: Klara Dan von Neumann
Sex: F
Born: 1911
Died: 1963
Occupation(s): mathematician, computer scientist

Name: Yvonne Y. Clark
Sex: F
Born: 1929
Died: 2019
Occupation(s): engineer

Name: Dorothy Hansine Andersen
Sex: F
Born: 1901
Died: 1963
Occupation(s): physician

Name: Emmy Noether
Sex: F
Born: 1882
Died: 1935
Occupation(s): mathematician

Name: Maryam Mirzakhani
Sex: F
Born: 1977
Died: 2017
Occupation(s): mathematician

Name: Rosalind Elsie Franklin
Sex: F
Born: 1920
Died: 1958
Occupation(s): chemist

Name: Lise Meitner
Sex: F
Born: 1878
Died: 1968
Occupation(s): physicist

Name: Marie Salomea Sklodowska-Curie
Sex: F
Born: 1867
Died: 1934
Occupation(s): physicist, chemist

Name: Susan Jocelyn Bell Burnell
Sex: F
Born: 1943
Died: Still alive
Occupation(s): physicist

Name: Ada Lovelace
Sex: F
Born: 1815
Died: 1852
Occupation(s): mathematician

Name: Katerina Yushchenko
Sex: F
Born: 1919
Died: 2001
Occupation(s): computer scientist

Name: Agnieszka Muszynska
Sex: F
B

> occupation:chemist and occupation:physicist


[('occupation', 'chemist'), ('occupation', 'physicist')]
Name: Marie Salomea Sklodowska-Curie
Sex: F
Born: 1867
Died: 1934
Occupation(s): physicist, chemist

Name: Michael Stefanidis
Sex: M
Born: 1868
Died: 1957
Occupation(s): mathematician, physicist, chemist

Your query returned 2 matches.
How can I help you?


> sex:f and sex:m


[('sex', 'f'), ('sex', 'm')]
Your query returned 0 matches.
How can I help you?


> sex:f and sex:m


[('sex', 'f'), ('sex', 'm')]
Your query returned 0 matches.
How can I help you?


> sex:f and occupation:computer scientist


[('sex', 'f'), ('occupation', 'computer scientist')]
Name: Klara Dan von Neumann
Sex: F
Born: 1911
Died: 1963
Occupation(s): mathematician, computer scientist

Name: Katerina Yushchenko
Sex: F
Born: 1919
Died: 2001
Occupation(s): computer scientist

Name: Ruzena Bajcsy
Sex: F
Born: 1933
Died: Still alive
Occupation(s): engineer, computer scientist

Your query returned 3 matches.
How can I help you?
