In [1]:
import pandas as pd
import pygraphviz as pgv
import datetime
import dateutil.parser

In [2]:
workspace = pd.read_excel("Beach Lineage.xlsx", sheet_name=None, dtype=str)
for df in workspace.values():
    for col in df.columns:
        df[col] = df[col].map(lambda s: s.strip() if pd.notna(s) else s)

In [3]:
people_df = workspace["People"]
people_df = people_df[people_df["Name"].notna()]

def _parse_year(s):
    if s and pd.notna(s):
        return dateutil.parser.parse(s).year

for key in ["Born", "Died"]:
    people_df[key] = people_df[key].map(_parse_year)

relations_df = workspace["Relationships"]
relations_df["Relationship"] = relations_df["Relationship"].map(
    lambda s: s.lower().removeprefix("is ") if s and pd.notna(s) else ""
)
relations_df = relations_df[relations_df["Source Person"].notna()]
relations_df["Year"] = relations_df["Year"].map(_parse_year)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  people_df[key] = people_df[key].map(_parse_year)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  relations_df["Year"] = relations_df["Year"].map(_parse_year)


In [4]:
def get_label(name, born, died):
    rv = name
    if born and pd.notna(born):
        if isinstance(born, int):
            pass
        elif isinstance(born, datetime.datetime):
            born = born.year
        if died and pd.notna(died):
            if isinstance(died, datetime.datetime):
                died = died.year
            rv += f"\nBorn: {int(born)}, Died: {int(died)}"
        else:
            rv += f"\nBorn: {int(born)}"
    return rv

In [5]:
graph = pgv.AGraph(
    strict=True,
    directed=True,
)

for name, gender, _title, born, died, *_ in people_df.values:
    if not gender or pd.isna(gender):
        color = "black"
    elif gender.lower() in {"m", "male"}:
        color = "blue"
    else:
        color = "red"
    
    graph.add_node(
        name,
        # shape="square",
        color=color,
        # style="filled",
        # fontsize=10,
        # fillcolor=color,
        label=get_label(name, born, died),
        # fixedsize="true",
        # width=0.2,
        # height=0.2,
    )

for s, p, o, year in relations_df.values:
    if p == "child of":
        graph.add_edge(o, s, label="parent of")  
    elif p == "married to":
        label = "married to"
        if pd.notna(year):
            label = f"{label}\n({int(year)})"
        graph.add_edge(o, s, label=label) 
    else:
        graph.add_edge(s, o, label=p)    

graph.draw("diagram.pdf", prog="dot")