# how to generate files

### - Download markdown of the person you want to add from Roam and add it to covert_files folder

### - run this and fix file if needed

In [118]:
import re

def format_references(text):
    return re.sub(r"\[(\d+(?:,\s*\d+)*)\]", r'[[\1]](#refs){:class="ref"}', text)

def format_names(text):
    # Pattern to match names with optional birth/death years
    # name_pattern = r"([A-Z][a-z]+(?: [A-Z][a-z]+)*(\s\(\d{4}(?:-\d{4})?\))?)"
    name_pattern = r"([A-ZÀ-ÿ][a-zÀ-ÿ]+(?: [A-Z][a-z]+)*(\s\(\d{4}(?:-\d{4})?\))?)"
    # Replace names with the formatted version (keeping the years)
    formatted_text = re.sub(name_pattern, r'[\1](){:class="underconstruction"}', text)
    return formatted_text

def format_wedding_strings(text):
    # Regex to capture the full name (including optional birth/death years) before a reference or comma
    name_pattern = r"^([\wÀ-ÿ\s]+(?:\(\d{4}(?:-\d{4})?\))?)"

    # Replace the matched name with the formatted version
    formatted_text = re.sub(name_pattern, r'[\1](){:class="underconstruction"}', text)

    return formatted_text

def clean(text):
  text = " ".join(text.split())
  text = text.replace("[[", "")
  text = text.replace("]]", "")
  text = text.replace("\n", "")
  text = text.replace("*", "")
  return text

def clean_dict(dictionary):
  d = {}
  for k, v in dictionary.items():
    if v != {}:
      d[k] = {}
    if  isinstance(v, list):
        d[k] = v
    else:
      for k2, v2 in v.items():
        if v2:
            d[k][k2] = v2
  return d

In [119]:
def read_lines(filename):
    with open(f"convert_files/{filename}", 'r') as f:
        lines = f.readlines()
    lines = [clean(line) for line in lines if clean(line)!='']
    return lines

In [120]:
def extract_person_dict(lines):
  person = {}
  section="other"
  for i, line in enumerate(lines):
      if "###" in line:
          section = line.replace("- ### ","")
          if section == "Vida" or section == "Documentos":
            person[section] = []
          else:
            person[section] = {}
          pass
      elif section == "Vida":
        person[section].append(clean(line).replace("- ", ""))
      elif section == "Documentos":
        person[section].append(line)
      elif ":" in line:
        vals = line.split(":")
        # print(len(vals))
        if ("Morte" == vals[0] and (clean(vals[1]) == 'DATA, LOCAL')) or ("Sepultamento" == vals[0] and (clean(vals[1]) == 'Cemitério de')):
          person[section][vals[0]] = ''
        elif "Filhos" in vals[0]:
          filhos = []
          for filho in lines[i+1:]:
            if filho[0]=="-":
              filhos.append(filho.replace("- ", ""))
            else:
              break
          person[section]["Filhos"] = filhos
        else:
          person[section][vals[0]] = clean(":".join(vals[1:]))

  person = clean_dict(person)
  # display(person)
  return person

In [121]:
from datetime import date
def create_header(name, dates):
  timest = dates[0] if len(dates)>0 else "????"
  timeend = dates[1] if len(dates)>1 else "????"
  today = date.today().strftime("%d/%m/%Y")
  s = f"---\ntitle: \'{name} ({timest}-{timeend})\'\ndata: {today}\nnome: {name}\nlayout: pessoa\n---\n\n"
  return s

def get_personal_data(personal_data):
  lines = []
  for k, v in personal_data.items():
    if (k =="Pais") or (k=="Avós Paternos") or (k=="Avós Maternos"):
      text = format_names(v)
      # text = v.split(" e ")
      line = f'**{k}:** {text}<br/>'
      lines.append(line)
    elif k =="Casamento":
      print(v)
      text = format_wedding_strings(v)
      # text = v.split(",")
      # line = f'**{k}:** [{text[0]}](){{:class="underconstruction"}}, {",".join(text[1:])}<br/>'
      line = f'**{k}:** {text}<br/>'
      lines.append(line)
    elif k =="Filhos":
      line = f"**{k}:**<br/>"
      lines.append(line)
      # print(v)
      for i, filho in enumerate(v):
        filho = format_names(filho)
        line = f'&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;{i+1}. {filho}<br/>'
        # line = f'&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;{i+1}. [{filho}](){{:class="underconstruction"}}<br/>'
        lines.append(line)
    else:
      line = f"**{k}:** {v}<br/>"
      lines.append(line)
  return format_references("\n".join(lines))

def get_life(vida):
  nl = '\n' 
  paragraphs = "\n\n".join(vida)
  text = f"{nl}{nl}## Vida:{nl}{nl}{paragraphs}{nl}"
  return text

def get_refs(refs):
  print(refs)
  nl = '\n' 
  paragraphs = "\n\n".join(refs)

  text = f"{nl}{nl}## Referências:  {{#refs}} {nl}{nl}{paragraphs}{nl}"
  return text

In [122]:
from os import listdir
from os.path import isfile, join
onlyfiles = [f for f in listdir("convert_files") if (isfile(join("convert_files", f)) and f !='.DS_Store')]
onlyfiles

['Christiano Frederico Scherer (1868-1945).md']

In [123]:
def get_time_name(filename):
  filename = filename.replace(".md", '')
  times = re.findall(r'\d+', filename)
  name = " ".join(re.sub(r'\([^)]*\)', '', filename).split())
  return times, name


In [124]:
def file_name_set(name, time):
  name_aux = "".join(name.split())
  date_aux = time[0] if len(time)>0 else ""
  return f"{name_aux}{date_aux}.md"

def save_file(name, time, person):
  f = open(f"converted/{file_name_set(name, time)}", "w")
  f.write(create_header(name, time))
  f.write(get_personal_data(person["Dados Pessoais"]))
  f.write(get_life(person["Vida"]))
  f.write(get_refs(person["Documentos"]))
  f.close()

In [125]:
for filename in onlyfiles:
  times, name = get_time_name(filename)
  lines = read_lines(filename)
  person = extract_person_dict(lines)
  save_file(name, times, person)


Elisa Carlotta Massmann (1877-1939), 16 de dezembro de 1890, Conventos, Santo Antônio da Estrella, Rio Grande do Sul [2]
['Certidão de Nascimento: ', '[1] Registro de Batismo:  Batismos 1865-1885 (49) - livro 1865-1885 num 341 year 1872 - conventos https://drive.google.com/file/d/1BPonIaAnEOaNHa4OduUpdrHPXJ9slrmd/view?usp=sharing', '[2] Certidão de Casamento: num 42, flh 20 year 1890 - lajeado https://www.familysearch.org/ark:/61903/3:1:3QS7-89L5-3B1S', '[3] Certidão de Óbito:  C-11 n 376 flh112 year 1945 - lajeado https://www.familysearch.org/ark:/61903/3:1:3QS7-89GW-9W99', 'Outros:', '- [4] Túmulo no Cemitério de Olarias -22 de abril de 1868 - 05 de fevereiro de 1945-https://drive.google.com/file/d/1UX1gwModVnmI8UZzjo8mvGrSbtddQU6Q/view?usp=sharing', '- [6] Certidão de óbito do pai Peter Scherer (1841-1917)', '- [7] Certidão de batismo da filha Malvina Scherer (1891-1962)', 'Agradecimento:', '- FamilySearch profile: https://www.familysearch.org/pt/tree/person/details/GQV3-GVQ']


In [126]:
import re

def format_wedding_strings(text):
    # Regex to capture the full name (including optional birth/death years) before a reference or comma
    name_pattern = r"^([\wÀ-ÿ\s]+(?:\(\d{4}(?:-\d{4})?\))?)"

    # Replace the matched name with the formatted version
    formatted_text = re.sub(name_pattern, r'[\1](){:class="underconstruction"}', text)

    return formatted_text

# Test cases
test_cases = [
    "Gilberto Rosa dos Santos [1], DATA, LOCAL",
    "Elvira Quinot (1914-1993)[3], 18 de dezembro de 1937, Lajeado, Rio Grande do Sul, Brasil [2]",
    "João da Silva, 12 de maio de 1950, Porto Alegre, RS",
    "Maria Fernanda Souza [4], 20 de junho de 1975, São Paulo, SP"
]

for text in test_cases:
    print(format_wedding_strings(text))


[Gilberto Rosa dos Santos ](){:class="underconstruction"}[1], DATA, LOCAL
[Elvira Quinot (1914-1993)](){:class="underconstruction"}[3], 18 de dezembro de 1937, Lajeado, Rio Grande do Sul, Brasil [2]
[João da Silva](){:class="underconstruction"}, 12 de maio de 1950, Porto Alegre, RS
[Maria Fernanda Souza ](){:class="underconstruction"}[4], 20 de junho de 1975, São Paulo, SP


In [127]:
# FOLHA = "6"
# NUM = "9"
# LINK = "https://drive.google.com/file/d/11alIGA7qSWr9gIa5GBm4zfa3JhQ1NJi1/view?usp=sharing"
# DATA = "11 janeiro 2025"
# print(F"Registro de Casamento da Paróquia Evangélica de Conventos, Livro 1914-1935 Folha {FOLHA} Num. {NUM}. Acervo Pessoal. Disponível em: [{LINK}]({LINK}). Acesso em: {DATA}.")

In [128]:
date.today().strftime("%d %B/%Y")

'10 August/2025'

In [129]:
# from babel.dates import dateformat
from babel.dates import format_date, format_datetime, format_time

format_date(date.today(), format="long", locale="pt_BR")
format_date(date.today(), "dd 'de' MMMM 'de' YYYY", locale="pt_BR")
# "EEE, MMM d, ''yy

'10 de agosto de 2025'

In [137]:
from datetime import date

DATE = format_date(date.today(), "dd 'de' MMMM 'de' YYYY", locale="pt_BR")

def format_reg_civil_reference(doc_details):
    matricula = f' Número de Matrícula: {doc_details["matricula"]}.' if  doc_details["matricula"] else ''
    link = f' Disponível em: [{doc_details["link"]}]({doc_details["link"]}). Acesso em: {DATE}.' if  doc_details["link"] else ' Entrar em contato.'
    is_acervo = f' Acervo Pessoal.' if {doc_details["is_acervo"]==True} else ''
    return f'''[XX] Registro de {doc_details["type"]}. Registro Civíl de Pessoas Naturais - {doc_details["place"]}. Ano {doc_details["year"]}. Livro {doc_details["book"]}, Folha {doc_details["page"]}, Num. {doc_details["number"]}.{matricula}{is_acervo}{link}'''


def format_reg_religioso(doc_details):
    link = f' Disponível em: [{doc_details["link"]}]({doc_details["link"]}). Acesso em: {DATE}.' if  doc_details["link"] else ' Entrar em contato.'
    is_acervo = f' Acervo Pessoal.' if {doc_details["is_acervo"]} else ''
    return f"[XX] Registro de {doc_details['type']}. {doc_details['church']}, Livro {doc_details['yearbook']}, Ano {doc_details['year']}, Página {doc_details['page']}, Num. {doc_details['number']}. {is_acervo}{link}"

In [131]:
https://drive.google.com/file/d/1Pd8VRauKC4xXh3gD06ftFGaUrv7AGOsI/view?usp=sharing

SyntaxError: invalid decimal literal (1478220327.py, line 1)

In [None]:
doc_details = {
    "type": "Casamento", # Batismo | "Óbito" | Casamento
    "church": "Comunidade Evangélica de Conventos",
    "yearbook": "xx-xx",
    "page": "xx",
    "number": "xx",
    "link": "xxxx",
    "is_acervo": True
}
# [2] Certidão de Casamento: num 42, flh 20 year 1890 - lajeado https://www.familysearch.org/ark:/61903/3:1:3QS7-89L5-3B1S
doc_details = {
    "type": "Batismo", # Batismo | "Óbito" | Casamento | Confirmação
    "church": "Paróquia Evangélica de Conventos",
    "yearbook": "1865-1885",
    "year": "1872",
    "page": None,
    "number": "341",
    "link": "https://drive.google.com/file/d/1BPonIaAnEOaNHa4OduUpdrHPXJ9slrmd/view?usp=sharing",
    "is_acervo": True
}
print(format_reg_religioso(doc_details))

[XX] Registro de Batismo. Paróquia Evangélica de Conventos, Livro 1865-1885, Ano 1872, Página None, Num. 341.  Acervo Pessoal. Disponível em: [https://drive.google.com/file/d/1BPonIaAnEOaNHa4OduUpdrHPXJ9slrmd/view?usp=sharing](https://drive.google.com/file/d/1BPonIaAnEOaNHa4OduUpdrHPXJ9slrmd/view?usp=sharing). Acesso em: 10 de agosto de 2025.


In [None]:
# doc_details = {
#     "type": "Nascimento", # "Óbito" | Casamento
#     "place": "Lajeado, Rio Grande do Sul, Brasil",
#     "year": "2023",
#     "book": "A-27",
#     "page": "33v",
#     "number": "2341",
#     "matricula": None,
#     "link": None,
#     "is_acervo": True
# }

# Certidão de Óbito:  C-11 n 376 flh 112 year 1945 - lajeado https://www.familysearch.org/ark:/61903/3:1:3QS7-89GW-9W99
doc_details = {
    "type": "Óbito", # Nascimento | "Óbito" | Casamento
    "place": "Lajeado, Rio Grande do Sul, Brasil",
    "year": 1945,
    "book": "C-11",
    "page": "112",
    "number": "376",
    "matricula": None,
    "link": "https://www.familysearch.org/ark:/61903/3:1:3QS7-89L5-3B1S",
    "is_acervo": False
}
print(format_reg_civil_reference(doc_details))

[XX] Registro de Óbito. Registro Civíl de Pessoas Naturais - Lajeado, Rio Grande do Sul, Brasil. Ano 1945. Livro C-11, Folha 112, Num. 376. Acervo Pessoal. Disponível em: [https://www.familysearch.org/ark:/61903/3:1:3QS7-89L5-3B1S](https://www.familysearch.org/ark:/61903/3:1:3QS7-89L5-3B1S). Acesso em: 10 de agosto de 2025.


In [None]:
len("After time/date selected – No refunds or cancellations allowed after booking")

76