In [None]:
from pathlib import Path
import frontmatter
import regex

# Location of the Quarto logbook that is being broken up into separate notes.
path = "/Users/jonathan/mres_thesis/wine_analysis_hplc_uv/src/wine_analysis_hplc_uv/notes/logbook/mres_logbook.qmd"

# Parse the file and keep the post body for the cells below.
logfile = frontmatter.load(path)
content = logfile.content
# Preview the first 100 characters as a quick sanity check.
str(content[:100])

In [None]:
# Cut the logbook into chunks, each one starting at a level-2 ("## ") header.
# The pattern is a zero-width lookahead, so the header line stays attached to
# the chunk it opens instead of being consumed by the split.

pattern = r"(?=^## .*$)"

header_splitter = regex.compile(pattern, flags=regex.MULTILINE)
split_content = header_splitter.split(content)
len(split_content)

In [None]:
# Separate each chunk into its header text (the note's date) and its body.
# The title has to stay on its header line while the body runs to the end of
# the chunk, so DOTALL is scoped to the content group with `(?s:...)`. A bare
# `(?s)` placed mid-pattern is applied to the WHOLE pattern under the regex
# module's default (version 0) behaviour, which lets the title's `.*` run
# across newlines as well.
title_pattern = r"^## (?P<title>.*)\n\n"
content_pattern = r"(?P<content>(?s:.*))"
pattern = title_pattern + content_pattern

parsed_notes = []

for note in split_content:
    matches = regex.match(pattern=pattern, string=note)
    if matches:
        parsed_notes.append(matches.groupdict())
    else:
        # Typically the text in front of the first header. Show the start of
        # the chunk so that any other unparsed chunk can be tracked down.
        print("no match:", repr(note[:60]))

In [None]:
# Turn each parsed note into a frontmatter post carrying these fields:
#   cdt  - creation datetime, taken from the note's date header (`title`)
#   tags - "mres", "thesis", "log", plus a marker recording the source logbook
#   type - "log"
# The posts are dumped into the zettel folder further down.
from datetime import datetime

# One list entry per tag; a single "mres, thesis, log" string would be stored
# as one tag containing commas.
tags = ["mres", "thesis", "log", "from_mres_logbook"]

md_notes = []
for note in parsed_notes:
    md_note = frontmatter.loads(note["content"])
    # Stray whitespace around the header text would make fromisoformat raise.
    cdt = datetime.fromisoformat(note["title"].strip()).isoformat()
    md_note["cdt"] = cdt
    # Each post gets its own copy so editing one post's tags cannot leak into
    # every other post.
    md_note["tags"] = list(tags)
    md_note["type"] = "log"

    md_notes.append(md_note)

In [None]:
# Check the date ordering: plot every note's cdt against its position in the
# note list (which follows the order of appearance in the file). A downward
# step marks a note dated earlier than the one before it.

import matplotlib.pyplot as plt

cdt = [datetime.fromisoformat(md_note["cdt"]) for md_note in md_notes]
x = list(range(len(cdt)))
plt.plot(x, cdt, "r")
plt.title("cdt vs note order")
plt.grid(alpha=0.5)
plt.show()

There is one negative gradient (a note dated earlier than the one before it) towards the end of April. Zoom in on that.

In [None]:
# Same cdt-vs-order plot, zoomed in on notes 35-40 and 17-23 April 2023.
cdt = [datetime.fromisoformat(md_note["cdt"]) for md_note in md_notes]
x = list(range(len(cdt)))
plt.plot(x, cdt, "r")

first_note, last_note = 35, 40
first_day, last_day = datetime(2023, 4, 17), datetime(2023, 4, 23)
plt.axis([first_note, last_note, first_day, last_day])

plt.title("cdt vs note order")
plt.grid(alpha=0.5)
plt.show()

It's fine, looks legit: an error from a previous organisation effort. Now to add filenames. Filenames will be the date followed by a string concatenation of the subtitles (the code below takes the first three): no separations within a subtitle, subtitles separated by "_".


In [None]:
# Build a filename for every note: "<YYYY-MM-DD>_<slug>.md". The slug is made
# from the note's first three "### " subtitles (each squeezed into one word,
# joined by "_") or, when there are none, from the first 30 characters of the
# body after removing punctuation and filler words. Slugs keep only letters,
# digits and underscores so that the result is a legitimate filename.

# "- [x]" has to come first: once "[", "]" or "-" have been stripped on their
# own, the checkbox marker can no longer match.
punctuation = ["- [x]", ".", ":", "\\", "[", "]", "*", "/", "`", ",", "-", "!"]
filler_words = [
    " i ",
    "I ",
    "the ",
    "after ",
    "today ",
    "Today ",
    "my ",
    "a ",
    "it ",
    "to ",
    "of ",
    " of ",
]

remove_list = punctuation + filler_words

# Filler words are matched as whole, lower-case words. Plain substring removal
# also eats the tail of longer words ("data is" -> "datis"), and capitalised
# entries can never match text that has already been lower-cased.
filler_pattern = regex.compile(
    r"(?<![\w'])(?:"
    + "|".join(sorted({regex.escape(word.strip().lower()) for word in filler_words}))
    + r")(?![\w'])"
)
first_title_pattern = "(?:\n\n|^)### (.*)"
slug_length = 30


def _keep_word_chars(text: str) -> str:
    """Drop every character that is not a letter, digit or underscore."""
    return regex.sub(r"\W", "", text)


def _slug_from_subtitles(subtitles: list) -> str:
    """Squeeze each subtitle into one lower-case word and join them with "_"."""
    words = (_keep_word_chars(subtitle.lower()) for subtitle in subtitles)
    return "_".join(word for word in words if word)


def _slug_from_body(body: str) -> str:
    """Build a slug from the start of a note body, minus punctuation and filler words."""
    text = body.strip().lower()
    for mark in punctuation:
        text = text.replace(mark, "")
    text = filler_pattern.sub(" ", text)
    # split() also swallows newlines and tabs, which must not reach a filename.
    words = (_keep_word_chars(token) for token in text.split())
    joined = " ".join(word for word in words if word)
    return joined[:slug_length].strip().replace(" ", "_")


note_dicts = []
used_filenames = set()
for note in md_notes:
    # Local name on purpose: rebinding `content` here would replace the logbook
    # text that the header-splitting cell reads.
    body = note.content
    ymd = datetime.fromisoformat(note["cdt"]).strftime("%Y-%m-%d")

    subtitles = regex.findall(first_title_pattern, body)
    # Only the slug is cleaned, never the date prefix, so both branches produce
    # the same "YYYY-MM-DD" prefix.
    slug = _slug_from_subtitles(subtitles[:3]) or _slug_from_body(body)
    stem = f"{ymd}_{slug}" if slug else ymd

    # Two notes from the same day with the same slug would overwrite each other
    # on disk; give later ones a numeric suffix instead.
    filename = stem
    duplicate_number = 2
    while filename in used_filenames:
        filename = f"{stem}_{duplicate_number}"
        duplicate_number += 1
    used_filenames.add(filename)

    note_dicts.append({"note": note, "path": filename + ".md"})

In [None]:
# test writing
written_paths = []
test_outdir = Path(
    "/Users/jonathan/mres_thesis/wine_analysis_hplc_uv/src/wine_analysis_hplc_uv/notes/logbook"
)


def write_notes(note_dicts: list, out_dir: Path, cleanup: bool = False) -> None:
    """Write every note in ``note_dicts`` to ``out_dir`` as a frontmatter file.

    Writing is best-effort: a note that cannot be written is reported and the
    remaining notes are still attempted. The paths written by the most recent
    call are left in the module-level ``written_paths`` list.

    Args:
        note_dicts: dicts with a ``"note"`` entry (the frontmatter post) and a
            ``"path"`` entry (the filename, relative to ``out_dir``).
        out_dir: directory the files are written into.
        cleanup: when True, delete the written files again before returning.
            Use this for a trial run; leave it False to keep the files.
    """
    # Start from an empty list: paths left over from an earlier call would
    # otherwise be counted, and deleted, a second time.
    written_paths.clear()
    seen = set()

    for index, note in enumerate(note_dicts):
        try:
            outpath = out_dir / note["path"]
            if outpath in seen:
                print(f"duplicate path, earlier note is overwritten: {outpath}")
            frontmatter.dump(post=note["note"], fd=outpath)
        except Exception as e:
            print(f"could not write note {index}: {e}")
        else:
            seen.add(outpath)
            written_paths.append(outpath)

    # Every note handed in should have been written. Compare against the
    # argument rather than a notebook global so the check matches this call.
    if len(written_paths) != len(note_dicts):
        print(f"wrote {len(written_paths)} of {len(note_dicts)} notes")

    if cleanup:
        # dict.fromkeys removes repeated paths while keeping their order.
        for path in dict.fromkeys(written_paths):
            try:
                path.unlink(missing_ok=True)
            except OSError as e:
                print(f"could not remove {path}: {e}")


# Trial run: write next to the logbook, then remove the files again.
write_notes(note_dicts=note_dicts, out_dir=test_outdir, cleanup=True)

In [None]:
# now do it for real

# destination: the zettel folder of the Obsidian vault
out_dir = Path("/Users/jonathan/001_obsidian_vault/zettel")
write_notes(note_dicts, out_dir)

Done.