In [2]:
import os
from bigxml import Parser, xml_handle_element, xml_handle_text
from dataclasses import dataclass
import pandas as pd
import numpy as np
from bs4 import BeautifulSoup
import matplotlib.pyplot as plt

# Parse ENEX files

In [3]:
@xml_handle_element("en-export", "note")
@dataclass
class Note:
    """One Evernote note read from an ``.enex`` export.

    Registered as a bigxml handler for ``<en-export>/<note>`` elements; the
    ``handle_*`` methods below are registered for the matching child elements
    and copy their text into the dataclass fields.

    Every field defaults to ``np.nan`` so that an element missing from the
    export shows up as a missing value once the notes are put in a DataFrame.
    """

    title: str = np.nan
    created: str = np.nan
    updated: str = np.nan
    content: str = np.nan
    tag: str = np.nan
    attributes: str = np.nan

    @xml_handle_element("title")
    def handle_title(self, node):
        """Store the text of the ``<title>`` element."""
        self.title = node.text

    @xml_handle_element("created")
    def handle_created(self, node):
        """Store the raw text of the ``<created>`` element (parsed to a datetime later)."""
        self.created = node.text

    @xml_handle_element("updated")
    def handle_updated(self, node):
        """Store the raw text of the ``<updated>`` element (parsed to a datetime later)."""
        self.updated = node.text

    @xml_handle_element("content")
    def handle_content(self, node):
        """Store the note body as plain text with all markup stripped."""
        # Name the parser explicitly. Without it BeautifulSoup picks whichever
        # parser happens to be installed (and warns about it), so the extracted
        # text could differ from one environment to the next.
        soup = BeautifulSoup(node.text, "html.parser")
        self.content = soup.text

    @xml_handle_element("tag")
    def handle_tag(self, node):
        """Store the tag name; several ``<tag>`` elements are joined with ``", "``."""
        tag_name = node.text
        # A note can carry more than one <tag>. Append to what is already there
        # instead of overwriting, otherwise only the last tag would survive.
        if isinstance(self.tag, str):
            self.tag = f"{self.tag}, {tag_name}"
        else:
            self.tag = tag_name

    @xml_handle_element("note-attributes")
    def handle_attributes(self, node):
        """Store the text of the ``<note-attributes>`` element."""
        # NOTE(review): <note-attributes> presumably contains child elements, so
        # this keeps whatever `node.text` yields for the whole element as a
        # single string -- confirm that is the intended representation.
        self.attributes = node.text



In [7]:
# Collect every note from every .enex file in a user-supplied folder.
notes = []

folder = input("Enter relative path to folder containing enex-files:")
# Sorted so that notes are collected in the same order on every run.
for file in sorted(os.listdir(folder)):
    # Only exports are parsed; this skips sub-folders and unrelated files such
    # as the CSV this notebook later writes into the same folder.
    if not file.lower().endswith(".enex"):
        continue
    # os.path.join works whether or not `folder` was typed with a trailing separator.
    path = os.path.join(folder, file)
    try:
        with open(path, "rb") as f:
            # Notes parsed before a failure are kept; the rest of the file is skipped.
            notes.extend(Parser(f).iter_from(Note))
    except Exception as e:
        # Best effort: report the offending file and carry on with the next one.
        print(f"{path}: {e}")

In [8]:
# Sanity check: total number of notes collected from all parsed files.
len(notes)

8

In [9]:
# One row per note. Passing the dataclass field names as columns keeps the
# expected schema even when no notes were parsed, so the column accesses below
# do not fail on an empty export folder.
df = pd.DataFrame(notes, columns=list(Note.__dataclass_fields__))
# Timestamps arrive as strings; convert them so the `.dt` accessors work downstream.
df["created"] = pd.to_datetime(df.created)
df["updated"] = pd.to_datetime(df.updated)
df.shape

(8, 6)

In [10]:
# Preview the first three rows of the parsed notes.
df.head(3)

Unnamed: 0,title,created,updated,content,tag,attributes
0,Daily Reflection,2023-09-21 15:26:36+00:00,2023-09-23 13:16:39+00:00,**Day 1: Arrival in Beijing***Reflection:*Toda...,,
1,Reading list,2023-09-21 15:54:18+00:00,2023-09-23 13:16:34+00:00,"1. **""The Call of the Wild"" by Jack London** ...",,
2,To-do list,2023-09-21 15:53:52+00:00,2023-09-23 13:16:22+00:00,**Research and Preparation:**1. Research the Z...,,


# Prepare Evernote data for LangChain

In [13]:
# Write the parsed notes next to the exports. os.path.join puts the file inside
# `folder` whether or not the path was typed with a trailing separator.
df.to_csv(os.path.join(folder, "enex-parsed.csv"))

In [16]:
from langchain.document_loaders.csv_loader import CSVLoader

# Load the CSV that was exported to `folder` earlier in the notebook instead of
# a hard-coded location, so this cell works for whichever folder was entered.
loader = CSVLoader(file_path=os.path.join(folder, "enex-parsed.csv"))
documents = loader.load()
documents[:3]

[Document(page_content=": 0\ntitle: Daily Reflection\ncreated: 2023-09-21 15:26:36+00:00\nupdated: 2023-09-23 13:16:39+00:00\ncontent: **Day 1: Arrival in Beijing***Reflection:*Today marked the beginning of an incredible journey through China. The energy and vibrancy of Beijing were palpable as I explored Tiananmen Square and the Forbidden City. The sheer magnitude of history within those walls left me in awe. The contrast between the ancient architecture and the modernity of the cityscape was a testament to China's rich cultural tapestry.**Day 2: The Great Wall of China (Mutianyu)***Reflection:*Hiking along the Great Wall was a surreal experience. The panoramic views from Mutianyu were breathtaking, and I couldn't help but marvel at the engineering prowess of ancient China. The Wall's undulating path seemed to stretch on forever, reminding me of the enduring spirit that built this remarkable structure.**Day 3: Summer Palace and Temple of Heaven***Reflection:*Visiting the Summer Palace

# Visualize note metadata

In [None]:
# Number of notes created per calendar year, in chronological order.
# value_counts() leaves out rows with a missing creation date.
tmp = df.created.dt.year.value_counts().sort_index()
fig, ax = plt.subplots()
# Year labels are passed as strings so the axis shows exactly one tick per
# year rather than fractional positions on a numeric scale.
ax.barh(tmp.index.astype(int).astype(str), tmp.values)
ax.set(title="Notes created per year", xlabel="Number of notes", ylabel="Year created");