In [1]:
import pandas as pd
import glob


# store raw kindle notes in this directory
KINDLE_RAW_NOTES_DIRECTORY = "data/kindle/raw"

# store formatted kindle notes in this directory
KINDLE_FORMATTED_NOTES_DIRECTORY = "data/kindle/"

In [2]:
def extract_title_author(raw_notes_path):
    """
    Extract title and author from header of raw kindle notes csv file.
    """
    with open(raw_notes_path) as file:
        lines = file.readlines()
        # skip ",,,"
        title = lines[1].strip()[:-3].title().strip('"')
        # also skip "by "
        author = lines[2].strip()[3:-3].title().strip('"')
        return title, author

    
def create_kindle_notes_filename(title):
    """
    Return filename as title of book with "_" as spaces followed by "_notes.csv".
    
    Example:
    "ego_is_the_enemy_notes.csv"
    
    """
    return ('_').join(title.lower().split()) + "_notes.csv"


def format_kindle_notes_to_csv(raw_notes_path, formatted_notes_directory):
    """
    Format kindle notes and export to csv file with each row containing data for:
    
        - Annotation Type
        - Location
        - Starred?
        - Annotation
        - Author
        - Title

    """
    title, author = extract_title_author(raw_notes_path)

    # import notes to df without header
    notes_df = pd.read_csv(raw_notes_path, header=7)
    
    # add columns for title and author
    notes_df["Author"] = author
    notes_df["Title"] = title
    
    formatted_notes_path = formatted_notes_directory + create_kindle_notes_filename(title)
    notes_df.to_csv(formatted_notes_path, index=False)

In [3]:
# # store raw kindle notes in this directory
# KINDLE_RAW_NOTES_DIRECTORY = "data/kindle/raw"

In [4]:
# # import glob

# # grab list of raw kindle notes files
# raw_notes_paths = glob.glob(KINDLE_RAW_NOTES_DIRECTORY+'/*.csv')
# raw_notes_paths

In [5]:
# for raw_notes_path in raw_notes_paths:
#     format_kindle_notes_to_csv(raw_notes_path, KINDLE_FORMATTED_NOTES_DIRECTORY)

In [6]:
# grab list of raw kindle notes files
raw_notes_paths = glob.glob(KINDLE_RAW_NOTES_DIRECTORY+'/*.csv')

for raw_notes_path in raw_notes_paths:
    format_kindle_notes_to_csv(raw_notes_path, KINDLE_FORMATTED_NOTES_DIRECTORY)

In [7]:
# format_kindle_notes_to_csv("Ego Is the Enemy-Notebook - Ego Is the Enemy-Notebook.csv", KINDLE_FORMATTED_NOTES_DIRECTORY)

In [8]:
# # check results...
# formatted_notes_paths = glob.glob(KINDLE_FORMATTED_NOTES_DIRECTORY+'/*.csv')
# formatted_notes_paths

In [9]:
# check results...
formatted_notes_paths = glob.glob(KINDLE_FORMATTED_NOTES_DIRECTORY+'/*.csv')

for formatted_notes_path in formatted_notes_paths:
    print(formatted_notes_path)
    display(pd.read_csv(formatted_notes_path).sample(5))

data/kindle/ego_is_the_enemy_notes.csv


Unnamed: 0,Annotation Type,Location,Starred?,Annotation,Author,Title
59,Highlight (Yellow),Location 1120,,"Fac, si facis. (Do it if you’re going to do it.)",Ryan Holiday,Ego Is The Enemy
50,Highlight (Yellow),Location 1022,,Pride blunts the very instrument we need to ow...,Ryan Holiday,Ego Is The Enemy
104,Highlight (Yellow),Location 2526,,training was like sweeping the floor. Just bec...,Ryan Holiday,Ego Is The Enemy
17,Highlight (Yellow),Location 434,,"Almost universally, the kind of performance we...",Ryan Holiday,Ego Is The Enemy
103,Highlight (Yellow),Location 2499,,"the old Celtic saying tells us, “See much, stu...",Ryan Holiday,Ego Is The Enemy


data/kindle/the_inevitable:_understanding_the_12_technological_forces_that_will_shape_our_future_notes.csv


Unnamed: 0,Annotation Type,Location,Starred?,Annotation,Author,Title
0,Highlight (Yellow),Location 106,,We are morphing so fast that our ability to in...,Kevin Kelly,The Inevitable: Understanding The 12 Technolog...
6,Highlight (Yellow),Location 2386,,"At this point in our history, sharing somethin...",Kevin Kelly,The Inevitable: Understanding The 12 Technolog...
5,Highlight (Yellow),Location 1464,,The link and the tag now make screening the un...,Kevin Kelly,The Inevitable: Understanding The 12 Technolog...
4,Highlight (Yellow),Location 211,,"In this era of “becoming,” everyone becomes a ...",Kevin Kelly,The Inevitable: Understanding The 12 Technolog...
3,Highlight (Yellow),Location 208,,Technological life in the future will be a ser...,Kevin Kelly,The Inevitable: Understanding The 12 Technolog...


data/kindle/can't_hurt_me:_master_your_mind_and_defy_the_odds_notes.csv


Unnamed: 0,Annotation Type,Location,Starred?,Annotation,Author,Title
20,Highlight (Yellow),Page 91,,"What the fuck? I mean, seriously, what the fuc...",David Goggins,Can'T Hurt Me: Master Your Mind And Defy The Odds
62,Highlight (Yellow),Page 214,,"I just wanted to quit, but by moving a little ...",David Goggins,Can'T Hurt Me: Master Your Mind And Defy The Odds
98,Highlight (Yellow),Page 353,,It’s what you tell yourself that matters. The ...,David Goggins,Can'T Hurt Me: Master Your Mind And Defy The Odds
86,Highlight (Yellow),Page 282,,an opportunity to be uncommon. Not that anybod...,David Goggins,Can'T Hurt Me: Master Your Mind And Defy The Odds
61,Highlight (Yellow),Page 213,,"every failure there is something to be gained,...",David Goggins,Can'T Hurt Me: Master Your Mind And Defy The Odds
