In [1]:
import pandas as pd

# store formatted kindle notes in this directory
KINDLE_FORMATTED_NOTES_DIRECTORY = "data/kindle/"

In [2]:
def extract_title_author(raw_notes_path):
    """
    Extract title and author from header of raw kindle notes csv file.
    """
    with open(raw_notes_path) as file:
        lines = file.readlines()
        # skip ",,,"
        title = lines[1].strip()[:-3].title().strip('"')
        # also skip "by "
        author = lines[2].strip()[3:-3].title().strip('"')
        return title, author

    
def create_kindle_notes_filename(title):
    """
    Return filename as title of book with "_" as spaces followed by "_notes.csv".
    
    Example:
    "ego_is_the_enemy_notes.csv"
    
    """
    return ('_').join(title.lower().split()) + "_notes.csv"


def format_kindle_notes_to_csv(raw_notes_path, formatted_notes_directory):
    """
    Format kindle notes and export to csv file with each row containing data for:
    
        - Annotation Type
        - Location
        - Starred?
        - Annotation
        - Author
        - Title

    """
    title, author = extract_title_author(raw_notes_path)

    # import notes to df without header
    notes_df = pd.read_csv(raw_notes_path, header=7)
    
    # add columns for title and author
    notes_df["Author"] = author
    notes_df["Title"] = title
    
    formatted_notes_path = formatted_notes_directory + create_kindle_notes_filename(title)
    notes_df.to_csv(formatted_notes_path, index=False)

In [3]:
# store raw kindle notes in this directory
KINDLE_RAW_NOTES_DIRECTORY = "data/kindle/raw"

In [4]:
import glob

# grab list of raw kindle notes files
raw_notes_paths = glob.glob(KINDLE_RAW_NOTES_DIRECTORY+'/*.csv')
raw_notes_paths

['data/kindle/raw/Ego Is the Enemy-Notebook - Ego Is the Enemy-Notebook.csv',
 "data/kindle/raw/Can't Hurt Me_ Master Your Mind and Defy the Odds-Notebook.csv",
 'data/kindle/raw/The Inevitable_ Understanding the 12 Technological Forces That Will Shape Our Future-Notebook.csv']

In [5]:
for raw_notes_path in raw_notes_paths:
    format_kindle_notes_to_csv(raw_notes_path, KINDLE_FORMATTED_NOTES_DIRECTORY)

In [6]:
# format_kindle_notes_to_csv("Ego Is the Enemy-Notebook - Ego Is the Enemy-Notebook.csv", KINDLE_FORMATTED_NOTES_DIRECTORY)

In [7]:
# check results...
formatted_notes_paths = glob.glob(KINDLE_FORMATTED_NOTES_DIRECTORY+'/*.csv')
formatted_notes_paths

['data/kindle/ego_is_the_enemy_notes.csv',
 'data/kindle/the_inevitable:_understanding_the_12_technological_forces_that_will_shape_our_future_notes.csv',
 "data/kindle/can't_hurt_me:_master_your_mind_and_defy_the_odds_notes.csv"]

In [11]:
for formatted_notes_path in formatted_notes_paths:
    display(pd.read_csv(formatted_notes_path).sample(5))

Unnamed: 0,Annotation Type,Location,Starred?,Annotation,Author,Title
100,Highlight (Yellow),Location 2449,,what defines great leaders like Douglass is th...,Ryan Holiday,Ego Is The Enemy
34,Highlight (Yellow),Location 615,,"plus, minus, and equal. Each fighter, to becom...",Ryan Holiday,Ego Is The Enemy
41,Highlight (Yellow),Location 728,,What humans require in our ascent is purpose a...,Ryan Holiday,Ego Is The Enemy
35,Highlight (Yellow),Location 619,,“False ideas about yourself destroy you. For m...,Ryan Holiday,Ego Is The Enemy
13,Highlight (Yellow),Location 410,,"Facts are better than dreams, as Churchill put...",Ryan Holiday,Ego Is The Enemy


Unnamed: 0,Annotation Type,Location,Starred?,Annotation,Author,Title
4,Highlight (Yellow),Location 211,,"In this era of “becoming,” everyone becomes a ...",Kevin Kelly,The Inevitable: Understanding The 12 Technolog...
2,Highlight (Yellow),Location 202,,But as our personal technology is becoming mor...,Kevin Kelly,The Inevitable: Understanding The 12 Technolog...
3,Highlight (Yellow),Location 208,,Technological life in the future will be a ser...,Kevin Kelly,The Inevitable: Understanding The 12 Technolog...
0,Highlight (Yellow),Location 106,,We are morphing so fast that our ability to in...,Kevin Kelly,The Inevitable: Understanding The 12 Technolog...
1,Highlight (Yellow),Location 146,,Our greatest invention in the past 200 years w...,Kevin Kelly,The Inevitable: Understanding The 12 Technolog...


Unnamed: 0,Annotation Type,Location,Starred?,Annotation,Author,Title
27,Highlight (Yellow),Page 114,,Everything in life is a mind game! Whenever we...,David Goggins,Can'T Hurt Me: Master Your Mind And Defy The Odds
60,Highlight (Yellow),Page 213,,"staying in the fight is always the hardest, an...",David Goggins,Can'T Hurt Me: Master Your Mind And Defy The Odds
54,Highlight (Yellow),Page 209,,I’m not down with the prevailing mentalities t...,David Goggins,Can'T Hurt Me: Master Your Mind And Defy The Odds
47,Highlight (Yellow),Page 162,,"Before I engage in any challenging activity, I...",David Goggins,Can'T Hurt Me: Master Your Mind And Defy The Odds
81,Highlight (Yellow),Page 256,,The point is not to allow a setback to shatter...,David Goggins,Can'T Hurt Me: Master Your Mind And Defy The Odds
