## Export Readwise highlights with the Requests library


In [1]:
import os

import pandas as pd

In [2]:
import requests

# Readwise API v2 endpoints.
URL = "https://readwise.io/api/v2/auth/"
books = "https://readwise.io/api/v2/books/"
highlights = "https://readwise.io/api/v2/highlights/"

export = "https://readwise.io/api/v2/export/"

# The access token is read from the READWISE_TOKEN environment variable so the
# real secret never has to be typed into (and saved with) the notebook. When the
# variable is not set, the original "XXXXXX" placeholder is used unchanged.
auth_header = {'Authorization': "Token " + os.environ.get("READWISE_TOKEN", "XXXXXX")}

# SET READWISE_TOKEN IN YOUR ENVIRONMENT (or replace the XXXXXX placeholder above ↑)

In [3]:


def fetch_from_export_api(updated_after=None):
    """Download every record from the Readwise export endpoint.

    Requests the export endpoint repeatedly, following the ``nextPageCursor``
    value of each response until the API no longer returns one, and
    concatenates the ``results`` list of every page.

    Parameters
    ----------
    updated_after : optional
        When truthy, sent unchanged as the ``updatedAfter`` query parameter.
        (Presumably an ISO 8601 timestamp string -- confirm against the
        Readwise API documentation.)

    Returns
    -------
    list
        The ``results`` entries of all pages, in the order they were received.

    Raises
    ------
    requests.HTTPError
        If a page request comes back with a 4xx/5xx status (for example an
        invalid token or rate limiting).
    requests.Timeout
        If the server does not respond within the timeout.
    """
    full_data = []
    next_page_cursor = None
    while True:
        params = {}
        if next_page_cursor:
            params['pageCursor'] = next_page_cursor
        if updated_after:
            params['updatedAfter'] = updated_after
        print("Making export api request with params " + str(params) + "...")
        # TLS certificate verification is left at its default (enabled): the
        # request carries the API token, so it must not be sent over an
        # unverified connection.
        response = requests.get(
            url="https://readwise.io/api/v2/export/",
            params=params,
            headers=auth_header,
            timeout=60,  # avoid hanging forever on a stalled connection
        )
        # Fail loudly on HTTP errors instead of hitting a KeyError on 'results'.
        response.raise_for_status()
        # Decode the body once and reuse it for both fields.
        payload = response.json()
        full_data.extend(payload['results'])
        next_page_cursor = payload.get('nextPageCursor')
        if not next_page_cursor:
            break
    return full_data



In [4]:
# Get all of a user's books/highlights from all time.
# No updated_after value is passed, so no 'updatedAfter' filter is sent;
# fetch_from_export_api keeps requesting pages until no nextPageCursor is returned.
all_data = fetch_from_export_api()

Making export api request with params {}...




Making export api request with params {'pageCursor': 21090053}...




Making export api request with params {'pageCursor': 15216439}...




Making export api request with params {'pageCursor': 12953763}...




Making export api request with params {'pageCursor': 9894774}...




Making export api request with params {'pageCursor': 6297743}...




In [5]:
# Peek at the first record only; displaying the whole export floods the cell output.
all_data[:1]

[{'user_book_id': 31828755,
  'title': 'The PARA Method: Simplify, Organise and Master Your Digital Life',
  'author': 'Tiago Forte',
  'readable_title': 'The PARA Method',
  'source': 'kindle',
  'cover_image_url': 'https://m.media-amazon.com/images/I/61n2ppUc1AL._SY160.jpg',
  'unique_url': None,
  'book_tags': [],
  'category': 'books',
  'document_note': None,
  'readwise_url': 'https://readwise.io/bookreview/31828755',
  'source_url': None,
  'asin': 'B0BZ1GNCZG',
  'highlights': [{'id': 590605098,
    'text': 'In contrast, PARA is “platform agnostic,” meaning it is one system that can be implemented everywhere, including: •\xa0\xa0Your to-do list app •\xa0\xa0Your computer file system (or Documents folder) •\xa0\xa0Your cloud storage drive •\xa0\xa0Your digital notetaking app •\xa0\xa0Other platforms where information is stored',
    'location': 658,
    'location_type': 'location',
    'note': '',
    'color': 'yellow',
    'highlighted_at': '2023-09-03T04:56:00Z',
    'created_

In [6]:
# Flatten the export payload into one record per highlight, keyed by highlight id.
highlight_dict_2 = {}

for resource in all_data:
	# Resource-level fields, read once and repeated on every highlight of this resource.
	title, author, category, cover_image_url, resource_id, source_url = (
		resource[key]
		for key in ('title', 'author', 'category', 'cover_image_url', 'user_book_id', 'source_url')
	)

	for highlight in resource['highlights']:
		# Highlight-level fields; 'date' is taken from the highlight's 'updated_at'.
		highlight_id, text, note, location, date, color, url, is_favorite = (
			highlight[key]
			for key in ('id', 'text', 'note', 'location', 'updated_at', 'color', 'url', 'is_favorite')
		)

		# 'is_discarded' is not copied into the record.
		highlight_dict_2[highlight_id] = dict(
			resource_id=resource_id,
			resource_title=title,
			author=author,
			text=text,
			note=note,
			location=location,
			date=date,
			category=category,
			cover_image_url=cover_image_url,
			source_url=source_url,
			color=color,
			url=url,
			is_favorite=is_favorite,
		)


In [7]:
#Create dataframe with highlights_dict

def create_dataframe(highlight_dict):
	"""Build a DataFrame with one row per entry of ``highlight_dict``.

	Parameters
	----------
	highlight_dict : dict
		Mapping of row label (highlight id) to a dict of field values.

	Returns
	-------
	pandas.DataFrame
		Indexed by the keys of ``highlight_dict`` with the fixed column set
		listed below. An empty input gives an empty frame that still has
		those columns.
	"""
	columns = ['resource_id', 'resource_title', 'author', 'text', 'note', 'location', 'date', 'category', 'cover_image_url',  'source_url', 'color', 'url', 'is_favorite']
	# Use the argument that was passed in, not the notebook-level highlight_dict_2.
	df = pd.DataFrame.from_dict(highlight_dict, orient='index')
	# Select columns by name instead of assigning df.columns positionally:
	# positional assignment raises on an empty input and would silently
	# mislabel data if the records' key order ever changed.
	return df.reindex(columns=columns)


In [8]:
# Build the highlights table from the flattened dict (row labels are the highlight ids).
df = create_dataframe(highlight_dict_2)

In [9]:
# Save the DataFrame as a pickle file, using a path relative to the current working directory.
# NOTE: pickle files should only be loaded back from sources you trust.
df.to_pickle("all_highlights_df.pkl")