# Overview

The goal of this code is to use the WaniKani API to retrieve review items that the user has still not "burned" despite those items having been available for a long time, so that the user can review them further. The end output of this code is two .csv files that can be imported into Anki and used as flashcards for further study.

In [34]:
import os
import json
import sqlite3
import pandas as pd
import requests
import datetime
from pprint import pprint

# WaniKani API token. Prefer the WANIKANI_API_TOKEN environment variable so the
# secret never has to be saved inside the notebook; the placeholder remains as a
# fallback for anyone who wants to paste the token in by hand.
token = os.environ.get('WANIKANI_API_TOKEN', 'YOUR TOKEN HERE')

# Function Definitions

In [35]:
def dbg_print(statement, cond = True):
    """Print ``statement`` only when ``cond`` is truthy.

    Args:
        statement: The object to print.
        cond: Flag controlling whether anything is printed. Any truthy value
            enables output (not only the literal ``True``).
    """
    if cond:
        print(statement)


# Default debug switch for the notebook; debug output is off unless changed.
debug_cond = False

The definition of wanikani_req is due to trunklayer. (See https://community.wanikani.com/t/python-api-call-headers-problem/45614/2)

In [36]:
def wanikani_req(token, endpoint, timeout=30):
    """Send an authenticated GET request to the WaniKani v2 API.

    Args:
        token: Personal API token, sent as a ``Bearer`` authorization header.
        endpoint: Path (plus optional query string) appended to the API base
            URL, e.g. ``'assignments?started=true'``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    address = 'https://api.wanikani.com/v2'

    headers = {
        'Authorization': f'Bearer {token}'
    }

    with requests.get(f'{address}/{endpoint}', headers=headers, timeout=timeout) as r:
        # Fail here, with the HTTP status, rather than letting callers trip
        # over a missing key in an error payload later on.
        r.raise_for_status()
        return r.json()


If a response from the WaniKani API is too large, it is split into multiple pages. The following code ensures that all pages are pulled.

In [37]:
def wanikani_pull(token, endpoint, debug_cond = False):
    """Fetch every page of a WaniKani collection endpoint.

    Collection endpoints return a limited number of resources per response
    (the original notes mention 500, or 1000 for some calls), so larger
    results are spread over several pages. This function requests the first
    page and then keeps following ``pages.next_url`` until the API reports
    that there is no next page.

    Args:
        token: Personal API token, passed through to ``wanikani_req``.
        endpoint: Collection endpoint relative to the API base URL.
        debug_cond: When truthy, progress messages are printed via
            ``dbg_print``.

    Returns:
        A dict ``{"data": [...]}`` holding the resources of all pages, so the
        result keeps a "JSON-like" shape.
    """
    address = 'https://api.wanikani.com/v2'

    # NOTE(review): this echoes the API token when debugging is enabled.
    dbg_print("TOKEN SPECIFIED: " + token, debug_cond)
    dbg_print("ENDPOINT SPECIFIED: " + endpoint, debug_cond)

    assignment_arr = []

    req = wanikani_req(token, endpoint)
    # Only used for progress reporting; loop termination relies on next_url.
    requests_to_get = req['total_count']

    dbg_print("TOTAL REQUESTS TO GET: " + str(requests_to_get), debug_cond)

    assignment_arr.extend(req['data'])
    requests_to_get -= len(req['data'])

    dbg_print("After batch 1 added...", debug_cond)
    dbg_print("TOTAL REQUESTS TO GET: " + str(requests_to_get), debug_cond)

    i = 1
    next_url = req['pages']['next_url']
    # Following next_url (instead of counting down total_count) cannot spin
    # forever or request a bogus URL if the count and the pages disagree.
    while next_url:
        i += 1

        # next_url is absolute; strip the base URL (and the following '/')
        # to recover the endpoint form that wanikani_req expects.
        endpoint = next_url[len(address) + 1:]

        dbg_print("NEW ENDPOINT: " + endpoint, debug_cond)
        req = wanikani_req(token, endpoint)
        assignment_arr.extend(req['data'])
        requests_to_get -= len(req['data'])

        dbg_print("After batch " + str(i) + " added... ", debug_cond)
        dbg_print("TOTAL REQUESTS TO GET: " + str(requests_to_get), debug_cond)

        next_url = req['pages']['next_url']

    # This final step is so that our data appears in a "JSON-like" format
    assignment_dict = {"data": assignment_arr}
    dbg_print("PULL COMPLETED!", debug_cond)
    return assignment_dict

Before I can pass the dates as date values into the SQL database, I need to clean them up to match SQL's datetime format.

In [38]:
def date_cleanup(date):
    """Convert an API timestamp into ``'YYYY-MM-DD HH:MM:SS'`` text.

    Args:
        date: A timestamp string such as ``'2021-09-29T00:27:27.473588Z'``,
            or a non-string value (e.g. ``None``) when the timestamp is
            missing.

    Returns:
        The date part and the first eight characters of the time part joined
        by a space, or the literal string ``"NULL"`` when ``date`` is not a
        string.
    """
    # An explicit type check replaces the former bare ``except:``, which also
    # swallowed KeyboardInterrupt/SystemExit.
    if not isinstance(date, str):
        return "NULL"

    return date[0:10] + ' ' + date[11:11+8]

Useful helper function for computing the date and time of some days ago (and returning it as a string that can be used in SQL queries).

In [39]:
def x_days_ago(days):
    """Return the UTC date and time ``days`` days ago as SQL-style text.

    The API timestamps handled in this notebook carry a ``Z`` (UTC) suffix,
    so the cutoff is computed in UTC as well; using local time would shift
    the comparison by the machine's UTC offset.

    Args:
        days: Number of days to go back from the current moment.

    Returns:
        A string formatted as ``'YYYY-MM-DD HH:MM:SS'``.
    """
    current_datetime = datetime.datetime.now(datetime.timezone.utc)
    cutoff_line = current_datetime - datetime.timedelta(days = days)

    return cutoff_line.strftime("%Y-%m-%d %H:%M:%S")

In case I need to clean the slate...

In [40]:
# If there's already a database file with the name we want, let's trash it first before doing anything:
def sqlite_delete(db_path):
    """Remove the database file at ``db_path`` if it exists.

    Prints a short message stating whether the file was deleted or was not
    there to begin with.
    """
    # Nothing to remove: report it and stop.
    if not os.path.exists(db_path):
        print(f'Database {db_path} does not exist.')
        return

    os.remove(db_path)
    print(f'Database {db_path} deleted.')

In [41]:
def sqlite_schema(db_path, schema):
    """Execute a schema statement against the SQLite database at ``db_path``.

    Args:
        db_path: Path of the SQLite database file (created by ``connect`` if
            it does not exist yet).
        schema: A single SQL statement, e.g. a ``CREATE TABLE``.

    Raises:
        sqlite3.Error: If the statement cannot be executed. The connection is
            closed in that case as well.
    """
    conn = sqlite3.connect(db_path)
    try:
        conn.execute(schema)
        print('Schema at path ' + db_path + ' successfully executed')
    finally:
        # Always release the file handle, even when the statement fails.
        conn.close()

# Building a Database of Assignments

Next, let's create the "schema", or structure for the SQL database that we'll use.

In [42]:
# Start from an empty database file, then create the two tables that mirror
# the JSON-like data returned by the API.
db_path = 'wanikani.sqlite'
sqlite_delete(db_path)

conn = sqlite3.connect(db_path)
for drop_statement in (
    "DROP TABLE IF EXISTS assignments",
    "DROP TABLE IF EXISTS subjects",
):
    conn.execute(drop_statement)

# One row per assignment; the *_at columns hold cleaned-up timestamp text.
schema_assignments = '''
CREATE TABLE assignments (
    id INTEGER PRIMARY KEY,
    subject_id INTEGER,
    subject_type TEXT,
    srs_stage INTEGER,
    unlocked_at DATETIME,
    started_at DATETIME,
    passed_at DATETIME,
    burned_at DATETIME,
    available_at DATETIME,
    resurrected_at DATETIME,
    hidden BOOL
);
'''

# One row per subject, with a single reading and a single meaning.
schema_subjects = '''
CREATE TABLE subjects (
    subject_id INTEGER PRIMARY KEY,
    level INTEGER,
    type TEXT,
    slug TEXT,
    reading TEXT,
    meaning TEXT
);
'''

for table_schema in (schema_assignments, schema_subjects):
    sqlite_schema(db_path, table_schema)

conn.close()

Database wanikani.sqlite deleted.
Schema at path wanikani.sqlite successfully executed
Schema at path wanikani.sqlite successfully executed


Finally, let's build the database

In [43]:
# Pull all started assignments, then all kanji/vocabulary subjects, and store
# both in the SQLite database. A single connection is used for both inserts
# and is closed even if a pull or an insert fails.
endpoint = 'assignments?started=true'
assignment_dict = wanikani_pull(token, endpoint)

# Timestamp fields, in the same order as the columns of the INSERT below.
assignment_date_fields = (
    'unlocked_at',
    'started_at',
    'passed_at',
    'burned_at',
    'available_at',
    'resurrected_at',
)

conn = sqlite3.connect(db_path)
try:
    # Note, the dates all need to be cleaned up before they are inserted
    assignment_rows = [
        (
            obj['id'],
            obj['data']['subject_id'],
            obj['data']['subject_type'],
            obj['data']['srs_stage'],
            *(date_cleanup(obj['data'][field]) for field in assignment_date_fields),
            obj['data']['hidden'],
        )
        for obj in assignment_dict['data']
    ]
    conn.executemany("INSERT INTO assignments (id, subject_id, subject_type, srs_stage, unlocked_at, started_at, passed_at, burned_at, available_at, resurrected_at, hidden) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", assignment_rows)

    # commit the changes to the database
    conn.commit()
    print("CHANGES COMMITTED TO DATABASE!")

    endpoint = 'subjects?types=kanji,vocabulary'
    assignment_dict = wanikani_pull(token, endpoint)

    # Only the first listed reading and meaning of each subject are stored.
    subject_rows = [
        (
            obj['id'],
            obj['data']['level'],
            obj['object'],
            obj['data']['slug'],
            obj['data']['readings'][0]['reading'],
            obj['data']['meanings'][0]['meaning'],
        )
        for obj in assignment_dict['data']
    ]
    conn.executemany("INSERT INTO subjects (subject_id, level, type, slug, reading, meaning) VALUES (?, ?, ?, ?, ?, ?)", subject_rows)

    # commit the changes to the database
    conn.commit()
    print("CHANGES COMMITTED TO DATABASE!")
finally:
    # close the database connection
    conn.close()

CHANGES COMMITTED TO DATABASE!
CHANGES COMMITTED TO DATABASE!


In [44]:
# Join subjects with assignments, then export every item that was started
# before the cutoff but never burned: one CSV for readings, one for meanings.
conn = sqlite3.connect(db_path)
try:
    conn.execute("DROP TABLE IF EXISTS main")

    query = "CREATE TABLE main AS SELECT * FROM subjects INNER JOIN assignments ON subjects.subject_id = assignments.subject_id"
    conn.execute(query)

    cutoff_date = x_days_ago(360)

    # 'reading' and 'meaning' are fixed column names, so interpolating them is
    # safe; the cutoff value is passed as a bound parameter instead.
    for field in ('reading', 'meaning'):
        query_table = pd.read_sql_query(
            f"SELECT slug || ' ({field})' as item, {field} FROM main "
            "WHERE burned_at = 'NULL' AND started_at < ? ORDER BY subject_id ASC;",
            conn,
            params=(cutoff_date,),
        )
        query_table.to_csv(f'{field}s_to_practice.csv', index=False)
        pprint(query_table)
finally:
    # Release the database file even if a query or the export fails.
    conn.close()

              item reading
0      右 (reading)      ゆう
1      石 (reading)      せき
2      立 (reading)      りつ
3      友 (reading)      ゆう
4      少 (reading)     しょう
5      心 (reading)      しん
6      写 (reading)      しゃ
7      土 (reading)      つち
8     六日 (reading)     むいか
9     王子 (reading)     おうじ
10    戸口 (reading)     とぐち
11     方 (reading)      かた
12     北 (reading)      きた
13    中古 (reading)    ちゅうこ
14    広い (reading)     ひろい
15    公用 (reading)    こうよう
16    先ず (reading)      まず
17    先日 (reading)    せんじつ
18    名人 (reading)    めいじん
19   一文字 (reading)   いちもんじ
20    元気 (reading)     げんき
21    人口 (reading)    じんこう
22    七日 (reading)     なのか
23   丸ごと (reading)    まるごと
24  口にする (reading)   くちにする
              item              meaning
0      右 (meaning)                Right
1      石 (meaning)                Stone
2      立 (meaning)                Stand
3      友 (meaning)               Friend
4      少 (meaning)                  Few
5      心 (meaning)                Heart
6      写 (meaning)