# Scrape stroke order diagrams from www.kaku-navi.com
Loops automatically through a list of kanji.

In [1]:
import io
import math
import requests
import time

from bs4 import BeautifulSoup
from PIL import Image, ImageOps

## Set parameters
The list `KANJI_LIST` should contain the kanji to generate stroke order diagrams for.

In [2]:
KANJI_LIST = ['挨',
 '剥',
 '箸',
 '斑',
 '氾',
 '汎',
 '眉',
 '膝',
 '肘',
 '蔽',
 '蔑',
 '蜂',
 '貌',
 '睦',
 '勃',
 '昧',
 '枕',
 '蜜',
 '冥',
 '麺',
 '罵',
 '餅',
 '捻',
 '匂',
 '綻',
 '酎',
 '捗',
 '椎',
 '潰',
 '爪',
 '鶴',
 '諦',
 '溺',
 '貼',
 '妬',
 '賭',
 '藤',
 '憧',
 '瞳',
 '頓',
 '那',
 '謎',
 '鍋',
 '虹',
 '冶',
 '弥',
 '湧',
 '慄',
 '憬',
 '拉',
 '摯',
 '曖',
 '楷',
 '鬱',
 '璧',
 '瘍',
 '箋',
 '籠',
 '緻',
 '羞',
 '訃',
 '諧',
 '貪',
 '踪',
 '辣',
 '錮',
 '惧',
 '恣',
 '彙',
 '毀',
 '妖',
 '沃',
 '嵐',
 '藍',
 '璃',
 '侶',
 '瞭',
 '瑠',
 '呂',
 '旦',
 '賂',
 '麓',
 '脇',
 '丼',
 '傲',
 '刹',
 '哺',
 '喩',
 '嗅',
 '嘲',
 '弄',
 '塡',
 '誰',
 '堆',
 '釜',
 '鎌',
 '瓦',
 '韓',
 '玩',
 '伎',
 '畿',
 '亀',
 '僅',
 '巾',
 '錦',
 '駒',
 '串',
 '窟',
 '稽',
 '詣',
 '隙',
 '桁',
 '拳',
 '葛',
 '鍵',
 '顎',
 '骸',
 '宛',
 '闇',
 '椅',
 '畏',
 '萎',
 '咽',
 '淫',
 '臼',
 '唄',
 '餌',
 '怨',
 '艶',
 '旺',
 '臆',
 '俺',
 '苛',
 '牙',
 '崖',
 '蓋',
 '柿',
 '舷',
 '股',
 '虎',
 '凄',
 '醒',
 '戚',
 '脊',
 '煎',
 '羨',
 '腺',
 '詮',
 '膳',
 '曽',
 '狙',
 '遡',
 '爽',
 '痩',
 '捉',
 '袖',
 '遜',
 '汰',
 '唾',
 '裾',
 '須',
 '腎',
 '芯',
 '乞',
 '勾',
 '喉',
 '梗',
 '頃',
 '痕',
 '沙',
 '挫',
 '塞',
 '戴',
 '采',
 '拶',
 '斬',
 '叱',
 '嫉',
 '腫',
 '呪',
 '蹴',
 '拭',
 '尻',
 '柵',
 '頰']

`MAX_PER_ROW` indicated how many diagrams should be placed in a row at most.

In [3]:
MAX_PER_ROW = 10

## Index page by Kanken levels
This page provides the mapping between the desired kanji and the associated page on the website.

In [4]:
INDEX_PAGE = 'https://kaku-navi.com/kanken/'

result = requests.get(INDEX_PAGE)

if result.status_code == 200:
    index_page = BeautifulSoup(result.content, "html.parser")
else:
    print('Request failed.')

Function to find the link to the desired kanji.

In [5]:
def link_to_kanji_page(index_page, kanji):
    return [s for s in index_page.find_all('a') if kanji in s][0]

## Analyse the kanji page to find the links the to stroke order diagrams

In [6]:
def find_links_to_diagrams(link):
    kaku_link = 'https://kaku-navi.com/' + link['href'][3:]
    
    result = requests.get(kaku_link)
    
    if result.status_code == 200:
        soup = BeautifulSoup(result.content, "html.parser")
        return [i for i in soup.find_all('img') if '画目' in i['alt']]
    
    return None

## Fetch all the stroke order diagrams

In [7]:
def fetch_stroke_diagrams(strokes):
    stroke_diagrams = []
    
    for stroke in strokes:
        img_link = 'https://kaku-navi.com' + stroke['src']
        
        img_content = requests.get(img_link)
        
        if result.status_code == 200:
            img_file = io.BytesIO(img_content.content)
        else:
            print(f'  Failed to get {img_link}')
            time.sleep(0.2)
            continue
        
        img = Image.open(img_file).convert('RGB')
        img_gray = ImageOps.grayscale(img)
        stroke_diagrams.append(img_gray)
        
        print(f'  Fetched {img_link}')
        time.sleep(0.2)
    
    return stroke_diagrams

## Combine the diagrams into a single figure with all the steps

In [8]:
def combine_diagrams(stroke_diagrams):
    N_ROWS = math.ceil(len(stroke_diagrams) / MAX_PER_ROW)
    combined_diagram = Image.new(mode='L', size=(640 * MAX_PER_ROW, 640 * N_ROWS), color=255)
    
    x_offset = 0
    y_offset = 0
    
    for diagram in stroke_diagrams:
        combined_diagram.paste(diagram, (x_offset, y_offset))
        x_offset += 640
        if x_offset > 640 * (MAX_PER_ROW - 1):
            x_offset = 0
            y_offset += 640
    
    return combined_diagram

## Automatically loop through the desired list of kanji

In [9]:
for kanji in KANJI_LIST:
    print(f'Processing kanji {kanji}...')
    link = link_to_kanji_page(index_page=index_page, kanji=kanji)
    links_to_strokes = find_links_to_diagrams(link=link)
    stroke_diagrams = fetch_stroke_diagrams(strokes=links_to_strokes)
    combined_diagram = combine_diagrams(stroke_diagrams=stroke_diagrams)
    combined_diagram.save(f'./data/wr_old_supplements/stroke_order_{kanji}.jpg')
    print(f'  Saved.')

Processing kanji 挨...
  Fetched https://kaku-navi.com/img/kanji/kanji05417_01.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji05417_02.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji05417_03.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji05417_04.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji05417_05.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji05417_06.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji05417_07.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji05417_08.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji05417_09.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji05417_10.jpg
  Saved.
Processing kanji 剥...
  Fetched https://kaku-navi.com/img/kanji/kanji01126_01.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji01126_02.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji01126_03.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji01126_04.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji01126_05.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji

  Saved.
Processing kanji 貌...
  Fetched https://kaku-navi.com/img/kanji/kanji16013_01.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji16013_02.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji16013_03.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji16013_04.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji16013_05.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji16013_06.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji16013_07.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji16013_08.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji16013_09.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji16013_10.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji16013_11.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji16013_12.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji16013_13.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji16013_14.jpg
  Saved.
Processing kanji 睦...
  Fetched https://kaku-navi.com/img/kanji/kanji10599_01.jpg
  Fetched https://kaku-navi.com/img/ka

  Fetched https://kaku-navi.com/img/kanji/kanji05500_09.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji05500_10.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji05500_11.jpg
  Saved.
Processing kanji 匂...
  Fetched https://kaku-navi.com/img/kanji/kanji01283_01.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji01283_02.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji01283_03.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji01283_04.jpg
  Saved.
Processing kanji 綻...
  Fetched https://kaku-navi.com/img/kanji/kanji12220_01.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji12220_02.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji12220_03.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji12220_04.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji12220_05.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji12220_06.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji12220_07.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji12220_08.jpg
  Fetched https://kaku-navi.com/img/ka

  Fetched https://kaku-navi.com/img/kanji/kanji16061_10.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji16061_11.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji16061_12.jpg
  Saved.
Processing kanji 妬...
  Fetched https://kaku-navi.com/img/kanji/kanji02989_01.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji02989_02.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji02989_03.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji02989_04.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji02989_05.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji02989_06.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji02989_07.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji02989_08.jpg
  Saved.
Processing kanji 賭...
  Fetched https://kaku-navi.com/img/kanji/kanji16110_01.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji16110_02.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji16110_03.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji16110_04.jpg
  Fetched https://kaku-navi.com/img/ka

  Fetched https://kaku-navi.com/img/kanji/kanji14458_02.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji14458_03.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji14458_04.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji14458_05.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji14458_06.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji14458_07.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji14458_08.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji14458_09.jpg
  Saved.
Processing kanji 冶...
  Fetched https://kaku-navi.com/img/kanji/kanji00951_01.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji00951_02.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji00951_03.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji00951_04.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji00951_05.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji00951_06.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji00951_07.jpg
  Saved.
Processing kanji 弥...
  Fetched https://kaku-navi.com/img/ka

  Fetched https://kaku-navi.com/img/kanji/kanji19762_17.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji19762_18.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji19762_19.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji19762_20.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji19762_21.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji19762_22.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji19762_23.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji19762_24.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji19762_25.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji19762_26.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji19762_27.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji19762_28.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji19762_29.jpg
  Saved.
Processing kanji 璧...
  Fetched https://kaku-navi.com/img/kanji/kanji09896_01.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji09896_02.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji09896_03.jpg
  Fetched

  Saved.
Processing kanji 貪...
  Fetched https://kaku-navi.com/img/kanji/kanji16043_01.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji16043_02.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji16043_03.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji16043_04.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji16043_05.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji16043_06.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji16043_07.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji16043_08.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji16043_09.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji16043_10.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji16043_11.jpg
  Saved.
Processing kanji 踪...
  Fetched https://kaku-navi.com/img/kanji/kanji16427_01.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji16427_02.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji16427_03.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji16427_04.jpg
  Fetched https://kaku-navi.com/img/ka

  Fetched https://kaku-navi.com/img/kanji/kanji14286_03.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji14286_04.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji14286_05.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji14286_06.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji14286_07.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji14286_08.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji14286_09.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji14286_10.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji14286_11.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji14286_12.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji14286_13.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji14286_14.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji14286_15.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji14286_16.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji14286_17.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji14286_18.jpg
  Saved.
Processing kanji 璃...
  Fetched

  Fetched https://kaku-navi.com/img/kanji/kanji00691_02.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji00691_03.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji00691_04.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji00691_05.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji00691_06.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji00691_07.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji00691_08.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji00691_09.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji00691_10.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji00691_11.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji00691_12.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji00691_13.jpg
  Saved.
Processing kanji 刹...
  Fetched https://kaku-navi.com/img/kanji/kanji01082_01.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji01082_02.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji01082_03.jpg
  Fetched https://kaku-navi.com/img/kanji/kanji01082_04.jpg
  Fetched

IndexError: list index out of range