In [3]:
import os
import numpy as np
from PIL import Image
import sqlite3
from sklearn.datasets import load_digits
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# File extensions (lower-case, including the dot) that are treated as pictures.
INCLUDED_EXTENTION = [".png", ".jpg"]

# SQLite file holding the picture metadata; sqlite3.connect() creates the
# file when it does not exist yet.
dbname = 'images.db'
conn = sqlite3.connect(dbname)
try:
    cur = conn.cursor()
    # Start from an empty table on every run.  IF EXISTS matters on the very
    # first run: a plain DROP TABLE raises sqlite3.OperationalError when the
    # table has not been created yet.
    cur.execute('DROP TABLE IF EXISTS image_info')
    cur.execute('CREATE TABLE image_info (id INTEGER PRIMARY KEY AUTOINCREMENT, filename STRING)')
    # Persist the schema change.
    conn.commit()
finally:
    # Release the database handle even when one of the statements fails.
    conn.close()

# Store the file name of every supported picture in the image_info table.
conn = sqlite3.connect(dbname)
cur = conn.cursor()
# Sorted so that rows are inserted (and ids assigned) in file-name order.
filenames = sorted(os.listdir('handwriting_pics'))
# One single-column parameter tuple per file whose extension is accepted.
picture_rows = (
    (name,)
    for name in filenames
    if os.path.splitext(name)[1] in INCLUDED_EXTENTION
)
cur.executemany('INSERT INTO image_info(filename) values(?)', picture_rows)
conn.commit()
cur.close()
conn.close()

# Read the stored picture metadata back as a list of (id, filename) tuples.
conn = sqlite3.connect(dbname)
try:
    cur = conn.cursor()
    # Columns are named explicitly because later code indexes the rows by
    # position.  ORDER BY id is required because SQL leaves the row order of
    # an unordered SELECT undefined, and the rows are later matched
    # one-to-one against labels built in insertion (sorted file name) order.
    cur.execute('SELECT id, filename FROM image_info ORDER BY id')
    pics_info = cur.fetchall()
    cur.close()
finally:
    # Release the database handle even when the query fails.
    conn.close()

# Data preprocessing: turn every registered picture into one row of 64
# values in the range 0..16 (an 8x8 grid, flattened), the same layout as
# sklearn's digits data.
pic_rows = []
for pic_info in pics_info:
    # Each row is (id, filename).
    filename = pic_info[1]
    ext = os.path.splitext(filename)[1]
    if ext not in INCLUDED_EXTENTION:
        continue
    # The context manager closes the picture file as soon as the grayscale
    # copy has been made instead of leaving that to garbage collection.
    with Image.open(f'handwriting_pics/{filename}') as raw_img:
        img = raw_img.convert('L')
    # Shrink to 8x8 and invert so that dark ink gets high values.
    img_data256 = 255 - np.array(img.resize((8, 8)))

    # Stretch the brightness so the minimum maps to 0 and the maximum to 16.
    min_bright = img_data256.min()
    max_bright = img_data256.max()
    if max_bright == min_bright:
        # A uniform picture has no contrast to stretch; dividing by the zero
        # range would produce NaN, so treat it as an all-zero (blank) image.
        img_data16 = np.zeros(img_data256.shape)
    else:
        img_data16 = (img_data256 - min_bright) / (max_bright - min_bright) * 16
    # Truncate to whole numbers and flatten the grid into a single row.
    pic_rows.append(img_data16.astype(np.uint8).reshape(-1))

# Stack all rows at once (appending to an array inside the loop copies the
# whole array every iteration).  The reshape keeps the (0, 64) shape when no
# picture was found; float64 matches the dtype the matrix had before.
img_test = np.array(pic_rows, dtype=np.float64).reshape(-1, 64)

# Train a logistic-regression classifier on sklearn's bundled digits data.
digits = load_digits()
X, y = digits.data, digits.target
# Half of the samples are held out; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.5,
    random_state=0,
)
logreg = LogisticRegression(max_iter=2000)
# fit() returns the estimator itself, so both names refer to the trained model.
logreg_model = logreg.fit(X_train, y_train)

# Ground-truth digit of every picture, read from the first character of its
# file name; a name that does not start with a digit raises ValueError.
# The labels are built from pics_info, the same rows (with the same extension
# filter) that the feature matrix img_test is built from, so label i always
# belongs to feature row i.
# NOTE: despite the name, X_true holds labels, not features; the name is kept
# because the reporting code refers to it.
X_true = np.array([
    int(pic_info[1][:1])
    for pic_info in pics_info
    if os.path.splitext(pic_info[1])[1] in INCLUDED_EXTENTION
])

# Classify the handwritten pictures with the trained model.
pred_logreg = logreg_model.predict(img_test)

# Report the expected digits, the predicted digits and the accuracy.
print('手書き文字の判別結果')
print('観測結果:', X_true)
print('予測結果:', pred_logreg)
# score() returns the fraction of pictures whose prediction matches the label.
accuracy = logreg_model.score(img_test, X_true)
print('正解率:', accuracy)

手書き文字の判別結果
観測結果: [0 1 2 3 4 5 6 7 8 9]
予測結果: [4 4 4 4 4 4 4 7 4 4]
正解率: 0.2


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
