## Original code

In [1]:
import os

import numpy as np
from PIL import Image
import sqlite3
from sklearn.datasets import load_digits
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# File extensions (with the leading dot) that are treated as images.
INCLUDED_EXTENSION = [".png", ".jpg"]

# SQLite database file that stores the image file names.
dbname = 'images.db'

# --- Reset the image_info table so every run starts from an empty table ---
conn = sqlite3.connect(dbname)
cur = conn.cursor()
# IF EXISTS: on a fresh database there is no table yet, and a plain
# DROP TABLE would raise sqlite3.OperationalError.
cur.execute('DROP TABLE IF EXISTS image_info')
# TEXT rather than STRING: SQLite gives a column declared STRING numeric
# affinity, so a purely numeric name would be stored as a number.
cur.execute('CREATE TABLE image_info (id INTEGER PRIMARY KEY AUTOINCREMENT, filename TEXT)')
conn.commit()
conn.close()

# --- Store the names of the image files found in the directory ---
conn = sqlite3.connect(dbname)
cur = conn.cursor()
filenames = sorted(os.listdir('handwriting_pics'))
for filename in filenames:
    base, ext = os.path.splitext(filename)
    if ext not in INCLUDED_EXTENSION:
        continue
    cur.execute('INSERT INTO image_info(filename) values(?)', (filename, ))
conn.commit()
cur.close()
conn.close()

# --- Read the stored rows back as (id, filename) tuples ---
conn = sqlite3.connect(dbname)
cur = conn.cursor()
# ORDER BY id: the rows of img_test and the labels below are matched by
# position, so the row order must be deterministic.
cur.execute('SELECT * FROM image_info ORDER BY id')
pics_info = cur.fetchall()
cur.close()
conn.close()

# --- Convert every stored image into one row of 64 values ---
img_test = np.empty((0, 64))
for pic_info in pics_info:
    filename = pic_info[1]
    base, ext = os.path.splitext(filename)
    if ext not in INCLUDED_EXTENSION:
        continue
    # Grayscale, shrink to 8x8 and invert the 8-bit values.
    img = Image.open(f'handwriting_pics/{filename}').convert('L')
    img_data256 = 255 - np.array(img.resize((8, 8)))

    # Rescale so the darkest pixel becomes 0 and the brightest becomes 16.
    min_bright = img_data256.min()
    max_bright = img_data256.max()
    if max_bright == min_bright:
        # A uniform image has no brightness range; avoid dividing by zero.
        img_data16 = np.zeros(img_data256.shape)
    else:
        img_data16 = (img_data256 - min_bright) / (max_bright - min_bright) * 16
    # astype(np.uint8) truncates the fractional part before the row is appended.
    img_test = np.r_[img_test, img_data16.astype(np.uint8).reshape(1, -1)]

# --- Train a logistic regression model on half of the digits dataset ---
digits = load_digits()
X = digits.data
y = digits.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)
logreg = LogisticRegression(max_iter=2000)
logreg_model = logreg.fit(X_train, y_train)

# --- Ground-truth labels: the first character of each stored file name ---
# Built from pics_info (the same rows, with the same extension filter, that
# produced img_test) so labels and feature rows always line up.
X_true = np.array([
    int(pic_info[1][:1])
    for pic_info in pics_info
    if os.path.splitext(pic_info[1])[1] in INCLUDED_EXTENSION
])
pred_logreg = logreg_model.predict(img_test)

print('Discrimination results of hand-written characters')
print('Observation: ', X_true)
print('Prediction: ', pred_logreg)
print('Accuracy: ', logreg_model.score(img_test, X_true))

Discrimination results of hand-written characters
Observation:  [0 1 2 3 4 5 6 7 8 9]
Prediction:  [4 4 4 4 4 4 4 7 4 4]
Accuracy:  0.2


## Data access

In [2]:
# File extensions (with the leading dot) that are treated as images.
INCLUDED_EXTENSION = [".png", ".jpg"]

# DB create
dbname = 'images.db'
conn = sqlite3.connect(dbname)
cur = conn.cursor()

# init DB
# IF EXISTS: on a fresh database there is no table yet, and a plain
# DROP TABLE would raise sqlite3.OperationalError.
cur.execute('DROP TABLE IF EXISTS image_info')
# Create image_info table
# TEXT rather than STRING: SQLite gives a column declared STRING numeric
# affinity, so a purely numeric name would be stored as a number.
cur.execute('CREATE TABLE image_info (id INTEGER PRIMARY KEY AUTOINCREMENT, filename TEXT)')
# DB commit
conn.commit()
conn.close()

# Insert image file name into DB (only files with an included extension)
conn = sqlite3.connect(dbname)
cur = conn.cursor()
filenames = sorted(os.listdir('handwriting_pics'))
for filename in filenames:
    base, ext = os.path.splitext(filename)
    if ext not in INCLUDED_EXTENSION:
        continue
    cur.execute('INSERT INTO image_info(filename) values(?)', (filename, ))
conn.commit()
cur.close()
conn.close()

# Load image info as a list of (id, filename) tuples
conn = sqlite3.connect(dbname)
cur = conn.cursor()
# ORDER BY id: later cells pair these rows with labels by position, so the
# row order must be deterministic.
cur.execute('SELECT * FROM image_info ORDER BY id')
pics_info = cur.fetchall()
cur.close()
conn.close()

## Preprocessing

In [3]:
# One flattened 64-value row per image; stacked into a matrix after the loop
# instead of re-allocating the whole array on every iteration.
img_rows = []
# Convert images in the directory to data
for pic_info in pics_info:
    filename = pic_info[1]
    # Grayscaling and resizing
    base, ext = os.path.splitext(filename)
    if ext not in INCLUDED_EXTENSION:
        continue
    img = Image.open(f'handwriting_pics/{filename}').convert('L')
    # Shrink to 8x8 and invert the 8-bit values.
    img_data256 = 255 - np.array(img.resize((8, 8)))

    # The minimum brightness is 0 and the maximum brightness is 16.
    min_bright = img_data256.min()
    max_bright = img_data256.max()
    if max_bright == min_bright:
        # A uniform image has no brightness range; avoid dividing by zero.
        img_data16 = np.zeros(img_data256.shape)
    else:
        img_data16 = (img_data256 - min_bright) / (max_bright - min_bright) * 16
    # Put the processed data into the list (astype truncates the fraction)
    img_rows.append(img_data16.astype(np.uint8).reshape(-1))

# Float matrix of shape (number of images, 64); (0, 64) when no image was found.
img_test = np.array(img_rows, dtype=np.float64).reshape(-1, 64)

## Learning/prediction/calculation

In [4]:
# load data
digits = load_digits()
# Divided into objective variables and explanatory variables
X = digits.data
y = digits.target
# Separate into training data and test data
X_tarin, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)
# Create a logistic regression model. Learning with supervised data.
logreg = LogisticRegression(max_iter=2000)
logreg_model = logreg.fit(X_tarin, y_train)

# Correct answer array
X_true = []
for filename in filenames:
    base, ext = os.path.splitext(filename)
    if ext not in INCLUDED_EXTENSION:
        continue
    X_true = X_true + [int(filename[:1])]
X_true = np.array(X_true)

# Distinguish using a trained model
pred_logreg = logreg_model.predict(img_test)

print('Discrimination results of hand-written characters')
print('Observation: ', X_true)
print('Prediction: ', pred_logreg)
print('Accuracy: ', logreg_model.score(img_test, X_true))

Discrimination results of hand-written characters
Observation:  [0 1 2 3 4 5 6 7 8 9]
Prediction:  [4 4 4 4 4 4 4 7 4 4]
Accuracy:  0.2


## Function division

In [5]:
# File extensions (with the leading dot) that are treated as images.
INCLUDED_EXTENSION = [".png", ".jpg"]
# SQLite database file that stores the image file names.
dbname = 'images.db'
# Directory containing the hand-written digit images.
dir_name = 'handwriting_pics'


def load_filenames(dir_name, include_ext=INCLUDED_EXTENSION):
    """Return the sorted names of the image files in a directory.

    Args:
        dir_name: Directory to list.
        include_ext: Extensions (with the leading dot) to keep. Matching is
            case-insensitive, so '.JPG' is accepted when '.jpg' is listed.

    Returns:
        List of file names (not full paths) in sorted order.

    Raises:
        OSError: If ``dir_name`` cannot be listed.
    """
    # Normalise once so each file needs only a set lookup.
    allowed = {ext.lower() for ext in include_ext}
    return [
        filename
        for filename in sorted(os.listdir(dir_name))
        if os.path.splitext(filename)[1].lower() in allowed
    ]


def create_table(dbname):
    '''Create an empty image_info table, replacing any existing one.

    Args:
        dbname: Path of the SQLite database file (created if missing).

    Raises:
        sqlite3.Error: If the database cannot be opened or modified.
    '''
    conn = sqlite3.connect(dbname)
    try:
        cur = conn.cursor()
        # init DB; IF EXISTS keeps the first run on a fresh database from
        # failing with "no such table".
        cur.execute('DROP TABLE IF EXISTS image_info')
        # Create image_info table. TEXT rather than STRING: SQLite gives a
        # column declared STRING numeric affinity, so a purely numeric file
        # name would be stored as a number.
        cur.execute('CREATE TABLE image_info (id INTEGER PRIMARY KEY AUTOINCREMENT, filename TEXT)')
        # DB commit
        conn.commit()
    finally:
        # Release the database file even when a statement fails.
        conn.close()
    print("table is successully created")


def insert_filename(dbname, dir_name):
    '''Insert the image file names found in a directory into the DB.

    Args:
        dbname: Path of the SQLite database file; the image_info table must
            already exist.
        dir_name: Directory whose image files are registered.

    Raises:
        OSError: If ``dir_name`` cannot be listed.
        sqlite3.Error: If the insert fails (e.g. the table is missing).
    '''
    # Use the directory that was passed in, and the shared extension filter,
    # so the table only holds the files the rest of the pipeline processes.
    filenames = load_filenames(dir_name)
    conn = sqlite3.connect(dbname)
    try:
        cur = conn.cursor()
        cur.executemany(
            'INSERT INTO image_info(filename) values(?)',
            [(filename, ) for filename in filenames],
        )
        conn.commit()
        cur.close()
    finally:
        # Release the database file even when the insert fails.
        conn.close()
    print("image file names are successully instred")


def extract_filenames(dbname):
    '''Load every row of the image_info table.

    Args:
        dbname: Path of the SQLite database file.

    Returns:
        List of (id, filename) tuples ordered by id.

    Raises:
        sqlite3.Error: If the query fails (e.g. the table is missing).
    '''
    conn = sqlite3.connect(dbname)
    try:
        cur = conn.cursor()
        # ORDER BY id makes the row order explicit instead of relying on the
        # order in which SQLite happens to scan the table.
        cur.execute('SELECT * FROM image_info ORDER BY id')
        filenames = cur.fetchall()
        cur.close()
    finally:
        # Release the database file even when the query fails.
        conn.close()
    return filenames


In [6]:
# Rebuild the table, register the image file names, then read them back.
create_table(dbname)
insert_filename(dbname, dir_name)
# Last expression of the cell: the fetched (id, filename) rows are displayed.
extract_filenames(dbname)

table is successully created
image file names are successully instred


[(1, '0.jpg'),
 (2, '1.jpg'),
 (3, '2.jpg'),
 (4, '3.jpg'),
 (5, '4.jpg'),
 (6, '5.jpg'),
 (7, '6.jpg'),
 (8, '7.jpg'),
 (9, '8.jpg'),
 (10, '9.jpg')]

In [7]:
def get_grayscale(dir_name):
    """Yield each image in a directory converted to grayscale.

    Args:
        dir_name: Directory containing the images; files are selected and
            ordered by ``load_filenames``.

    Yields:
        One PIL image in mode 'L' per image file.
    """
    for filename in load_filenames(dir_name):
        # Read from the directory that was passed in, not a hard-coded path.
        path = os.path.join(dir_name, filename)
        # convert() returns a new, fully loaded image, so the source file can
        # be closed before the result is handed to the caller.
        with Image.open(path) as img:
            gray = img.convert('L')
        yield gray


def get_shrinked_img(dir_name):
    """Convert images in the directory to data.

    Each image is shrunk to 8x8, inverted, rescaled to the range 0..16 and
    flattened into one row.

    Args:
        dir_name: Directory containing the images.

    Returns:
        Float array of shape (number of images, 64); shape (0, 64) when the
        directory holds no image.
    """
    crop_size = 8
    # Collect the rows and stack them once instead of re-allocating the whole
    # array on every iteration.
    rows = []
    for img in get_grayscale(dir_name):
        # Shrink and invert the 8-bit grayscale values.
        img_data256 = 255 - np.array(img.resize((crop_size, crop_size)))
        # The minimum brightness is 0 and the maximum brightness is 16.
        min_bright, max_bright = img_data256.min(), img_data256.max()
        if max_bright == min_bright:
            # A uniform image has no brightness range; avoid dividing by zero.
            img_data16 = np.zeros(img_data256.shape)
        else:
            img_data16 = (img_data256 - min_bright) / (max_bright - min_bright) * 16
        # astype(np.uint8) truncates the fractional part of each value.
        rows.append(img_data16.astype(np.uint8).reshape(-1))
    return np.array(rows, dtype=np.float64).reshape(-1, crop_size * crop_size)


In [8]:
# Display the feature matrix built from the images in dir_name.
# The result is only shown here; it is not assigned to a variable.
get_shrinked_img(dir_name)

array([[ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  8., 16.,  0.,  0.,  0.,  0.,  0.,
         0., 16., 16.,  8.,  0.,  0.,  0.,  0.,  0.,  8.,  8.,  8.,  0.,
         0.,  0.,  0.,  0.,  8., 16.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0., 16.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  8.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  8.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  3.,  3.,
         0.,  0.,  0.,  0.,  0.,  0.,  6.,  3.,  0.,  0.,  0.,  0.,  0.,
         3.,  3.,  3.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  9.,  3.,  0.,
         0.,  0.,  0.,  3., 16.,  9.,  6.,  0.,  0.,  0.,  

In [9]:
def create_logreg_model():
    """Train a logistic regression classifier on the digits dataset.

    Half of the samples (fixed random_state=0 split) are used for fitting;
    the other half is not used here.

    Returns:
        The fitted LogisticRegression model.
    """
    dataset = load_digits()
    # Explanatory variables are dataset.data, the objective is dataset.target.
    features_train, _, target_train, _ = train_test_split(
        dataset.data, dataset.target, test_size=0.5, random_state=0
    )
    classifier = LogisticRegression(max_iter=2000)
    # fit() returns the estimator itself, so the fitted model is returned.
    return classifier.fit(features_train, target_train)


def evaluate_probs(dir_name, img_test, logreg_model):
    """Print labels, predictions and accuracy for the test images.

    Args:
        dir_name: Directory whose image file names supply the labels; the
            first character of each name is read as the digit.
        img_test: Feature rows passed to the model for prediction and scoring.
        logreg_model: Fitted model providing ``predict`` and ``score``.

    Returns:
        A fixed completion message string.
    """
    # Correct answers, one per file name returned by load_filenames.
    labels = np.array([int(name[:1]) for name in load_filenames(dir_name)])
    # Predictions from the trained model.
    predictions = logreg_model.predict(img_test)
    print('Observation: ', labels)
    print('Prediction: ', predictions)
    print('Accuracy: ', logreg_model.score(img_test, labels))
    return "Propability calculation is successfully finished"

In [10]:
# Train the model, then evaluate it on the hand-written images.
logreg_model = create_logreg_model()
# Build the features here with the refactored function so this cell does not
# depend on an img_test variable left over from an earlier cell.
img_test = get_shrinked_img(dir_name)
evaluate_probs(dir_name, img_test, logreg_model)

Observation:  [0 1 2 3 4 5 6 7 8 9]
Prediction:  [4 4 4 4 4 4 4 7 4 4]
Accuracy:  0.2


'Propability calculation is successfully finished'