In [1]:
import os
import numpy as np
from PIL import Image
import sqlite3
from sklearn.datasets import load_digits
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

# File extensions (leading dot included, matched case-sensitively) that are
# treated as handwriting images.
INCLUDED_EXTENSION = [".png", ".jpg"]

# SQLite database file that stores the image file names.
dbname = 'images.db'

# Directory listing, sorted by name; reused further down in this cell.
filenames = sorted(os.listdir('handwriting_pics'))

# Rebuild the table from scratch so the cell can be re-run: a bare
# CREATE TABLE raises sqlite3.OperationalError once the table exists, and
# inserting into a surviving table would duplicate every row.
conn = sqlite3.connect(dbname)
try:
    cur = conn.cursor()
    cur.execute('DROP TABLE IF EXISTS image_info')
    cur.execute('CREATE TABLE image_info(id INTEGER PRIMARY KEY AUTOINCREMENT, filename STRING)')
    for filename in filenames:
        base, ext = os.path.splitext(filename)
        if ext not in INCLUDED_EXTENSION:
            continue
        cur.execute('INSERT INTO image_info(filename) values(?)', (filename,))
    conn.commit()
    cur.close()
finally:
    # Release the database handle even if one of the statements above failed.
    conn.close()

# Read back every (id, filename) row stored in image_info.
conn = sqlite3.connect(dbname)
try:
    cur = conn.cursor()
    cur.execute('SELECT * FROM image_info')
    pics_info = cur.fetchall()
    cur.close()
finally:
    conn.close()

# Turn each stored image into one flat row of 64 brightness values (8x8 pixels).
# Rows are collected in a list and stacked once at the end instead of
# re-allocating the whole matrix with np.r_ on every iteration.
img_rows = []
for pic_info in pics_info:
    filename = pic_info[1]
    base, ext = os.path.splitext(filename)
    if ext not in INCLUDED_EXTENSION:
        continue
    img = Image.open(f'handwriting_pics/{filename}').convert('L')
    # Invert the grayscale values (255 - value) after shrinking to 8x8.
    img_data256 = 255 - np.array(img.resize((8, 8)))

    min_bright = img_data256.min()
    max_bright = img_data256.max()
    if max_bright == min_bright:
        # A uniform image has zero brightness range; rescaling would be 0/0.
        img_data16 = np.zeros(img_data256.shape)
    else:
        # Stretch the brightness range of this image to 0..16.
        img_data16 = (img_data256 - min_bright) / (max_bright - min_bright) * 16
    img_rows.append(img_data16.astype(np.uint8).reshape(-1))

# float64 matrix of shape (number of images, 64); (0, 64) when no image qualified.
img_test = np.array(img_rows, dtype=np.float64).reshape(-1, 64)

# Train a logistic-regression classifier on half of the scikit-learn digits data.
digits = load_digits()
X = digits.data
y = digits.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)
logreg = LogisticRegression(max_iter=2000)
logreg_model = logreg.fit(X_train, y_train)

# Expected digit for every image, taken from the first character of its file
# name. Despite the name, X_true holds labels, not features.
# The labels are derived from pics_info (the same database rows, with the same
# extension filter, that produced img_test) so labels and feature rows cannot
# drift apart when the directory listing and the table contents differ.
X_true = np.array([
    int(pic_info[1][:1])
    for pic_info in pics_info
    if os.path.splitext(pic_info[1])[1] in INCLUDED_EXTENSION
])
pred_logreg = logreg_model.predict(img_test)

print('손글씨 문자의 판별 결과')
print('관측 결과:', X_true)
print('예측 결과:', pred_logreg)
print('정답률:', logreg_model.score(img_test, X_true))


손글씨 문자의 판별 결과
관측 결과: [0 1 2 3 4 5 6 7 8 9]
예측 결과: [1 1 4 4 4 4 4 7 4 9]
정답률: 0.4


In [2]:
import os, sqlite3
# File extensions (leading dot included) accepted as handwriting images.
INCLUDED_EXTENSION = ['.png', '.jpg']
# SQLite database file passed to the table helpers below.
dbname = 'images.db'
# Directory that contains the handwriting image files.
dir_name = 'handwriting_pics'
def load_filenames(dir_name, included_ext=INCLUDED_EXTENSION):
    """Return the names of the image files found in ``dir_name``.

    Args:
        dir_name: Directory to list.
        included_ext: Accepted file extensions, leading dot included.
            The comparison is an exact (case-sensitive) membership test.

    Returns:
        A list of file names (without the directory part), sorted by name,
        whose extension is in ``included_ext``.
    """
    return [
        entry
        for entry in sorted(os.listdir(dir_name))
        if os.path.splitext(entry)[1] in included_ext
    ]
def create_table(dbname):
    """(Re)create an empty ``image_info`` table in the SQLite file ``dbname``.

    Any existing ``image_info`` table is dropped first, so previously stored
    rows are discarded. ``IF EXISTS`` keeps the call working on a fresh
    database, where a plain ``DROP TABLE`` raises ``sqlite3.OperationalError``.

    Args:
        dbname: Path of the SQLite database file.
    """
    conn = sqlite3.connect(dbname)
    try:
        cur = conn.cursor()
        cur.execute('DROP TABLE IF EXISTS image_info')
        cur.execute('CREATE TABLE image_info (id INTEGER PRIMARY KEY AUTOINCREMENT, filename STRING)')
        conn.commit()
        cur.close()
    finally:
        # Release the database handle even if a statement above failed.
        conn.close()
    print('image_info table is created')
def insert_filenames(dbname, dir_name):
    """Store the image file names found in ``dir_name`` in ``image_info``.

    The names come from ``load_filenames(dir_name)``; one row is inserted per
    name. The ``image_info`` table must already exist.

    Args:
        dbname: Path of the SQLite database file.
        dir_name: Directory whose image file names are stored.
    """
    filenames = load_filenames(dir_name)
    conn = sqlite3.connect(dbname)
    try:
        cur = conn.cursor()
        # One parameterised statement executed once per file name.
        cur.executemany(
            'INSERT INTO image_info(filename) values(?) ',
            ((filename,) for filename in filenames),
        )
        conn.commit()
        cur.close()
    finally:
        # Release the database handle even if the insert failed.
        conn.close()
    print('image file names are inserted into image_info table')
def extract_filenames(dbname):
    """Fetch, print and return every row of the ``image_info`` table.

    Args:
        dbname: Path of the SQLite database file.

    Returns:
        The list of ``(id, filename)`` tuples produced by
        ``SELECT * FROM image_info``. Earlier versions only printed the rows;
        returning them lets callers use the result as well.
    """
    conn = sqlite3.connect(dbname)
    try:
        cur = conn.cursor()
        cur.execute('SELECT * FROM image_info')
        filenames = cur.fetchall()
        cur.close()
    finally:
        # Release the database handle even if the query failed.
        conn.close()
    print('image file names are selected from image_info table')
    print(filenames)
    return filenames
# Rebuild the image_info table, fill it with the image file names found in
# dir_name, then print the stored rows. The order matters: the insert needs
# the table to exist, and the select shows what the insert stored.
create_table(dbname)
insert_filenames(dbname,dir_name)
extract_filenames(dbname)


image_info table is created
image file names are inserted into image_info table
image file names are selected from image_info table
[(1, '0.jpg'), (2, '1.jpg'), (3, '2.jpg'), (4, '3.jpg'), (5, '4.jpg'), (6, '5.jpg'), (7, '6.jpg'), (8, '7.jpg'), (9, '8.jpg'), (10, '9.jpg')]


In [3]:
import numpy as np
from PIL import Image
def get_grayscale(dir_name):
    """Yield every handwriting image in ``dir_name`` converted to grayscale.

    The file names come from ``load_filenames(dir_name)``; each file is opened
    with PIL and converted to mode ``'L'``. The completion message is printed
    only once the generator has been consumed to the end.

    Args:
        dir_name: Directory that contains the image files.

    Yields:
        One grayscale PIL image per file, in file-name order.
    """
    for name in load_filenames(dir_name):
        yield Image.open(f'{dir_name}/{name}').convert('L')
    print("grayscale conversion is completed")
def get_shrinked_img(dir_name, crop_size=8):
    """Convert the images in ``dir_name`` into flat, rescaled brightness rows.

    Each grayscale image from ``get_grayscale(dir_name)`` is resized to
    ``crop_size`` x ``crop_size`` pixels, inverted (``255 - value``), stretched
    so that its own darkest/brightest values map to 0 and 16, truncated to
    whole numbers and flattened into one row.

    Args:
        dir_name: Directory that contains the image files.
        crop_size: Edge length in pixels of the shrunken image. The default of
            8 yields 64 values per image.

    Returns:
        A float64 numpy array of shape ``(number of images, crop_size ** 2)``;
        the array has zero rows when the directory holds no matching image.
    """
    rows = []
    for img in get_grayscale(dir_name):
        img_data256 = 255 - np.array(img.resize((crop_size, crop_size)))
        min_bright = img_data256.min()
        max_bright = img_data256.max()
        if max_bright == min_bright:
            # A uniform image has zero brightness range; rescaling would be 0/0.
            img_data16 = np.zeros(img_data256.shape)
        else:
            img_data16 = (img_data256 - min_bright) / (max_bright - min_bright) * 16
        rows.append(img_data16.astype(np.uint8).reshape(-1))
    # Stack once at the end instead of re-allocating the matrix per image.
    img_test = np.array(rows, dtype=np.float64).reshape(-1, crop_size * crop_size)
    # Previously placed after the return statement and therefore never printed.
    print("image shrink is completed")
    return img_test
# Last expression of the cell: the notebook displays the returned feature matrix.
get_shrinked_img(dir_name)


grayscale conversion is completed


array([[ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  8., 16.,  0.,  0.,  0.,  0.,  0.,
         0., 16., 16.,  8.,  0.,  0.,  0.,  0.,  0.,  8.,  8.,  8.,  0.,
         0.,  0.,  0.,  0.,  8., 16.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0., 16.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  8.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  8.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,
         0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  3.,  3.,
         0.,  0.,  0.,  0.,  0.,  0.,  6.,  3.,  0.,  0.,  0.,  0.,  0.,
         3.,  3.,  3.,  0.,  0.,  0.,  0.,  0.,  0.,  0.,  9.,  3.,  0.,
         0.,  0.,  0.,  3., 16.,  9.,  6.,  0.,  0.,  0.,  

In [4]:
from sklearn.datasets import load_digits
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

def create_logreg_model():
    """Train and return a logistic-regression classifier on the digits dataset.

    The data from ``load_digits()`` is split in half with ``random_state=0``;
    only the training half is used to fit the model.

    Returns:
        The fitted ``LogisticRegression`` estimator (``max_iter=2000``).
    """
    dataset = load_digits()
    # The held-out half of the split is not needed here.
    features_train, _, labels_train, _ = train_test_split(
        dataset.data, dataset.target, test_size=0.5, random_state=0
    )
    fitted_model = LogisticRegression(max_iter=2000).fit(features_train, labels_train)
    print("logistic model is created")
    return fitted_model
def evaluate_probs(dir_name, img_test, logreg_model):
    """Print the model's predictions and accuracy for the handwriting images.

    The expected digit of each image is the first character of its file name,
    taken from ``load_filenames(dir_name)``.

    Args:
        dir_name: Directory that contains the image files.
        img_test: Feature matrix with one row per image, in file-name order.
        logreg_model: Fitted classifier providing ``predict`` and ``score``.
    """
    expected_digits = np.array([
        int(name[:1])
        for name in load_filenames(dir_name)
        if os.path.splitext(name)[1] in INCLUDED_EXTENSION
    ])
    predicted_digits = logreg_model.predict(img_test)
    print('손글씨 문자의 판별 결과')
    print('관측 결과:', expected_digits)
    print('예측 결과:', predicted_digits)
    print('정답률:', logreg_model.score(img_test, expected_digits))
# Build the feature matrix from the handwriting images, train the classifier,
# then print its predictions and accuracy on those images.
img_test=get_shrinked_img(dir_name) 
logreg_model=create_logreg_model()
evaluate_probs(dir_name,img_test,logreg_model)


grayscale conversion is completed
logistic model is created
손글씨 문자의 판별 결과
관측 결과: [0 1 2 3 4 5 6 7 8 9]
예측 결과: [1 1 4 4 4 4 4 7 4 9]
정답률: 0.4


In [1]:
import os
import pickle

from sklearn.datasets import load_digits
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split

### Train a logistic-regression model on the digits dataset and save it ###
# 1. Load the feature matrix X and the labels y
digits = load_digits()
X = digits.data
y = digits.target
# 2. Split the data into training and test halves
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)
# 3. Fit the LogisticRegression model
logreg = LogisticRegression(max_iter=2000)
model = logreg.fit(X_train, y_train)
# 4. Save the fitted model to ./api/model.pickle
model_path = "./api/model.pickle"
# open() does not create missing directories; make sure ./api exists first.
os.makedirs(os.path.dirname(model_path), exist_ok=True)
with open(model_path, mode="wb") as fp:
    pickle.dump(model, fp)
