In [1]:
# Mount Google Drive at /content/drive; every project path used later in this
# notebook lives underneath that mount point. In the recorded run the call
# prompted for an OAuth authorization code before mounting.
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
import sys

# Directory put on the import path; presumably where the `common` package
# imported in the following cell lives.
SRC_DATA_DIR = '/content/drive/My Drive/Colab Notebooks/FontStyler/src/data/'

# Prepend only once, so re-running this cell does not stack duplicate entries
# on sys.path.
if SRC_DATA_DIR not in sys.path:
    sys.path.insert(0, SRC_DATA_DIR)

In [0]:
import numpy as np
import os
import io
import csv
from common.dataset import PickledImageProvider
from common.dataset import KoreanFontDataset
from common.utils import pad_seq, bytes_to_file, \
    read_split_image, shift_and_resize_image, normalize_image, \
    tight_crop_image, add_padding

import matplotlib.pyplot as plt
%matplotlib inline

In [0]:
from PIL import Image
import os
import os.path

import torch.utils.data
import torchvision.transforms as transforms

def default_image_loader(path):
    """Load the image stored at ``path`` and return it converted to RGB.

    The image is opened inside a ``with`` block so the underlying file is
    closed deterministically once the converted copy has been produced,
    rather than being left to the garbage collector.
    """
    with Image.open(path) as img:
        # convert() reads the pixel data and returns a new, independent image,
        # so it remains valid after the source file is closed.
        return img.convert('RGB')

class TripletImageLoader(torch.utils.data.Dataset):
    """Dataset of (anchor, far, close) image triplets taken from pickled examples.

    Each item is ``((anchor, far, close), info)``: the three images are decoded
    from their stored bytes, normalized, tightly cropped and re-padded, and
    ``info`` holds the anchor's example index and label.
    """

    def __init__(self, pickled, triplets_file_name, base_path=None, filenames_filename=None, transform=None,
                 loader=default_image_loader):
        """
        pickled:
            Object exposing ``examples``, an indexable collection whose items
            carry the encoded image bytes at position 1.

        triplets_file_name:
            A text file with one triplet per line, written as four
            whitespace-separated integers ``a b c label``.
            ``a``, ``b`` and ``c`` index into ``pickled.examples``; image a
            (anchor) is more similar to image c (close) than it is to
            image b (far), and ``label`` is the anchor's label,
            e.g., 0 2017 42 3. Blank lines are skipped.

        base_path, filenames_filename:
            Not used; kept so existing callers keep working.

        transform, loader:
            Stored on the instance; ``__getitem__`` does not apply them.
        """
        self.dset = pickled.examples
        triplets = []
        anchor_labels = []
        # The context manager closes the file, and each line is split only once.
        with open(triplets_file_name) as triplet_file:
            for line in triplet_file:
                fields = line.split()
                if not fields:
                    continue  # tolerate blank lines (e.g. a trailing newline)
                triplets.append((fields[0], fields[1], fields[2]))  # anchor, far, close
                anchor_labels.append(int(fields[3]))
        self.triplets = triplets
        self.labels = anchor_labels
        self.transform = transform
        self.loader = loader

    def _load_centered(self, example_index):
        """Decode one example's image bytes into a normalized, cropped and padded array."""
        # Only the bytes (position 1 of the example tuple) are used.
        image_bytes = self.dset[int(example_index)][1]
        image = np.array(Image.open(io.BytesIO(image_bytes)))
        image = normalize_image(image)
        cropped, _cropped_size = tight_crop_image(image, verbose=False)
        return add_padding(cropped, verbose=False)

    def __getitem__(self, index):
        """Return ``((anchor, far, close), info)`` for the triplet at ``index``."""
        anchor_idx, far_idx, close_idx = self.triplets[index]

        # anchor_index is included so results can be traced back when clustering.
        info = {
            'anchor_index': int(anchor_idx),
            'anchor_label': self.labels[index],
        }

        images = tuple(
            self._load_centered(example_index)
            for example_index in (anchor_idx, far_idx, close_idx)
        )
        return images, info

    def __len__(self):
        """Number of triplets read from the triplet file."""
        return len(self.triplets)

In [0]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import shutil
import os
import io

class Tripletnet(nn.Module):
    """Runs one shared embedding network over the three members of a triplet."""

    def __init__(self, embeddingnet):
        super().__init__()
        self.embeddingnet = embeddingnet

    def forward(self, x, y, z):
        """Embed ``x``, ``y`` and ``z`` and measure L2 distances from ``x``.

        Returns ``(dist_a, dist_b, embedded_x, embedded_y, embedded_z)``, where
        ``dist_a`` is the pairwise distance between the embeddings of ``x`` and
        ``y``, and ``dist_b`` the one between the embeddings of ``x`` and ``z``.
        """
        # The generator is consumed in x, y, z order, matching three sequential calls.
        embedded_x, embedded_y, embedded_z = (
            self.embeddingnet(member) for member in (x, y, z)
        )
        dist_a = F.pairwise_distance(embedded_x, embedded_y, p=2)
        dist_b = F.pairwise_distance(embedded_x, embedded_z, p=2)
        return dist_a, dist_b, embedded_x, embedded_y, embedded_z

In [0]:
# convAE code from FontStyler (layers.py)
class Encoder_conv(nn.Module):
    """Five-stage strided-convolution encoder for single-channel image batches.

    The shape notes in ``forward`` assume 128x128 input images.
    """

    def __init__(self, img_dim=1, conv_dim=16):  # the output dim will be 128
        super().__init__()
        # Attribute names and creation order are kept as-is: they determine the
        # state-dict keys that saved checkpoints are matched against.
        self.conv1 = conv2d(img_dim, conv_dim, k_size=5, stride=2, pad=2, dilation=2, lrelu=False, bn=False)
        self.conv2 = conv2d(conv_dim, conv_dim*2, k_size=5, stride=4, pad=2, dilation=2)
        self.conv3 = conv2d(conv_dim*2, conv_dim*4, k_size=4, stride=4, pad=1, dilation=1)
        self.conv4 = conv2d(conv_dim*4, conv_dim*8)
        self.conv5 = conv2d(conv_dim*8, conv_dim*8)

    def forward(self, images):
        """Encode ``images`` of shape (batch, img, img) into (batch, conv_dim*8, 1, 1)."""
        # Add the channel axis: (batch, img, img) -> (batch, 1, 128, 128)
        features = images.unsqueeze(dim=1)
        # Per-stage output shapes:
        #   conv1 -> (batch, conv_dim,   64, 64)
        #   conv2 -> (batch, conv_dim*2, 16, 16)
        #   conv3 -> (batch, conv_dim*4,  4,  4)
        #   conv4 -> (batch, conv_dim*8,  2,  2)
        #   conv5 -> (batch, conv_dim*8,  1,  1)
        for stage in (self.conv1, self.conv2, self.conv3, self.conv4, self.conv5):
            features = stage(features)
        return features

In [0]:
# function.py
import torch.nn as nn

def batch_norm(c_out, momentum=0.1):
    """Build a 2-D batch-normalization layer over ``c_out`` channels."""
    layer = nn.BatchNorm2d(num_features=c_out, momentum=momentum)
    return layer

def conv2d(c_in, c_out, k_size=3, stride=2, pad=1, dilation=1, bn=True, lrelu=True, leak=0.2):
    """Assemble a ``[LeakyReLU] -> Conv2d -> [BatchNorm2d]`` block as ``nn.Sequential``.

    ``lrelu`` prepends the activation (negative slope ``leak``) and ``bn``
    appends batch normalization.

    NOTE: ``dilation`` is accepted but not forwarded to ``nn.Conv2d``, so the
    convolution always uses the default dilation. This is kept on purpose:
    applying it would change the layers that the checkpoint loaded later in
    this notebook is matched against.
    """
    activation = [nn.LeakyReLU(leak)] if lrelu else []
    convolution = [nn.Conv2d(c_in, c_out, kernel_size=k_size, stride=stride, padding=pad)]
    normalization = [nn.BatchNorm2d(c_out)] if bn else []
    return nn.Sequential(*activation, *convolution, *normalization)

def deconv2d(c_in, c_out, k_size=3, stride=1, pad=1, dilation=1, bn=True, dropout=False, p=0.5):
    """Assemble ``LeakyReLU(0.2) -> ConvTranspose2d -> [BatchNorm2d] -> [Dropout]``.

    ``bn`` appends batch normalization and ``dropout`` appends a dropout layer
    with probability ``p``.

    NOTE: ``dilation`` is accepted but not forwarded to ``nn.ConvTranspose2d``;
    the layer always uses the default dilation.
    """
    stages = [
        nn.LeakyReLU(0.2),
        nn.ConvTranspose2d(c_in, c_out, kernel_size=k_size, stride=stride, padding=pad),
    ]
    if bn:
        stages.append(nn.BatchNorm2d(c_out))
    if dropout:
        stages.append(nn.Dropout(p))
    return nn.Sequential(*stages)

def lrelu(leak=0.2):
    """Return a ``LeakyReLU`` activation whose negative slope is ``leak``."""
    activation = nn.LeakyReLU(negative_slope=leak)
    return activation

def dropout(p=0.2):
    """Return a ``Dropout`` layer that zeroes elements with probability ``p``."""
    layer = nn.Dropout(p=p)
    return layer

In [9]:
checkpoint_pth = '/content/drive/My Drive/Colab Notebooks/FontStyler/src/data/runs/TripleNet/model_best.pth.tar'
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# map_location remaps the stored tensors onto the device chosen above, so a
# checkpoint saved on a GPU also loads on a CPU-only runtime.
# NOTE: torch.load unpickles the file; only load checkpoints from a trusted source.
best = torch.load(checkpoint_pth, map_location=device)

# Load the trained TripletNet
model = Encoder_conv()
tnet  = Tripletnet(model)
tnet.load_state_dict(best['state_dict'])

<All keys matched successfully>

In [10]:
# Move the network to the device selected when the checkpoint was loaded.
# Unlike an unconditional tnet.cuda(), this also works on a CPU-only runtime.
tnet.to(device)

Tripletnet(
  (embeddingnet): Encoder_conv(
    (conv1): Sequential(
      (0): Conv2d(1, 16, kernel_size=(5, 5), stride=(2, 2), padding=(2, 2))
    )
    (conv2): Sequential(
      (0): LeakyReLU(negative_slope=0.2)
      (1): Conv2d(16, 32, kernel_size=(5, 5), stride=(4, 4), padding=(2, 2))
      (2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (conv3): Sequential(
      (0): LeakyReLU(negative_slope=0.2)
      (1): Conv2d(32, 64, kernel_size=(4, 4), stride=(4, 4), padding=(1, 1))
      (2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (conv4): Sequential(
      (0): LeakyReLU(negative_slope=0.2)
      (1): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
      (2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (conv5): Sequential(
      (0): LeakyReLU(negative_slope=0.2)
      (1): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2)

## K-means clustering


In [0]:
# Dataset over the pickled examples; each sample is requested with vector_size=10.
latent_pickle_path = '/content/drive/My Drive/Colab Notebooks/FontStyler/src/data/dataset/kor/latent.obj'
latent_provider = PickledImageProvider(latent_pickle_path)
dset = KoreanFontDataset(latent_provider, vector_size=10)

In [48]:
# Peek at the first sample. In the recorded output it is an (info dict, image
# array) pair, where the dict holds 'font_doc2vec', 'font_index' and 'word_index'.
dset[0]

({'font_doc2vec': [2.2403063999999997,
   -1.4756056999999998,
   -0.593018,
   -0.18605323,
   -1.2381212,
   -1.161201,
   2.8255024,
   0.10454782,
   -0.16260550000000001,
   1.2440913999999998],
  'font_index': 0,
  'word_index': 0},
 array([[1., 1., 1., ..., 1., 1., 1.],
        [1., 1., 1., ..., 1., 1., 1.],
        [1., 1., 1., ..., 1., 1., 1.],
        ...,
        [1., 1., 1., ..., 1., 1., 1.],
        [1., 1., 1., ..., 1., 1., 1.],
        [1., 1., 1., ..., 1., 1., 1.]]))

In [0]:
# Build the triplet dataset from the pickled examples and the labelled triplet list.
pickled_examples_path = '/content/drive/My Drive/Colab Notebooks/FontStyler/src/data/dataset/kor/latent.obj'
triplet_list_path = '/content/drive/My Drive/Colab Notebooks/FontStyler/src/data/triplet_list_with_label.txt'

pickled = PickledImageProvider(pickled_examples_path)
triplet_loader = TripletImageLoader(pickled, triplet_list_path)

In [0]:
import pandas as pd
from sklearn.cluster import KMeans
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data.sampler import SubsetRandomSampler

batch_size = 32

# Iterate `dset` strictly in its stored order: the per-font majority vote later
# in this notebook slices the predictions in consecutive runs, so the samples
# must not be shuffled. The shuffled SubsetRandomSampler that used to be built
# here was never passed to the loader (sampler=None) and has been removed.
loader = torch.utils.data.DataLoader(
    dset,
    batch_size=batch_size,
    shuffle=False,
)

In [0]:
# Embed every sample, reading the data sequentially.
tnet.eval()
with torch.no_grad():
    anchors = []
    fonts  = []
    letters = []
    for info, img in loader:
        if device == 'cuda':
            img = img.cuda()
        img = img.float()

        # Only the anchor embedding is needed. Calling the shared embedding
        # network directly yields the same tensor as the triplet forward pass
        # did for its first input, without embedding two dummy copies as well.
        anchor = tnet.embeddingnet(img)
        anchors.append(anchor)
        fonts.append(info['font_index'])
        letters.append(info['word_index'])

In [0]:
# # 순차적으로 데이터를 호출하도록 수정
# tnet.eval()
# with torch.no_grad():
#     anchors = []
#     labels  = []
#     indexes = []
#     for idx, ((img1, img2, img3), info) in enumerate(loader): 
#         if device == 'cuda':
#             img1, img2, img3 = img1.cuda(), img2.cuda(), img3.cuda()
#         img1, img2, img3 = img1.float(), img2.float(), img3.float()

#         _, _, anchor, _, _ = tnet(img1, img2, img3) 
#         anchors.append(anchor) 
#         labels.append(info['anchor_label'])
#         indexes.append(info['anchor_index'])
#         # if idx == 1000:
#         #     break

In [62]:
# Number of collected entries per list; each loop iteration appended one batch to each.
len(anchors), len(fonts), len(letters)

(7858, 7858, 7858)

In [63]:
# Shape of the first batch of embeddings (recorded run: 32 x 128 x 1 x 1).
anchors[0].shape

torch.Size([32, 128, 1, 1])

In [64]:
def _concat_batches(batches):
    """Join per-batch tensors into one float64 NumPy array along axis 0.

    All batches are concatenated in a single call. Growing the array one batch
    at a time re-copied everything collected so far on every iteration, which
    is quadratic in the number of batches. The float64 cast keeps the dtype the
    arrays had when they were seeded from ``np.zeros``.
    """
    return np.concatenate([batch.cpu().numpy() for batch in batches]).astype(np.float64)


latent = _concat_batches(anchors)
print('latent shape: {}'.format(latent.shape))

tmp_font = _concat_batches(fonts)
print('fonts shape: {}'.format(tmp_font.shape))

tmp_letter = _concat_batches(letters)
print('letter shape: {}'.format(tmp_letter.shape))

latent shape: (251450, 128, 1, 1)
fonts shape: (251450,)
letter shape: (251450,)


In [65]:
# Flatten every embedding to a plain vector: (N, 128, 1, 1) -> (N, 128).
# The row count comes from the data itself instead of a hard-coded 251450, so
# the cell keeps working when the dataset size changes.
latent = latent.reshape(len(latent), -1)
latent.shape

(251450, 128)

In [0]:
# clustering
# fit_predict() fits the estimator and returns the labels in one pass; calling
# fit() first only ran the whole clustering twice. A fixed random_state makes
# the cluster assignment repeatable between runs.
# NOTE: this rebinds `model`, which previously referred to the encoder network.
model = KMeans(n_clusters=5, random_state=0)
y_predict = model.fit_predict(latent)

In [67]:
# Cluster ids predicted for the first 100 samples.
print(y_predict[:100])

[3 3 3 3 3 3 3 3 3 3 3 3 3 3 4 3 3 3 3 3 3 3 3 3 3 3 3 4 4 3 3 3 4 3 3 3 3
 3 4 4 3 3 3 3 3 3 3 3 3 3 4 4 3 3 4 4 3 3 4 3 3 4 3 4 4 4 4 3 4 3 4 0 4 4
 4 4 4 4 4 4 3 3 4 3 3 4 3 3 4 4 3 4 3 3 3 3 3 3 3 3]


---
## "폰트 idx" : "클러스터 평균 벡터" 매핑

### 평균 벡터 구하기

In [0]:
# Placeholder for the cluster id -> average latent vector mapping; it is
# rebuilt as a dict literal once the per-cluster averages have been computed.
cluster_vector = dict()

In [69]:
# cluster 0
cluster = 0

# y_predict was produced from `latent`, so the two are aligned row by row and a
# boolean mask selects the cluster members directly. The previous loop iterated
# over `tmp_index`, which is never defined in this notebook.
members = latent[y_predict == cluster]
count = len(members)
avg_vector_0 = members.mean(axis=0)
print('average vector for cluster {} with {} data'.format(cluster, count))

average vector for cluster 0 with 51661 data


In [70]:
# cluster 1
cluster = 1

# y_predict was produced from `latent`, so the two are aligned row by row and a
# boolean mask selects the cluster members directly. The previous loop iterated
# over `tmp_index`, which is never defined in this notebook.
members = latent[y_predict == cluster]
count = len(members)
avg_vector_1 = members.mean(axis=0)
print('average vector for cluster {} with {} data'.format(cluster, count))

average vector for cluster 1 with 74310 data


In [71]:
# cluster 2
cluster = 2

# y_predict was produced from `latent`, so the two are aligned row by row and a
# boolean mask selects the cluster members directly. The previous loop iterated
# over `tmp_index`, which is never defined in this notebook.
members = latent[y_predict == cluster]
count = len(members)
avg_vector_2 = members.mean(axis=0)
print('average vector for cluster {} with {} data'.format(cluster, count))

average vector for cluster 2 with 27733 data


In [72]:
# cluster 3
cluster = 3

# y_predict was produced from `latent`, so the two are aligned row by row and a
# boolean mask selects the cluster members directly. The previous loop iterated
# over `tmp_index`, which is never defined in this notebook.
members = latent[y_predict == cluster]
count = len(members)
avg_vector_3 = members.mean(axis=0)
print('average vector for cluster {} with {} data'.format(cluster, count))

average vector for cluster 3 with 39621 data


In [73]:
# cluster 4
cluster = 4

# y_predict was produced from `latent`, so the two are aligned row by row and a
# boolean mask selects the cluster members directly. The previous loop iterated
# over `tmp_index`, which is never defined in this notebook.
members = latent[y_predict == cluster]
count = len(members)
avg_vector_4 = members.mean(axis=0)
print('average vector for cluster {} with {} data'.format(cluster, count))

average vector for cluster 4 with 58125 data


In [0]:
# Map each cluster id (0-4) to the average latent vector computed for it.
cluster_vector = dict(enumerate((
    avg_vector_0,
    avg_vector_1,
    avg_vector_2,
    avg_vector_3,
    avg_vector_4,
)))

### 폰트 인덱스에 할당

In [86]:
# Print the most frequent cluster among the first 2350 predictions (test)
import collections
first_block_counts = collections.Counter(y_predict[:2350])
top_cluster, _top_count = first_block_counts.most_common(1)[0]
print(top_cluster)  # cluster 3 in the recorded run

3


In [0]:
# Maps a font index to the average latent vector of the cluster assigned to that font.
font_idx_with_cluster_vec = dict()

In [90]:
# Number of consecutive predictions that belong to one font.
GLYPHS_PER_FONT = 2350

# Glyphs of the same font can be assigned to different clusters.
# Each font therefore gets the average latent vector of the cluster that the
# largest share of its 2350 glyphs fell into.
# NOTE(review): this assumes the predictions are ordered font by font with
# exactly 2350 consecutive entries each — confirm against the dataset order.

total = len(dset)

for font_idx, start in enumerate(range(0, total, GLYPHS_PER_FONT)):
    counter = collections.Counter(y_predict[start : start + GLYPHS_PER_FONT])
    clust = counter.most_common(1)[0][0]
    font_idx_with_cluster_vec[font_idx] = cluster_vector[clust]

    print("font {}'s cluster: {}".format(font_idx, clust))

font 0's cluster: 3
font 1's cluster: 1
font 2's cluster: 3
font 3's cluster: 4
font 4's cluster: 4
font 5's cluster: 4
font 6's cluster: 0
font 7's cluster: 2
font 8's cluster: 1
font 9's cluster: 0
font 10's cluster: 2
font 11's cluster: 1
font 12's cluster: 1
font 13's cluster: 4
font 14's cluster: 4
font 15's cluster: 0
font 16's cluster: 0
font 17's cluster: 1
font 18's cluster: 2
font 19's cluster: 1
font 20's cluster: 0
font 21's cluster: 0
font 22's cluster: 3
font 23's cluster: 0
font 24's cluster: 1
font 25's cluster: 4
font 26's cluster: 1
font 27's cluster: 3
font 28's cluster: 4
font 29's cluster: 0
font 30's cluster: 1
font 31's cluster: 0
font 32's cluster: 1
font 33's cluster: 4
font 34's cluster: 1
font 35's cluster: 0
font 36's cluster: 0
font 37's cluster: 3
font 38's cluster: 0
font 39's cluster: 1
font 40's cluster: 3
font 41's cluster: 4
font 42's cluster: 1
font 43's cluster: 3
font 44's cluster: 4
font 45's cluster: 0
font 46's cluster: 1
font 47's cluster: 0
fo

In [91]:
# Number of fonts that received a cluster vector (107 in the recorded run).
len(font_idx_with_cluster_vec)

107

## Pickle로 저장

In [0]:
import pickle

# Persist the font index -> cluster vector mapping to Drive.
cluster_vec_pickle_path = '/content/drive/My Drive/Colab Notebooks/FontStyler/src/data/font_idx_with_cluster_vec.p'
with open(cluster_vec_pickle_path, 'wb') as out_file:
    pickle.dump(font_idx_with_cluster_vec, out_file)

In [95]:
# Check: read the pickle back and report how many entries it holds.
# (pickle.load is acceptable here only because this notebook wrote the file itself.)
saved_vec_path = '/content/drive/My Drive/Colab Notebooks/FontStyler/src/data/font_idx_with_cluster_vec.p'
with open(saved_vec_path, 'rb') as in_file:
    vec_test = pickle.load(in_file)
print(len(vec_test))

107
