# Style and Libraries

In [1]:
# Competition: https://www.kaggle.com/olgabelitskaya/traditional-decor-patterns
# !pip3 install tqdm
# !pip3 install keras
# !pip3 install tensorflow
# !pip3 install numpy --upgrade


In [2]:
%%html
<style> 
h1, h2, h3 {text-shadow: 3px 3px 3px #aaa;} 
span {color: black; text-shadow: 3px 3px 3px #aaa;}
div.output_prompt {color: crimson;} 
div.input_prompt {color: firebrick;} 
div.output_area pre, div.output_subarea {font-size: 15px; color: crimson}
div.output_stderr pre {background-color: #f7e8e8;}
</style>

In [3]:
import warnings
warnings.filterwarnings('ignore', category=FutureWarning)

import numpy as np 
import pandas as pd

from tqdm import tqdm
import h5py
import cv2

import matplotlib.pylab as plt
from matplotlib import cm
import seaborn as sns
%matplotlib inline

from sklearn.model_selection import train_test_split
from keras.utils import to_categorical
from skimage import color, measure

from IPython.core.magic import (register_line_magic, register_cell_magic)

Using TensorFlow backend.


# Load and Explore the Data

In [4]:
# Load and display the data
data = pd.read_csv("../input/decor.csv")
data.head()

FileNotFoundError: File b'../input/decor.csv' does not exist

In [None]:
# Plot decor distribution
plt.style.use('seaborn-whitegrid')
plt.figure(figsize=(15,5))
sns.countplot(x="decor", data=data,
              facecolor=(0, 0, 0, 0), linewidth=7,
              edgecolor=sns.color_palette("Set1",7))
plt.title('Decor Distribution', fontsize=20);

In [None]:
# Plot decor distribution grouped by country
plt.figure(figsize=(15,5))
sns.countplot(x="decor", hue="country", data=data, palette='Set1')
plt.legend(loc=1)
plt.title('Decor Distribution Grouped by Country', 
          fontsize=20);

In [None]:
# Print unique values of decor names
set(data['decor'])

In [None]:
# Print unique values of country names
set(data['country'])

In [None]:
# Read the h5 file
f = h5py.File('../input/DecorColorImages.h5', 'r')

# List all groups
keys = list(f.keys())
keys

In [None]:
# Create tensors and targets
countries = np.array(f[keys[0]])
decors = np.array(f[keys[1]])
images = np.array(f[keys[2]])
types = np.array(f[keys[3]])

print ('Country shape:', countries.shape)
print ('Decor shape', decors.shape)
print ('Image shape:', images.shape)
print ('Type shape', types.shape)

# Implement Preprocess Functions

In [None]:
# Normalize the tensors
images = images.astype('float32')/255

In [None]:
# Read and display a tensor using Matplotlib
pattern_number = 106
print('Country: ', countries[pattern_number], '-', data['country'][pattern_number])
print('Decor: ', decors[pattern_number], '-', data['decor'][pattern_number])
print('Type: ', types[pattern_number], '-', data['type'][pattern_number])
plt.figure(figsize=(5,5))
plt.imshow(images[pattern_number]);

In [None]:
# Grayscaled tensors
gray_images = np.dot(images[...,:3], [0.299, 0.587, 0.114])
print ("Shape of grayscaled images:", gray_images.shape)

In [None]:
# Read and display a grayscaled tensor using Matplotlib
print('Country: ', countries[pattern_number], '-', data['country'][pattern_number])
print('Decor: ', decors[pattern_number], '-', data['decor'][pattern_number])
print('Type: ', types[pattern_number], '-', data['type'][pattern_number])
plt.figure(figsize=(5,5))
plt.imshow(gray_images[pattern_number], cmap=cm.bone);

In [None]:
# Print the target unique values
print('Countries: ', set(countries))
print('Decors: ', set(decors))
print('Types: ', set(types))

In [None]:
# One-hot encode the targets, started from the zero label
cat_countries = to_categorical(np.array(countries-1), 4)
cat_decors = to_categorical(np.array(decors-1), 7)
cat_types = to_categorical(np.array(types-1), 2)
cat_countries.shape, cat_decors.shape, cat_types.shape

In [None]:
# Create multi-label targets
targets = np.concatenate((cat_countries, cat_decors), axis=1)
targets = np.concatenate((targets, cat_types), axis=1)
targets.shape

In [None]:
# Split the data / Color images / Country targets
x_train, x_test, y_train, y_test = train_test_split(images, cat_countries, 
                                                    test_size = 0.2, 
                                                    random_state = 1)
n = int(len(x_test)/2)
x_valid, y_valid = x_test[:n], y_test[:n]
x_test, y_test = x_test[n:], y_test[n:]

In [None]:
# Split the data / Color images / Decor targets
x_train3, x_test3, y_train3, y_test3 = train_test_split(images, cat_decors, 
                                                        test_size = 0.2, 
                                                        random_state = 1)
n = int(len(x_test3)/2)
x_valid3, y_valid3 = x_test3[:n], y_test3[:n]
x_test3, y_test3 = x_test3[n:], y_test3[n:]

In [None]:
# Split the data / Color images / Multi-Label targets
x_train5, x_test5, y_train5, y_test5 = train_test_split(images, targets, 
                                                        test_size = 0.2, 
                                                        random_state = 1)
n = int(len(x_test5)/2)
x_valid5, y_valid5 = x_test5[:n], y_test5[:n]
x_test5, y_test5 = x_test5[n:], y_test5[n:]

In [None]:
# Split the data / Grayscaled images / Country targets
x_train2, x_test2, y_train2, y_test2 = train_test_split(gray_images, cat_countries, 
                                                        test_size = 0.2, 
                                                        random_state = 1)
n = int(len(x_test2)/2)
x_valid2, y_valid2 = x_test2[:n], y_test2[:n]
x_test2, y_test2 = x_test2[n:], y_test2[n:]

In [None]:
# Reshape the grayscaled data
x_train2, x_test2, x_valid2 = \
x_train2.reshape(-1, 150, 150, 1), \
x_test2.reshape(-1, 150, 150, 1), \
x_valid2.reshape(-1, 150, 150, 1)

In [None]:
# Split the data / Grayscaled images / Decor targets
x_train4, x_test4, y_train4, y_test4 = train_test_split(gray_images, cat_decors, 
                                                        test_size = 0.2, 
                                                        random_state = 1)
n = int(len(x_test4)/2)
x_valid4, y_valid4 = x_test4[:n], y_test4[:n]
x_test4, y_test4 = x_test4[n:], y_test4[n:]

In [None]:
# Reshape the grayscaled data
x_train4, x_test4, x_valid4 = \
x_train4.reshape(-1, 150, 150, 1), \
x_test4.reshape(-1, 150, 150, 1), \
x_valid4.reshape(-1, 150, 150, 1)

In [None]:
# Split the data / Grayscaled images / Multi-Label targets
x_train6, x_test6, y_train6, y_test6 = train_test_split(gray_images, targets, 
                                                        test_size = 0.2, 
                                                        random_state = 1)
n = int(len(x_test6)/2)
x_valid6, y_valid6 = x_test6[:n], y_test6[:n]
x_test6, y_test6 = x_test6[n:], y_test6[n:]

In [None]:
# Reshape the grayscaled data
x_train6, x_test6, x_valid6 = \
x_train6.reshape(-1, 150, 150, 1), \
x_test6.reshape(-1, 150, 150, 1), \
x_valid6.reshape(-1, 150, 150, 1)

In [None]:
# Create a list of targets
y_train6_list = [y_train6[:, :4], y_train6[:, 4:11], y_train6[:, 11:]]
y_test6_list = [y_test6[:, :4], y_test6[:, 4:11], y_test6[:, 11:]]
y_valid6_list = [y_valid6[:, :4], y_valid6[:, 4:11], y_valid6[:, 11:]]

# Vectorize Images
#### Just for fun

In [None]:
# Create a magic function
@register_line_magic
def vector(number):
    example = images[int(number)]
    gray_example = color.colorconv.rgb2grey(example)
    contours = measure.find_contours(gray_example, 0.85)
    plt.figure(figsize=(8,8))
    plt.gca().invert_yaxis()
    for n, contour in enumerate(contours):
        plt.plot(contour[:, 1], contour[:, 0], lw=1)

In [None]:
# Display a vector image 
%vector 106

In [None]:
# Display a vector image 
%vector 200

In [None]:
# Updated by https://www.kaggle.com/olgabelitskaya/preprocessing-of-pattern-images