# CNN project with animal drawings: data preparation and processing

### Set up notebook

Enable on-demand GPU memory growth

In [1]:
import tensorflow as tf
# Turn on memory growth for every GPU TensorFlow can see.
# With no GPU present the list is empty and the loop simply does nothing.
gpus = tf.config.experimental.list_physical_devices('GPU')
try:
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
except RuntimeError as e:
    # A RuntimeError is reported instead of aborting the notebook.
    print(e)

Import required libraries

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix,classification_report,accuracy_score 
import itertools

#import all the pieces of the neural network
from tensorflow.keras.models import Sequential, Model, load_model#the model
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten, Input #fully connected layers
from tensorflow.keras.layers import Convolution2D, MaxPooling2D #the convnet
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras import layers, models


### Import datasets

In [4]:
# Read one newline-delimited JSON file per class; each becomes its own DataFrame.
(
    bear_dataset,
    bee_dataset,
    bird_dataset,
    butterfly_dataset,
    camel_dataset,
    cat_dataset,
) = [
    pd.read_json(ndjson_path, lines=True)
    for ndjson_path in (
        'assets/full_simplified_bear.ndjson',
        'assets/full_simplified_bee.ndjson',
        'assets/full_simplified_bird.ndjson',
        'assets/full_simplified_butterfly.ndjson',
        'assets/full_simplified_camel.ndjson',
        'assets/full_simplified_cat.ndjson',
    )
]

In [5]:
# Read one newline-delimited JSON file per class; each becomes its own DataFrame.
(
    cow_dataset,
    crab_dataset,
    crocodile_dataset,
    dog_dataset,
    dolphin_dataset,
    elephant_dataset,
) = [
    pd.read_json(ndjson_path, lines=True)
    for ndjson_path in (
        'assets/full_simplified_cow.ndjson',
        'assets/full_simplified_crab.ndjson',
        'assets/full_simplified_crocodile.ndjson',
        'assets/full_simplified_dog.ndjson',
        'assets/full_simplified_dolphin.ndjson',
        'assets/full_simplified_elephant.ndjson',
    )
]

In [4]:
# Read one newline-delimited JSON file per class; each becomes its own DataFrame.
(
    frog_dataset,
    giraffe_dataset,
    horse_dataset,
    kangaroo_dataset,
    lion_dataset,
    lobster_dataset,
) = [
    pd.read_json(ndjson_path, lines=True)
    for ndjson_path in (
        'assets/full_simplified_frog.ndjson',
        'assets/full_simplified_giraffe.ndjson',
        'assets/full_simplified_horse.ndjson',
        'assets/full_simplified_kangaroo.ndjson',
        'assets/full_simplified_lion.ndjson',
        'assets/full_simplified_lobster.ndjson',
    )
]

In [5]:
# Read one newline-delimited JSON file per class; each becomes its own DataFrame.
(
    monkey_dataset,
    mosquito_dataset,
    mouse_dataset,
    octopus_dataset,
    owl_dataset,
    panda_dataset,
) = [
    pd.read_json(ndjson_path, lines=True)
    for ndjson_path in (
        'assets/full_simplified_monkey.ndjson',
        'assets/full_simplified_mosquito.ndjson',
        'assets/full_simplified_mouse.ndjson',
        'assets/full_simplified_octopus.ndjson',
        'assets/full_simplified_owl.ndjson',
        'assets/full_simplified_panda.ndjson',
    )
]

In [6]:
# Read one newline-delimited JSON file per class; each becomes its own DataFrame.
(
    parrot_dataset,
    penguin_dataset,
    pig_dataset,
    rabbit_dataset,
    raccoon_dataset,
    rhinoceros_dataset,
) = [
    pd.read_json(ndjson_path, lines=True)
    for ndjson_path in (
        'assets/full_simplified_parrot.ndjson',
        'assets/full_simplified_penguin.ndjson',
        'assets/full_simplified_pig.ndjson',
        'assets/full_simplified_rabbit.ndjson',
        'assets/full_simplified_raccoon.ndjson',
        'assets/full_simplified_rhinoceros.ndjson',
    )
]

In [7]:
# Read one newline-delimited JSON file per class; each becomes its own DataFrame.
# Note the sea turtle file name contains a space.
(
    scorpion_dataset,
    sea_turtle_dataset,
    sheep_dataset,
    snail_dataset,
    snake_dataset,
    spider_dataset,
) = [
    pd.read_json(ndjson_path, lines=True)
    for ndjson_path in (
        'assets/full_simplified_scorpion.ndjson',
        'assets/full_simplified_sea turtle.ndjson',
        'assets/full_simplified_sheep.ndjson',
        'assets/full_simplified_snail.ndjson',
        'assets/full_simplified_snake.ndjson',
        'assets/full_simplified_spider.ndjson',
    )
]

In [8]:
# Read one newline-delimited JSON file per class; each becomes its own DataFrame.
(
    squirrel_dataset,
    swan_dataset,
    tiger_dataset,
    whale_dataset,
) = [
    pd.read_json(ndjson_path, lines=True)
    for ndjson_path in (
        'assets/full_simplified_squirrel.ndjson',
        'assets/full_simplified_swan.ndjson',
        'assets/full_simplified_tiger.ndjson',
        'assets/full_simplified_whale.ndjson',
    )
]

### Preprocessing the data

Functions for preprocessing images

In [9]:
def doodle_to_raster(doodle: list, input_size: int = 256, output_size: int = 64):
    """Rasterise a vector doodle into a square binary image.

    Parameters
    ----------
    doodle : list
        Sequence of strokes. Each stroke is indexable as ``[xs, ys]`` where
        ``xs`` and ``ys`` are the x and y coordinates of its points.
    input_size : int, optional
        Side length of the coordinate space the strokes are expressed in
        (default 256, the value that used to be hard-coded).
    output_size : int, optional
        Side length of the produced raster (default 64, previously hard-coded).

    Returns
    -------
    np.ndarray
        ``uint8`` array of shape ``(output_size, output_size)`` holding 1 on
        every pixel touched by at least one stroke and 0 elsewhere.
    """
    raster = np.zeros((output_size, output_size), dtype='uint8')
    for stroke in doodle:
        # Merge with an element-wise maximum rather than `+=`: uint8 addition
        # wraps around at 256, so a pixel hit 256 times would flip back to 0
        # and be lost before the final clip.
        np.maximum(raster, stroke_to_raster(stroke, input_size, output_size), out=raster)
    return raster.clip(0, 1)


def stroke_to_raster(stroke: list, input_size: int, output_size: int):
    """Rasterise one stroke (a polyline) into a binary ``uint8`` mask.

    Parameters
    ----------
    stroke : list
        ``[xs, ys]`` coordinate sequences; any further entries are ignored.
    input_size : int
        Side length of the coordinate space of ``stroke``.
    output_size : int
        Side length of the returned mask.

    Returns
    -------
    np.ndarray
        ``(output_size, output_size)`` ``uint8`` mask with 1 on every pixel
        covered by the stroke. The mask is binary (it is no longer a per-pixel
        hit count, which could overflow uint8).
    """
    raster = np.zeros((output_size, output_size), dtype='uint8')
    # Rescale each point once instead of twice (as end of one segment and
    # start of the next).
    points = [rescale_point((x, y), input_size, output_size)
              for x, y in zip(stroke[0], stroke[1])]
    if len(points) == 1:
        # A stroke made of a single point has no segment; draw it as a dot
        # instead of silently dropping it.
        return line_to_raster(np.array(points), output_size)
    for start_point, end_point in zip(points, points[1:]):
        segment = line_to_raster(discretise_line(start_point, end_point), output_size)
        # Maximum keeps the mask binary and cannot wrap around.
        np.maximum(raster, segment, out=raster)
    return raster


def line_to_raster(line: "np.ndarray", output_size: int):
    """Mark the pixels listed in ``line`` on an empty mask.

    Parameters
    ----------
    line : array-like of shape (n, 2)
        Rows of ``(x, y)`` pixel coordinates. Lists are accepted as well as
        arrays; an empty input yields an all-zero mask.
    output_size : int
        Side length of the returned mask.

    Returns
    -------
    np.ndarray
        ``(output_size, output_size)`` ``uint8`` mask, indexed ``[y, x]``.
    """
    raster = np.zeros((output_size, output_size), dtype='uint8')
    points = np.asarray(line)
    if points.size == 0:
        return raster
    # Clamp on both sides: rescaling can round up to `output_size`, and a
    # negative coordinate would otherwise wrap to the opposite edge through
    # negative indexing.
    cols = np.clip(points[:, 0], 0, output_size - 1).astype(int)
    rows = np.clip(points[:, 1], 0, output_size - 1).astype(int)
    raster[rows, cols] = 1
    return raster


def discretise_line(start_point: list, end_point: list):
    """Return the integer pixel positions along the segment start -> end.

    The segment is sampled at ``discrete_length + 1`` evenly spaced fractions
    (both end points included) and every sample is rounded to a pixel.

    Returns
    -------
    np.ndarray
        Array of shape ``(discrete_length + 1, 2)`` with ``(x, y)`` rows.
    """
    discrete_length = calculate_discrete_length(start_point, end_point)
    # Collect the samples in a list and convert once; stacking inside the loop
    # copies the whole array on every step.
    samples = []
    for step in range(0, discrete_length + 1):
        segment_fraction = 0.0 if discrete_length == 0 else step/float(discrete_length)
        samples.append(round_point(interpolate_point(start_point, end_point, segment_fraction)))
    return np.array(samples)


def calculate_discrete_length(start_point: list, end_point: list):
    """Return the larger of the absolute x and y extents of the rounded segment.

    This is the number of pixel steps needed along the dominant axis.
    """
    start_point = round_point(start_point)
    end_point = round_point(end_point)
    delta_x = end_point[0] - start_point[0]
    delta_y = end_point[1] - start_point[1]
    # int() so callers get a plain Python integer to feed to range().
    return int(max(abs(delta_x), abs(delta_y)))


def round_point(point: list):
    """Round the first two coordinates of ``point`` with the built-in ``round``."""
    return np.array([round(point[0]), round(point[1])])


def rescale_point(point: list, input_size: int, output_size: int):
    """Map ``point`` from an ``input_size`` grid to an ``output_size`` grid.

    Each coordinate is divided by ``input_size / output_size`` and rounded, so
    the result may equal ``output_size`` for coordinates near the upper edge.
    """
    factor = float(input_size)/float(output_size)
    return np.array([round(float(point[0])/factor), round(float(point[1])/factor)])


def interpolate_point(start_point: list, end_point: list, segment_fraction: float):
    """Linearly interpolate both coordinates between two points."""
    return np.array([interpolate(start_point[0], end_point[0], segment_fraction),
                     interpolate(start_point[1], end_point[1], segment_fraction)])


def interpolate(start: float, end: float, segment_fraction: float):
    """Return the value ``segment_fraction`` of the way from ``start`` to ``end``."""
    return start + segment_fraction*(end - start)

#### 1 to 10

File names:
bear.npy
bee.npy
bird.npy
butterfly.npy
camel.npy
cat.npy
cow.npy
crab.npy
crocodile.npy
dog.npy

bear

In [162]:
# Dimensions of the raw bear frame as (rows, columns).
bear_dataset.shape

(134762, 6)

In [163]:
# Rasterise the first 20000 bear drawings and build their class-id column.
# Class id 0 is created with np.full so the labels are integers, like the
# labels of the other classes (np.zeros produced float labels).
preprocessed_bear = bear_dataset['drawing'].iloc[0:20000].apply(doodle_to_raster)
bear_labels = np.full((20000, 1), 0)

In [164]:
# Wrap the rasters in a frame and attach the class ids as column 1 ("label").
preprocessed_bear_dataset = pd.DataFrame(data=preprocessed_bear)
preprocessed_bear_dataset.insert(loc=1, column="label", value=bear_labels)

In [165]:
# Dump the (drawing, label) rows to 'bear.npy' in the working directory.
np_bear = preprocessed_bear_dataset.to_numpy()
np.save(file='bear.npy', arr=np_bear)

bee

In [166]:
# Dimensions of the raw bee frame as (rows, columns).
bee_dataset.shape

(120890, 6)

In [167]:
# Rasterise the first 20000 bee drawings and build their class-id column.
# Class id 1 is created with np.full so the labels are integers, like the
# labels of the other classes (np.ones produced float labels).
preprocessed_bee = bee_dataset['drawing'].iloc[0:20000].apply(doodle_to_raster)
bee_labels = np.full((20000, 1), 1)

In [168]:
# Wrap the rasters in a frame and attach the class ids as column 1 ("label").
preprocessed_bee_dataset = pd.DataFrame(data=preprocessed_bee)
preprocessed_bee_dataset.insert(loc=1, column="label", value=bee_labels)

In [169]:
# Dump the (drawing, label) rows to 'bee.npy' in the working directory.
np_bee = preprocessed_bee_dataset.to_numpy()
np.save(file='bee.npy', arr=np_bee)

bird

In [170]:
# Dimensions of the raw bird frame as (rows, columns).
bird_dataset.shape

(133572, 6)

In [171]:
# Class id 2 for each of the first 20000 bird drawings, which are rasterised with doodle_to_raster.
bird_labels = np.full(shape=(20000, 1), fill_value=2)
preprocessed_bird = bird_dataset.loc[:, 'drawing'].iloc[:20000].apply(func=doodle_to_raster)

In [172]:
# Wrap the rasters in a frame and attach the class ids as column 1 ("label").
preprocessed_bird_dataset = pd.DataFrame(data=preprocessed_bird)
preprocessed_bird_dataset.insert(loc=1, column="label", value=bird_labels)

In [173]:
# Dump the (drawing, label) rows to 'bird.npy' in the working directory.
np_bird = preprocessed_bird_dataset.to_numpy()
np.save(file='bird.npy', arr=np_bird)

butterfly

In [174]:
# Dimensions of the raw butterfly frame as (rows, columns).
butterfly_dataset.shape

(117999, 6)

In [175]:
# Class id 3 for each of the first 20000 butterfly drawings, which are rasterised with doodle_to_raster.
butterfly_labels = np.full(shape=(20000, 1), fill_value=3)
preprocessed_butterfly = butterfly_dataset.loc[:, 'drawing'].iloc[:20000].apply(func=doodle_to_raster)

In [176]:
# Wrap the rasters in a frame and attach the class ids as column 1 ("label").
preprocessed_butterfly_dataset = pd.DataFrame(data=preprocessed_butterfly)
preprocessed_butterfly_dataset.insert(loc=1, column="label", value=butterfly_labels)

In [177]:
# Dump the (drawing, label) rows to 'butterfly.npy' in the working directory.
np_butterfly = preprocessed_butterfly_dataset.to_numpy()
np.save(file='butterfly.npy', arr=np_butterfly)

camel

In [178]:
# Dimensions of the raw camel frame as (rows, columns).
camel_dataset.shape 

(121399, 6)

In [179]:
# Class id 4 for each of the first 20000 camel drawings, which are rasterised with doodle_to_raster.
camel_labels = np.full(shape=(20000, 1), fill_value=4)
preprocessed_camel = camel_dataset.loc[:, 'drawing'].iloc[:20000].apply(func=doodle_to_raster)

In [180]:
# Wrap the rasters in a frame and attach the class ids as column 1 ("label").
preprocessed_camel_dataset = pd.DataFrame(data=preprocessed_camel)
preprocessed_camel_dataset.insert(loc=1, column="label", value=camel_labels)

In [181]:
# Dump the (drawing, label) rows to 'camel.npy' in the working directory.
np_camel = preprocessed_camel_dataset.to_numpy()
np.save(file='camel.npy', arr=np_camel)

cat

In [182]:
# Dimensions of the raw cat frame as (rows, columns).
cat_dataset.shape

(123202, 6)

In [183]:
# Class id 5 for each of the first 20000 cat drawings, which are rasterised with doodle_to_raster.
cat_labels = np.full(shape=(20000, 1), fill_value=5)
preprocessed_cat = cat_dataset.loc[:, 'drawing'].iloc[:20000].apply(func=doodle_to_raster)


In [184]:
# Wrap the rasters in a frame and attach the class ids as column 1 ("label").
preprocessed_cat_dataset = pd.DataFrame(data=preprocessed_cat)
preprocessed_cat_dataset.insert(loc=1, column="label", value=cat_labels)

In [185]:
# Dump the (drawing, label) rows to 'cat.npy' in the working directory.
np_cat = preprocessed_cat_dataset.to_numpy()
np.save(file='cat.npy', arr=np_cat)

cow

In [186]:
# Dimensions of the raw cow frame as (rows, columns).
cow_dataset.shape

(123083, 6)

In [187]:
# Class id 6 for each of the first 20000 cow drawings, which are rasterised with doodle_to_raster.
cow_labels = np.full(shape=(20000, 1), fill_value=6)
preprocessed_cow = cow_dataset.loc[:, 'drawing'].iloc[:20000].apply(func=doodle_to_raster)

In [188]:
# Wrap the rasters in a frame and attach the class ids as column 1 ("label").
preprocessed_cow_dataset = pd.DataFrame(data=preprocessed_cow)
preprocessed_cow_dataset.insert(loc=1, column="label", value=cow_labels)

In [189]:
# Dump the (drawing, label) rows to 'cow.npy' in the working directory.
np_cow = preprocessed_cow_dataset.to_numpy()
np.save(file='cow.npy', arr=np_cow)

crab

In [190]:
# Dimensions of the raw crab frame as (rows, columns).
crab_dataset.shape

(126930, 6)

In [191]:
# Class id 7 for each of the first 20000 crab drawings, which are rasterised with doodle_to_raster.
crab_labels = np.full(shape=(20000, 1), fill_value=7)
preprocessed_crab = crab_dataset.loc[:, 'drawing'].iloc[:20000].apply(func=doodle_to_raster)

In [192]:
# Wrap the rasters in a frame and attach the class ids as column 1 ("label").
preprocessed_crab_dataset = pd.DataFrame(data=preprocessed_crab)
preprocessed_crab_dataset.insert(loc=1, column="label", value=crab_labels)

In [193]:
# Dump the (drawing, label) rows to 'crab.npy' in the working directory.
np_crab = preprocessed_crab_dataset.to_numpy()
np.save(file='crab.npy', arr=np_crab)

crocodile

In [194]:
# Dimensions of the raw crocodile frame as (rows, columns).
crocodile_dataset.shape

(127932, 6)

In [195]:
# Class id 8 for each of the first 20000 crocodile drawings, which are rasterised with doodle_to_raster.
crocodile_labels = np.full(shape=(20000, 1), fill_value=8)
preprocessed_crocodile = crocodile_dataset.loc[:, 'drawing'].iloc[:20000].apply(func=doodle_to_raster)


In [196]:
# Wrap the rasters in a frame and attach the class ids as column 1 ("label").
preprocessed_crocodile_dataset = pd.DataFrame(data=preprocessed_crocodile)
preprocessed_crocodile_dataset.insert(loc=1, column="label", value=crocodile_labels)

In [197]:
# Dump the (drawing, label) rows to 'crocodile.npy' in the working directory.
np_crocodile = preprocessed_crocodile_dataset.to_numpy()
np.save(file='crocodile.npy', arr=np_crocodile)

dog

In [198]:
# Dimensions of the raw dog frame as (rows, columns).
dog_dataset.shape

(152159, 6)

In [199]:
# Class id 9 for each of the first 20000 dog drawings, which are rasterised with doodle_to_raster.
dog_labels = np.full(shape=(20000, 1), fill_value=9)
preprocessed_dog = dog_dataset.loc[:, 'drawing'].iloc[:20000].apply(func=doodle_to_raster)


In [200]:
# Wrap the rasters in a frame and attach the class ids as column 1 ("label").
preprocessed_dog_dataset = pd.DataFrame(data=preprocessed_dog)
preprocessed_dog_dataset.insert(loc=1, column="label", value=dog_labels)

In [201]:
# Dump the (drawing, label) rows to 'dog.npy' in the working directory.
np_dog = preprocessed_dog_dataset.to_numpy()
np.save(file='dog.npy', arr=np_dog)

#### 11 to 20

File names:
dolphin.npy
elephant.npy
frog.npy
giraffe.npy
horse.npy
kangaroo.npy
lion.npy
lobster.npy
monkey.npy
mosquito.npy

dolphin

In [202]:
# Dimensions of the raw dolphin frame as (rows, columns).
dolphin_dataset.shape

(121613, 6)

In [203]:
# Class id 10 for each of the first 20000 dolphin drawings, which are rasterised with doodle_to_raster.
dolphin_labels = np.full(shape=(20000, 1), fill_value=10)
preprocessed_dolphin = dolphin_dataset.loc[:, 'drawing'].iloc[:20000].apply(func=doodle_to_raster)


In [204]:
# Wrap the rasters in a frame and attach the class ids as column 1 ("label").
preprocessed_dolphin_dataset = pd.DataFrame(data=preprocessed_dolphin)
preprocessed_dolphin_dataset.insert(loc=1, column="label", value=dolphin_labels)

In [205]:
# Dump the (drawing, label) rows to 'dolphin.npy' in the working directory.
np_dolphin = preprocessed_dolphin_dataset.to_numpy()
np.save(file='dolphin.npy', arr=np_dolphin)

elephant

In [206]:
# Dimensions of the raw elephant frame as (rows, columns).
elephant_dataset.shape

(126969, 6)

In [207]:
# Class id 11 for each of the first 20000 elephant drawings, which are rasterised with doodle_to_raster.
elephant_labels = np.full(shape=(20000, 1), fill_value=11)
preprocessed_elephant = elephant_dataset.loc[:, 'drawing'].iloc[:20000].apply(func=doodle_to_raster)


In [208]:
# Wrap the rasters in a frame and attach the class ids as column 1 ("label").
preprocessed_elephant_dataset = pd.DataFrame(data=preprocessed_elephant)
preprocessed_elephant_dataset.insert(loc=1, column="label", value=elephant_labels)

In [209]:
# Dump the (drawing, label) rows to 'elephant.npy' in the working directory.
np_elephant = preprocessed_elephant_dataset.to_numpy()
np.save(file='elephant.npy', arr=np_elephant)

frog

In [210]:
# Dimensions of the raw frog frame as (rows, columns).
frog_dataset.shape

(159047, 6)

In [211]:
# Class id 12 for each of the first 20000 frog drawings, which are rasterised with doodle_to_raster.
frog_labels = np.full(shape=(20000, 1), fill_value=12)
preprocessed_frog = frog_dataset.loc[:, 'drawing'].iloc[:20000].apply(func=doodle_to_raster)


In [212]:
# Wrap the rasters in a frame and attach the class ids as column 1 ("label").
preprocessed_frog_dataset = pd.DataFrame(data=preprocessed_frog)
preprocessed_frog_dataset.insert(loc=1, column="label", value=frog_labels)

In [213]:
# Dump the (drawing, label) rows to 'frog.npy' in the working directory.
np_frog = preprocessed_frog_dataset.to_numpy()
np.save(file='frog.npy', arr=np_frog)

giraffe

In [214]:
# Dimensions of the raw giraffe frame as (rows, columns).
giraffe_dataset.shape

(127182, 6)

In [215]:
# Class id 13 for each of the first 20000 giraffe drawings, which are rasterised with doodle_to_raster.
giraffe_labels = np.full(shape=(20000, 1), fill_value=13)
preprocessed_giraffe = giraffe_dataset.loc[:, 'drawing'].iloc[:20000].apply(func=doodle_to_raster)


In [216]:
# Wrap the rasters in a frame and attach the class ids as column 1 ("label").
preprocessed_giraffe_dataset = pd.DataFrame(data=preprocessed_giraffe)
preprocessed_giraffe_dataset.insert(loc=1, column="label", value=giraffe_labels)

In [217]:
# Dump the (drawing, label) rows to 'giraffe.npy' in the working directory.
np_giraffe = preprocessed_giraffe_dataset.to_numpy()
np.save(file='giraffe.npy', arr=np_giraffe)

horse

In [218]:
# Dimensions of the raw horse frame as (rows, columns).
horse_dataset.shape

(178286, 6)

In [219]:
# Class id 14 for each of the first 20000 horse drawings, which are rasterised with doodle_to_raster.
horse_labels = np.full(shape=(20000, 1), fill_value=14)
preprocessed_horse = horse_dataset.loc[:, 'drawing'].iloc[:20000].apply(func=doodle_to_raster)


In [220]:
# Wrap the rasters in a frame and attach the class ids as column 1 ("label").
preprocessed_horse_dataset = pd.DataFrame(data=preprocessed_horse)
preprocessed_horse_dataset.insert(loc=1, column="label", value=horse_labels)

In [221]:
# Dump the (drawing, label) rows to 'horse.npy' in the working directory.
np_horse = preprocessed_horse_dataset.to_numpy()
np.save(file='horse.npy', arr=np_horse)

kangaroo

In [222]:
# Dimensions of the raw kangaroo frame as (rows, columns).
kangaroo_dataset.shape

(174470, 6)

In [223]:
# Class id 15 for each of the first 20000 kangaroo drawings, which are rasterised with doodle_to_raster.
kangaroo_labels = np.full(shape=(20000, 1), fill_value=15)
preprocessed_kangaroo = kangaroo_dataset.loc[:, 'drawing'].iloc[:20000].apply(func=doodle_to_raster)


In [224]:
# Wrap the rasters in a frame and attach the class ids as column 1 ("label").
preprocessed_kangaroo_dataset = pd.DataFrame(data=preprocessed_kangaroo)
preprocessed_kangaroo_dataset.insert(loc=1, column="label", value=kangaroo_labels)

In [225]:
# Dump the (drawing, label) rows to 'kangaroo.npy' in the working directory.
np_kangaroo = preprocessed_kangaroo_dataset.to_numpy()
np.save(file='kangaroo.npy', arr=np_kangaroo)

lion

In [226]:
# Dimensions of the raw lion frame as (rows, columns).
lion_dataset.shape

(120949, 6)

In [227]:
# Class id 16 for each of the first 20000 lion drawings, which are rasterised with doodle_to_raster.
lion_labels = np.full(shape=(20000, 1), fill_value=16)
preprocessed_lion = lion_dataset.loc[:, 'drawing'].iloc[:20000].apply(func=doodle_to_raster)


In [228]:
# Wrap the rasters in a frame and attach the class ids as column 1 ("label").
preprocessed_lion_dataset = pd.DataFrame(data=preprocessed_lion)
preprocessed_lion_dataset.insert(loc=1, column="label", value=lion_labels)

In [229]:
# Dump the (drawing, label) rows to 'lion.npy' in the working directory.
np_lion = preprocessed_lion_dataset.to_numpy()
np.save(file='lion.npy', arr=np_lion)

lobster

In [230]:
# Dimensions of the raw lobster frame as (rows, columns).
lobster_dataset.shape

(140175, 6)

In [231]:
# Class id 17 for each of the first 20000 lobster drawings, which are rasterised with doodle_to_raster.
lobster_labels = np.full(shape=(20000, 1), fill_value=17)
preprocessed_lobster = lobster_dataset.loc[:, 'drawing'].iloc[:20000].apply(func=doodle_to_raster)


In [233]:
# Wrap the rasters in a frame and attach the class ids as column 1 ("label").
preprocessed_lobster_dataset = pd.DataFrame(data=preprocessed_lobster)
preprocessed_lobster_dataset.insert(loc=1, column="label", value=lobster_labels)

In [234]:
# Dump the (drawing, label) rows to 'lobster.npy' in the working directory.
np_lobster = preprocessed_lobster_dataset.to_numpy()
np.save(file='lobster.npy', arr=np_lobster)

monkey

In [235]:
# Dimensions of the raw monkey frame as (rows, columns).
monkey_dataset.shape

(127633, 6)

In [236]:
# Class id 18 for each of the first 20000 monkey drawings, which are rasterised with doodle_to_raster.
monkey_labels = np.full(shape=(20000, 1), fill_value=18)
preprocessed_monkey = monkey_dataset.loc[:, 'drawing'].iloc[:20000].apply(func=doodle_to_raster)


In [237]:
# Wrap the rasters in a frame and attach the class ids as column 1 ("label").
preprocessed_monkey_dataset = pd.DataFrame(data=preprocessed_monkey)
preprocessed_monkey_dataset.insert(loc=1, column="label", value=monkey_labels)

In [238]:
# Dump the (drawing, label) rows to 'monkey.npy' in the working directory.
np_monkey = preprocessed_monkey_dataset.to_numpy()
np.save(file='monkey.npy', arr=np_monkey)

mosquito

In [239]:
# Dimensions of the raw mosquito frame as (rows, columns).
mosquito_dataset.shape

(123029, 6)

In [240]:
# Class id 19 for each of the first 20000 mosquito drawings, which are rasterised with doodle_to_raster.
mosquito_labels = np.full(shape=(20000, 1), fill_value=19)
preprocessed_mosquito = mosquito_dataset.loc[:, 'drawing'].iloc[:20000].apply(func=doodle_to_raster)


In [241]:
# Wrap the rasters in a frame and attach the class ids as column 1 ("label").
preprocessed_mosquito_dataset = pd.DataFrame(data=preprocessed_mosquito)
preprocessed_mosquito_dataset.insert(loc=1, column="label", value=mosquito_labels)

In [242]:
# Dump the (drawing, label) rows to 'mosquito.npy' in the working directory.
np_mosquito = preprocessed_mosquito_dataset.to_numpy()
np.save(file='mosquito.npy', arr=np_mosquito)

#### 21 to 30

File names:
mouse.npy
octopus.npy
owl.npy
panda.npy
parrot.npy
penguin.npy
pig.npy
rabbit.npy
raccoon.npy
rhinoceros.npy

mouse

In [10]:
# Dimensions of the raw mouse frame as (rows, columns).
mouse_dataset.shape

(178826, 6)

In [11]:
# Class id 20 for each of the first 20000 mouse drawings, which are rasterised with doodle_to_raster.
mouse_labels = np.full(shape=(20000, 1), fill_value=20)
preprocessed_mouse = mouse_dataset.loc[:, 'drawing'].iloc[:20000].apply(func=doodle_to_raster)


In [12]:
# Wrap the rasters in a frame and attach the class ids as column 1 ("label").
preprocessed_mouse_dataset = pd.DataFrame(data=preprocessed_mouse)
preprocessed_mouse_dataset.insert(loc=1, column="label", value=mouse_labels)

In [13]:
# Dump the (drawing, label) rows to 'mouse.npy' in the working directory.
np_mouse = preprocessed_mouse_dataset.to_numpy()
np.save(file='mouse.npy', arr=np_mouse)

octopus

In [14]:
# Dimensions of the raw octopus frame as (rows, columns).
octopus_dataset.shape

(150152, 6)

In [15]:
# Class id 21 for each of the first 20000 octopus drawings, which are rasterised with doodle_to_raster.
octopus_labels = np.full(shape=(20000, 1), fill_value=21)
preprocessed_octopus = octopus_dataset.loc[:, 'drawing'].iloc[:20000].apply(func=doodle_to_raster)


In [16]:
# Wrap the rasters in a frame and attach the class ids as column 1 ("label").
preprocessed_octopus_dataset = pd.DataFrame(data=preprocessed_octopus)
preprocessed_octopus_dataset.insert(loc=1, column="label", value=octopus_labels)

In [17]:
# Dump the (drawing, label) rows to 'octopus.npy' in the working directory.
np_octopus = preprocessed_octopus_dataset.to_numpy()
np.save(file='octopus.npy', arr=np_octopus)

owl

In [18]:
# Dimensions of the raw owl frame as (rows, columns).
owl_dataset.shape

(169632, 6)

In [19]:
# Class id 22 for each of the first 20000 owl drawings, which are rasterised with doodle_to_raster.
owl_labels = np.full(shape=(20000, 1), fill_value=22)
preprocessed_owl = owl_dataset.loc[:, 'drawing'].iloc[:20000].apply(func=doodle_to_raster)


In [20]:
# Wrap the rasters in a frame and attach the class ids as column 1 ("label").
preprocessed_owl_dataset = pd.DataFrame(data=preprocessed_owl)
preprocessed_owl_dataset.insert(loc=1, column="label", value=owl_labels)

In [21]:
# Dump the (drawing, label) rows to 'owl.npy' in the working directory.
np_owl = preprocessed_owl_dataset.to_numpy()
np.save(file='owl.npy', arr=np_owl)

panda

In [22]:
# Dimensions of the raw panda frame as (rows, columns).
panda_dataset.shape

(113613, 6)

In [23]:
# Class id 23 for each of the first 20000 panda drawings, which are rasterised with doodle_to_raster.
panda_labels = np.full(shape=(20000, 1), fill_value=23)
preprocessed_panda = panda_dataset.loc[:, 'drawing'].iloc[:20000].apply(func=doodle_to_raster)


In [24]:
# Wrap the rasters in a frame and attach the class ids as column 1 ("label").
preprocessed_panda_dataset = pd.DataFrame(data=preprocessed_panda)
preprocessed_panda_dataset.insert(loc=1, column="label", value=panda_labels)

In [25]:
# Dump the (drawing, label) rows to 'panda.npy' in the working directory.
np_panda = preprocessed_panda_dataset.to_numpy()
np.save(file='panda.npy', arr=np_panda)

parrot

In [26]:
# Dimensions of the raw parrot frame as (rows, columns).
parrot_dataset.shape

(185530, 6)

In [27]:
# Class id 24 for each of the first 20000 parrot drawings, which are rasterised with doodle_to_raster.
parrot_labels = np.full(shape=(20000, 1), fill_value=24)
preprocessed_parrot = parrot_dataset.loc[:, 'drawing'].iloc[:20000].apply(func=doodle_to_raster)


In [28]:
# Wrap the rasters in a frame and attach the class ids as column 1 ("label").
preprocessed_parrot_dataset = pd.DataFrame(data=preprocessed_parrot)
preprocessed_parrot_dataset.insert(loc=1, column="label", value=parrot_labels)

In [29]:
# Dump the (drawing, label) rows to 'parrot.npy' in the working directory.
np_parrot = preprocessed_parrot_dataset.to_numpy()
np.save(file='parrot.npy', arr=np_parrot)

penguin

In [30]:
# Dimensions of the raw penguin frame as (rows, columns).
penguin_dataset.shape

(253791, 6)

In [31]:
# Class id 25 for each of the first 20000 penguin drawings, which are rasterised with doodle_to_raster.
penguin_labels = np.full(shape=(20000, 1), fill_value=25)
preprocessed_penguin = penguin_dataset.loc[:, 'drawing'].iloc[:20000].apply(func=doodle_to_raster)


In [32]:
# Wrap the rasters in a frame and attach the class ids as column 1 ("label").
preprocessed_penguin_dataset = pd.DataFrame(data=preprocessed_penguin)
preprocessed_penguin_dataset.insert(loc=1, column="label", value=penguin_labels)

In [33]:
# Dump the (drawing, label) rows to 'penguin.npy' in the working directory.
np_penguin = preprocessed_penguin_dataset.to_numpy()
np.save(file='penguin.npy', arr=np_penguin)

pig

In [34]:
# Dimensions of the raw pig frame as (rows, columns).
pig_dataset.shape

(186770, 6)

In [35]:
# Class id 26 for each of the first 20000 pig drawings, which are rasterised with doodle_to_raster.
pig_labels = np.full(shape=(20000, 1), fill_value=26)
preprocessed_pig = pig_dataset.loc[:, 'drawing'].iloc[:20000].apply(func=doodle_to_raster)


In [36]:
# Wrap the rasters in a frame and attach the class ids as column 1 ("label").
preprocessed_pig_dataset = pd.DataFrame(data=preprocessed_pig)
preprocessed_pig_dataset.insert(loc=1, column="label", value=pig_labels)

In [37]:
# Dump the (drawing, label) rows to 'pig.npy' in the working directory.
np_pig = preprocessed_pig_dataset.to_numpy()
np.save(file='pig.npy', arr=np_pig)

rabbit

In [38]:
# Dimensions of the raw rabbit frame as (rows, columns).
rabbit_dataset.shape

(155288, 6)

In [39]:
# Class id 27 for each of the first 20000 rabbit drawings, which are rasterised with doodle_to_raster.
rabbit_labels = np.full(shape=(20000, 1), fill_value=27)
preprocessed_rabbit = rabbit_dataset.loc[:, 'drawing'].iloc[:20000].apply(func=doodle_to_raster)


In [40]:
# Wrap the rasters in a frame and attach the class ids as column 1 ("label").
preprocessed_rabbit_dataset = pd.DataFrame(data=preprocessed_rabbit)
preprocessed_rabbit_dataset.insert(loc=1, column="label", value=rabbit_labels)

In [41]:
# Dump the (drawing, label) rows to 'rabbit.npy' in the working directory.
np_rabbit = preprocessed_rabbit_dataset.to_numpy()
np.save(file='rabbit.npy', arr=np_rabbit)

raccoon

In [42]:
# Dimensions of the raw raccoon frame as (rows, columns).
raccoon_dataset.shape

(119588, 6)

In [43]:
# Class id 28 for each of the first 20000 raccoon drawings, which are rasterised with doodle_to_raster.
raccoon_labels = np.full(shape=(20000, 1), fill_value=28)
preprocessed_raccoon = raccoon_dataset.loc[:, 'drawing'].iloc[:20000].apply(func=doodle_to_raster)


In [44]:
# Wrap the rasters in a frame and attach the class ids as column 1 ("label").
preprocessed_raccoon_dataset = pd.DataFrame(data=preprocessed_raccoon)
preprocessed_raccoon_dataset.insert(loc=1, column="label", value=raccoon_labels)

In [45]:
# Dump the (drawing, label) rows to 'raccoon.npy' in the working directory.
np_raccoon = preprocessed_raccoon_dataset.to_numpy()
np.save(file='raccoon.npy', arr=np_raccoon)

rhinoceros

In [46]:
# Dimensions of the raw rhinoceros frame as (rows, columns).
rhinoceros_dataset.shape

(188484, 6)

In [47]:
# Class id 29 for each of the first 20000 rhinoceros drawings, which are rasterised with doodle_to_raster.
rhinoceros_labels = np.full(shape=(20000, 1), fill_value=29)
preprocessed_rhinoceros = rhinoceros_dataset.loc[:, 'drawing'].iloc[:20000].apply(func=doodle_to_raster)


In [48]:
# Wrap the rasters in a frame and attach the class ids as column 1 ("label").
preprocessed_rhinoceros_dataset = pd.DataFrame(data=preprocessed_rhinoceros)
preprocessed_rhinoceros_dataset.insert(loc=1, column="label", value=rhinoceros_labels)

In [49]:
# Dump the (drawing, label) rows to 'rhinoceros.npy' in the working directory.
np_rhinoceros = preprocessed_rhinoceros_dataset.to_numpy()
np.save(file='rhinoceros.npy', arr=np_rhinoceros)

#### 31 to 40

File names:
scorpion.npy
sea_turtle.npy
sheep.npy
snail.npy
snake.npy
spider.npy
squirrel.npy
swan.npy
tiger.npy
whale.npy

scorpion

In [50]:
# Dimensions of the raw scorpion frame as (rows, columns).
scorpion_dataset.shape

(165689, 6)

In [51]:
# Class id 30 for each of the first 20000 scorpion drawings, which are rasterised with doodle_to_raster.
scorpion_labels = np.full(shape=(20000, 1), fill_value=30)
preprocessed_scorpion = scorpion_dataset.loc[:, 'drawing'].iloc[:20000].apply(func=doodle_to_raster)


In [52]:
# Wrap the rasters in a frame and attach the class ids as column 1 ("label").
preprocessed_scorpion_dataset = pd.DataFrame(data=preprocessed_scorpion)
preprocessed_scorpion_dataset.insert(loc=1, column="label", value=scorpion_labels)

In [53]:
# Dump the (drawing, label) rows to 'scorpion.npy' in the working directory.
np_scorpion = preprocessed_scorpion_dataset.to_numpy()
np.save(file='scorpion.npy', arr=np_scorpion)

sea turtle

In [54]:
# Dimensions of the raw sea turtle frame as (rows, columns).
sea_turtle_dataset.shape

(119876, 6)

In [55]:
# Class id 31 for each of the first 20000 sea turtle drawings, which are rasterised with doodle_to_raster.
sea_turtle_labels = np.full(shape=(20000, 1), fill_value=31)
preprocessed_sea_turtle = sea_turtle_dataset.loc[:, 'drawing'].iloc[:20000].apply(func=doodle_to_raster)


In [56]:
# Wrap the rasters in a frame and attach the class ids as column 1 ("label").
preprocessed_sea_turtle_dataset = pd.DataFrame(data=preprocessed_sea_turtle)
preprocessed_sea_turtle_dataset.insert(loc=1, column="label", value=sea_turtle_labels)

In [57]:
# Dump the (drawing, label) rows to 'sea_turtle.npy' in the working directory.
np_sea_turtle = preprocessed_sea_turtle_dataset.to_numpy()
np.save(file='sea_turtle.npy', arr=np_sea_turtle)

sheep

In [58]:
# Dimensions of the raw sheep frame as (rows, columns).
sheep_dataset.shape

(126121, 6)

In [59]:
# Class id 32 for each of the first 20000 sheep drawings, which are rasterised with doodle_to_raster.
sheep_labels = np.full(shape=(20000, 1), fill_value=32)
preprocessed_sheep = sheep_dataset.loc[:, 'drawing'].iloc[:20000].apply(func=doodle_to_raster)


In [60]:
# Wrap the rasters in a frame and attach the class ids as column 1 ("label").
preprocessed_sheep_dataset = pd.DataFrame(data=preprocessed_sheep)
preprocessed_sheep_dataset.insert(loc=1, column="label", value=sheep_labels)

In [61]:
# Dump the (drawing, label) rows to 'sheep.npy' in the working directory.
np_sheep = preprocessed_sheep_dataset.to_numpy()
np.save(file='sheep.npy', arr=np_sheep)

snail

In [62]:
# Dimensions of the raw snail frame as (rows, columns).
snail_dataset.shape

(133757, 6)

In [63]:
# Class id 33 for each of the first 20000 snail drawings, which are rasterised with doodle_to_raster.
snail_labels = np.full(shape=(20000, 1), fill_value=33)
preprocessed_snail = snail_dataset.loc[:, 'drawing'].iloc[:20000].apply(func=doodle_to_raster)


In [64]:
# Wrap the rasters in a frame and attach the class ids as column 1 ("label").
preprocessed_snail_dataset = pd.DataFrame(data=preprocessed_snail)
preprocessed_snail_dataset.insert(loc=1, column="label", value=snail_labels)

In [65]:
# Dump the (drawing, label) rows to 'snail.npy' in the working directory.
np_snail = preprocessed_snail_dataset.to_numpy()
np.save(file='snail.npy', arr=np_snail)

snake

In [66]:
# Dimensions of the raw snake frame as (rows, columns).
snake_dataset.shape

(122273, 6)

In [67]:
# Class id 34 for each of the first 20000 snake drawings, which are rasterised with doodle_to_raster.
snake_labels = np.full(shape=(20000, 1), fill_value=34)
preprocessed_snake = snake_dataset.loc[:, 'drawing'].iloc[:20000].apply(func=doodle_to_raster)


In [68]:
# Wrap the rasters in a frame and attach the class ids as column 1 ("label").
preprocessed_snake_dataset = pd.DataFrame(data=preprocessed_snake)
preprocessed_snake_dataset.insert(loc=1, column="label", value=snake_labels)

In [69]:
# Dump the (drawing, label) rows to 'snake.npy' in the working directory.
np_snake = preprocessed_snake_dataset.to_numpy()
np.save(file='snake.npy', arr=np_snake)

spider

In [70]:
# Dimensions of the raw spider frame as (rows, columns).
spider_dataset.shape

(209447, 6)

In [71]:
# Class id 35 for each of the first 20000 spider drawings, which are rasterised with doodle_to_raster.
spider_labels = np.full(shape=(20000, 1), fill_value=35)
preprocessed_spider = spider_dataset.loc[:, 'drawing'].iloc[:20000].apply(func=doodle_to_raster)


In [72]:
# Wrap the rasters in a frame and attach the class ids as column 1 ("label").
preprocessed_spider_dataset = pd.DataFrame(data=preprocessed_spider)
preprocessed_spider_dataset.insert(loc=1, column="label", value=spider_labels)

In [73]:
# Dump the (drawing, label) rows to 'spider.npy' in the working directory.
np_spider = preprocessed_spider_dataset.to_numpy()
np.save(file='spider.npy', arr=np_spider)

squirrel

In [74]:
# Dimensions of the raw squirrel frame as (rows, columns).
squirrel_dataset.shape

(156883, 6)

In [75]:
# Class id 36 for each of the first 20000 squirrel drawings, which are rasterised with doodle_to_raster.
squirrel_labels = np.full(shape=(20000, 1), fill_value=36)
preprocessed_squirrel = squirrel_dataset.loc[:, 'drawing'].iloc[:20000].apply(func=doodle_to_raster)


In [76]:
# Wrap the rasters in a frame and attach the class ids as column 1 ("label").
preprocessed_squirrel_dataset = pd.DataFrame(data=preprocessed_squirrel)
preprocessed_squirrel_dataset.insert(loc=1, column="label", value=squirrel_labels)

In [77]:
# Dump the (drawing, label) rows to 'squirrel.npy' in the working directory.
np_squirrel = preprocessed_squirrel_dataset.to_numpy()
np.save(file='squirrel.npy', arr=np_squirrel)

swan

In [78]:
# Dimensions of the raw swan frame as (rows, columns).
swan_dataset.shape

(152088, 6)

In [79]:
# Class id 37 for each of the first 20000 swan drawings, which are rasterised with doodle_to_raster.
swan_labels = np.full(shape=(20000, 1), fill_value=37)
preprocessed_swan = swan_dataset.loc[:, 'drawing'].iloc[:20000].apply(func=doodle_to_raster)


In [80]:
# Wrap the rasters in a frame and attach the class ids as column 1 ("label").
preprocessed_swan_dataset = pd.DataFrame(data=preprocessed_swan)
preprocessed_swan_dataset.insert(loc=1, column="label", value=swan_labels)

In [81]:
# Dump the (drawing, label) rows to 'swan.npy' in the working directory.
np_swan = preprocessed_swan_dataset.to_numpy()
np.save(file='swan.npy', arr=np_swan)

tiger

In [82]:
# Dimensions of the raw tiger frame as (rows, columns).
tiger_dataset.shape

(121067, 6)

In [83]:
# Class id 38 for each of the first 20000 tiger drawings, which are rasterised with doodle_to_raster.
tiger_labels = np.full(shape=(20000, 1), fill_value=38)
preprocessed_tiger = tiger_dataset.loc[:, 'drawing'].iloc[:20000].apply(func=doodle_to_raster)


In [84]:
# Wrap the rasters in a frame and attach the class ids as column 1 ("label").
preprocessed_tiger_dataset = pd.DataFrame(data=preprocessed_tiger)
preprocessed_tiger_dataset.insert(loc=1, column="label", value=tiger_labels)

In [85]:
# Dump the (drawing, label) rows to 'tiger.npy' in the working directory.
np_tiger = preprocessed_tiger_dataset.to_numpy()
np.save(file='tiger.npy', arr=np_tiger)

whale

In [86]:
# Dimensions of the raw whale frame as (rows, columns).
whale_dataset.shape

(116502, 6)

In [87]:
# Class id 39 for each of the first 20000 whale drawings, which are rasterised with doodle_to_raster.
whale_labels = np.full(shape=(20000, 1), fill_value=39)
preprocessed_whale = whale_dataset.loc[:, 'drawing'].iloc[:20000].apply(func=doodle_to_raster)


In [88]:
# Wrap the rasters in a frame and attach the class ids as column 1 ("label").
preprocessed_whale_dataset = pd.DataFrame(data=preprocessed_whale)
preprocessed_whale_dataset.insert(loc=1, column="label", value=whale_labels)

In [89]:
# Dump the (drawing, label) rows to 'whale.npy' in the working directory.
np_whale = preprocessed_whale_dataset.to_numpy()
np.save(file='whale.npy', arr=np_whale)