In [4]:
import cv2                 # working with, mainly resizing, images
import numpy as np         # dealing with arrays
import os                  # dealing with directories
from random import shuffle # mixing up or currently ordered data that might lead our network astray in training.
from tqdm import tqdm      # a nice pretty percentage bar for tasks. Thanks to viewer Daniel Bühler for this suggestion
import csv
import pandas as pd

In [5]:
# --- Notebook configuration -------------------------------------------------
# Folders holding the raw training / test images.
TRAIN_DIR = '../data/train_img'
TEST_DIR = '../data/test_img'

# Images are resized to IMG_SIZE x IMG_SIZE pixels before being stored.
IMG_SIZE = 50

# Learning rate handed to the optimizer.
LR = 1e-3

# Checkpoint name: embeds the learning rate and an architecture tag so that
# saved models can be told apart later.
MODEL_NAME = 'hedl-{}-{}.model'.format(
    LR,
    '2conv-basic',
)


In [6]:
def label_img(img,meta):
    """Return the one-hot label of image ``img`` as a list of 0/1 ints.

    Args:
        img: image id to look up in ``meta['image_id']``.
        meta: DataFrame with at least the columns ``image_id`` and ``label``.

    Returns:
        A list with one slot per distinct value of ``meta['label']`` (in
        order of first appearance), holding 1 at the position of this
        image's label and 0 everywhere else.

    Raises:
        IndexError: if ``img`` does not occur in ``meta['image_id']``.
    """
    matches = meta.loc[meta['image_id'] == img, 'label'].values
    if len(matches) == 0:
        # Same exception type the bare ``.values[0]`` lookup would raise,
        # but with a message that names the offending id.
        raise IndexError('image_id {!r} not found in meta'.format(img))
    classes = meta['label'].unique().tolist()
    label = [0] * len(classes)
    # If an id is listed more than once, the first row wins.
    label[classes.index(matches[0])] = 1
    return label

In [7]:

# Read the label table once; both names used in this notebook are bound to
# the same frame instead of parsing the CSV twice.
train_meta = pd.read_csv('../data/train.csv')
meta = train_meta

# Sanity checks: position of the 'rice' class in the class list, and the
# one-hot vector produced for one known training image.
print(meta['label'].unique().tolist().index('rice'))
label = label_img('train_1d',meta)
print(label)

0
[0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]


In [8]:
def create_train_data():
    """Build, shuffle, cache and return the training set.

    Every file in TRAIN_DIR is read as a grayscale image, resized to
    IMG_SIZE x IMG_SIZE and paired with the one-hot label that ``label_img``
    derives from the training CSV (the file name up to the first '.' is used
    as the image id). Files OpenCV cannot decode are skipped.

    Side effects:
        Writes the shuffled samples to 'train_data.npy' as an object array
        of shape (n_samples, 2).

    Returns:
        list of ``[image_array, label_array]`` pairs, in shuffled order.
    """
    # Loop-invariant: read the label table once rather than once per image.
    # Path matches the '../data' location used by the rest of the notebook.
    meta = pd.read_csv('../data/train.csv')

    training_data = []
    for fname in tqdm(os.listdir(TRAIN_DIR)):
        pixels = cv2.imread(os.path.join(TRAIN_DIR, fname), cv2.IMREAD_GRAYSCALE)
        if pixels is None:
            # cv2.imread signals an unreadable / non-image file by returning
            # None; cv2.resize would crash on it.
            continue
        label = label_img(fname.split('.')[0], meta)
        pixels = cv2.resize(pixels, (IMG_SIZE, IMG_SIZE))
        training_data.append([np.array(pixels), np.array(label)])
    shuffle(training_data)

    # Image and label arrays have different shapes, so pack them into an
    # explicit object array; handing the ragged list straight to np.save is
    # rejected by recent NumPy releases.
    packed = np.empty((len(training_data), 2), dtype=object)
    for row, (image, one_hot) in enumerate(training_data):
        packed[row, 0] = image
        packed[row, 1] = one_hot
    np.save('train_data.npy', packed)
    return training_data


In [9]:
def process_test_data():
    """Build, shuffle, cache and return the unlabeled test set.

    Every file in TEST_DIR is read as a grayscale image and resized to
    IMG_SIZE x IMG_SIZE. Each sample is paired with the file name up to the
    first '.', which serves as its id. Files OpenCV cannot decode are skipped.

    Side effects:
        Writes the shuffled samples to 'test_data.npy' as an object array of
        shape (n_samples, 2).

    Returns:
        list of ``[image_array, image_id]`` pairs, in shuffled order.
    """
    testing_data = []
    for fname in tqdm(os.listdir(TEST_DIR)):
        img_num = fname.split('.')[0]
        pixels = cv2.imread(os.path.join(TEST_DIR, fname), cv2.IMREAD_GRAYSCALE)
        if pixels is None:
            # cv2.imread signals an unreadable / non-image file by returning
            # None; cv2.resize would crash on it.
            continue
        pixels = cv2.resize(pixels, (IMG_SIZE, IMG_SIZE))
        testing_data.append([np.array(pixels), img_num])

    shuffle(testing_data)

    # Each row mixes an image array with a string id, so pack the rows into
    # an explicit object array; handing the ragged list straight to np.save
    # is rejected by recent NumPy releases.
    packed = np.empty((len(testing_data), 2), dtype=object)
    for row, (image, image_id) in enumerate(testing_data):
        packed[row, 0] = image
        packed[row, 1] = image_id
    np.save('test_data.npy', packed)
    return testing_data

In [10]:
# Reuse the cached samples when they exist; otherwise build them from the raw
# images (which also writes the cache for the next run).
if os.path.exists('train_data.npy'):
    # The cache holds an object array (image/label pairs), which NumPy only
    # loads when pickling is explicitly allowed. NOTE: unpickling is only
    # safe because this file is produced locally by create_train_data().
    train_data = np.load('train_data.npy', allow_pickle=True)
else:
    train_data = create_train_data()
len(train_data)

3215

In [13]:
import tflearn
from tflearn.layers.conv import conv_2d, max_pool_2d
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.estimator import regression

# Input layer: batches of IMG_SIZE x IMG_SIZE single-channel images.
convnet = input_data(shape=[None, IMG_SIZE, IMG_SIZE, 1], name='input')

# Two convolution + max-pooling stages, the first built with 32 and the
# second with 64 as the conv_2d size argument; both use 5 for conv and pool.
for stage_width in (32, 64):
    convnet = conv_2d(convnet, stage_width, 5, activation='relu')
    convnet = max_pool_2d(convnet, 5)

# Dense head followed by dropout.
# NOTE(review): 0.8 is presumably tflearn's keep probability - confirm.
convnet = fully_connected(convnet, 1024, activation='relu')
convnet = dropout(convnet, 0.8)

# Output layer: 25 softmax units, trained with Adam on categorical
# cross-entropy at learning rate LR.
convnet = fully_connected(convnet, 25, activation='softmax')
convnet = regression(
    convnet,
    optimizer='adam',
    learning_rate=LR,
    loss='categorical_crossentropy',
    name='targets',
)

model = tflearn.DNN(convnet, tensorboard_dir='log')

ImportError: No module named 'tflearn.layers'

In [None]:
# Restore previously saved weights when a checkpoint for MODEL_NAME is
# present (detected through its '.meta' companion file).
checkpoint_marker = '{}.meta'.format(MODEL_NAME)
if os.path.exists(checkpoint_marker):
    model.load(MODEL_NAME)
    print('model loaded!')

In [None]:
# Hold out the last 215 samples for validation; everything before them is
# used for training.
n_holdout = 215
train, test = train_data[:-n_holdout], train_data[-n_holdout:]

In [None]:
# Each sample is an (image, label) pair: images are stacked and reshaped to
# (n, IMG_SIZE, IMG_SIZE, 1), labels are stacked as they are.
net_input_shape = (-1, IMG_SIZE, IMG_SIZE, 1)

# Training tensors.
X = np.array([sample[0] for sample in train]).reshape(*net_input_shape)
Y = np.array([sample[1] for sample in train])

# Validation tensors.
test_x = np.array([sample[0] for sample in test]).reshape(*net_input_shape)
test_y = np.array([sample[1] for sample in test])


In [None]:
# Train for two epochs, validating on the held-out samples; the run is
# logged under MODEL_NAME.
model.fit(
    {'input': X},
    {'targets': Y},
    n_epoch=2,
    validation_set=({'input': test_x}, {'targets': test_y}),
    snapshot_step=50000,
    show_metric=True,
    run_id=MODEL_NAME
)
