### Imports

In [1]:
import os, re, logging, json
from pprint import pprint
from tqdm import tqdm
from sklearn.metrics import confusion_matrix
import cv2 #conda install --channel https://conda.anaconda.org/menpo opencv3
from datetime import datetime
import random
import time

print_imports()

Loaded modules:
numpy                np              1.14.0
pandas               pd              0.22.0
sklearn              sk              0.19.1
keras                ks              2.1.4

matplotlib           mpl             2.1.2
matplotlib.pyplot    plt             N/A
matplotlib.image     mpimg           N/A
seaborn              sns             0.8.1
PIL                  PIL             5.0.0

ExergyUtilities      exergy          2.0.

pyspark              pyspark         2.2.1


In [2]:
mod_path = r"/home/batman/git/ai_drive/src"
sys.path.append(mod_path)
logging.debug("ADDED TO PATH: ".format(mod_path))
import drive.analysis_offline as analysis
import drive.my_generators as my_generators
import drive.my_plotting as my_plotting

### Paths

In [3]:
# Project path
this_project_path = r"/media/batman/USB STICK"
project_name = r'catdogruns'
path_root_project = os.path.join(this_project_path,project_name)
assert os.path.exists(path_root_project)

# Full data
path_data_root = r"/home/batman/Dropbox/DATA/cats_dogs_all_test_split"
path_test = os.path.join(path_data_root, 'my_test')

# Test data path
path_cats = os.path.join(path_test,'cats')
path_dogs = os.path.join(path_test,'dogs')

### Constants
IMG_SIZE = 150
layer_funcs = analysis.LAYER_FUNCS

## Main Loop

In [29]:
run_folders = [dir for dir in os.listdir(path_root_project) if re.match('run',dir)]
run_folders.sort()

# Loop run folders
for rfolder in run_folders:
    
    summary=dict()
    
    this_run_path = os.path.join(path_root_project,rfolder)
    logging.debug('**** RUN {} ****'.format(rfolder))
    
    ###### Log file ######
    log = analysis.get_log_file(this_run_path)
       
    
    print('start;',log['start'])
    summary['start'] = log['start'].__str__()
    print('elapsed; {:.1f}'.format(log['elapsed'].seconds/60))
    summary['elapsed'] = log['elapsed'].seconds/60
    print('generator;',log['generator'])
    summary['generator'] = log['generator']
    
    ###### Weights ######
    wts = analysis.get_weights(this_run_path)
    if wts: best_wt = wts[-1] # BEST weight (last weight)
    
    ###### Architecture ######
    model = analysis.load_model(this_run_path)
    arch_dict = analysis.read_model_json(this_run_path)
    
    #pprint(arch_dict)
    #raise
    
    #model.summary()
    parameter_counts = analysis.count_params(model)
    #print(analysis.count_params(model))
    
    ##### Reload weights #####
    if wts:
        #print(best_wt)
        model.load_weights(best_wt['path'])
        logging.debug("Loaded weights into model")
    
    #raise
    ###### Loop layers ######
    res = {i:layer for i,layer in enumerate(arch_dict['config'])}
    #print(res)
    #raise
    for i,layer in enumerate(arch_dict['config']):
        if layer['class_name'] == 'Dropout':
            print(layer['config']['rate'])
        #print(layer['class_name'])
        #pprint(layer)
        #layer_str = layer_funcs[layer['class_name']](layer)
        #print(i,layer_str)
    pprint(summary)
    raise
    
    ###### History ######
    path_hist = analysis.get_history(this_run_path)    
    with open(path_hist) as hist_file:
        hist_dict = json.load(hist_file)
    print("Epochs",hist_dict['params']['epochs'])
    print("Steps",hist_dict['params']['steps'])
    
    #my_plotting.plot_hist_dict(hist_dict)
    #del model
    #raise


root  - 10  - <ipython-input-29-6fe241528d30>  <module>                      : **** RUN run001 ****
start; 2018-03-21 11:11:50
elapsed; 441.8
generator; my_generators         get_train_generator_simple    : Training
root  - 20  - analysis_offline      get_weights                   : Found 19 weights files, total 501 MB = 26.4 MB per file
root  - 10  - analysis_offline      get_architecture_path         : Found architecture file at /media/batman/USB STICK/catdogruns/run001/saved_model_architecture.json
root  - 10  - analysis_offline      load_model                    : Model instantiated <keras.models.Sequential object at 0x7f56fd579ac8>
root  - 10  - analysis_offline      get_architecture_path         : Found architecture file at /media/batman/USB STICK/catdogruns/run001/saved_model_architecture.json
root  - 10  - analysis_offline      read_model_json               : Model json string loaded
root  - 10  - analysis_offline      count_params                  : Total 3453121, Trainable 34

RuntimeError: No active exception to reraise

## Iterate over the test images

generator

In [5]:
#cm = confusion_matrix(y, preds)

In [6]:
import keras.preprocessing.image
test_datagen_real = ks.preprocessing.image.ImageDataGenerator(rescale=1. / 255)

test_generator = test_datagen_real.flow_from_directory(
        path_test,
        target_size = (150,150),
        batch_size = 500,
        shuffle=False,
        #class_mode = "binary",
    );


Found 5000 images belonging to 2 classes.


In [7]:
# Data generation constants
num_batches = len(test_generator)
num_files = test_generator.n
batch_size = test_generator.batch_size

# Get filename numbers (indices)
fnames = test_generator.filenames
nums = [re.search('(?P<num>\d+).jpg',f).groups()[0]  for f in fnames]
nums = [int(n)  for n in nums]

In [8]:
seen_files = 0
test_generator.reset()
predictions_list = list()

t0 = time.time()

for i,batch in enumerate(test_generator):
    # Tally the actual seen images (tensor layers)
    seen_files += batch[0].shape[0]
    
    # Current index
    idx = test_generator.batch_index
    
    # Report
    logging.debug("{} seen {} / {} = {:.1f}%".format(idx,seen_files,num_files,seen_files/num_files*100))

    # Make predictions and append
    predictions = model.predict(batch[0])
    predictions = [i[0] for i in predictions]
    predictions_list += predictions
    
    # Seen all batches, break the loop 
    if i+1 == num_batches:
        break

t1 = time.time()        
logging.debug("Processed {} images in {} batches. Elapsed time: {}}".format(seen_files, num_batches, total = t1-t0))

root  - 10  - <ipython-input-8-de31b1a35d40>  <module>                      : 1 seen 500 / 5000 = 10.0%
root  - 10  - <ipython-input-8-de31b1a35d40>  <module>                      : 2 seen 1000 / 5000 = 20.0%
root  - 10  - <ipython-input-8-de31b1a35d40>  <module>                      : 3 seen 1500 / 5000 = 30.0%
root  - 10  - <ipython-input-8-de31b1a35d40>  <module>                      : 4 seen 2000 / 5000 = 40.0%
root  - 10  - <ipython-input-8-de31b1a35d40>  <module>                      : 5 seen 2500 / 5000 = 50.0%
root  - 10  - <ipython-input-8-de31b1a35d40>  <module>                      : 6 seen 3000 / 5000 = 60.0%
root  - 10  - <ipython-input-8-de31b1a35d40>  <module>                      : 7 seen 3500 / 5000 = 70.0%
root  - 10  - <ipython-input-8-de31b1a35d40>  <module>                      : 8 seen 4000 / 5000 = 80.0%
root  - 10  - <ipython-input-8-de31b1a35d40>  <module>                      : 9 seen 4500 / 5000 = 90.0%
root  - 10  - <ipython-input-8-de31b1a35d40>  <module>  

IndexError: tuple index out of range

In [None]:
assert len(predictions_list) == len(nums)
df_test = pd.DataFrame({'predicted probability':predictions_list},index = nums)
df_test.sort_index(inplace=True)
df_test.head()

In [None]:
assert len(df_test) == len(df_solutions)
assert (df_test.index == df_solutions.index).all()
df_acc = df_solutions.join(df_test)
df_acc.dtypes
df_acc.head()

Write results

In [None]:
with open('submission_file.csv','w') as f:
    f.write('id,label\n')
            
with open('submission_file.csv','a') as f:
    for data in tqdm(test_data):
        img_num = data[1]
        img_data = data[0]
        orig = img_data
        data = img_data.reshape(IMG_SIZE,IMG_SIZE,1)
        model_out = model.predict([data])[0]
        f.write('{},{}\n'.format(img_num,model_out[1]))

Make a single prediction

In [None]:

    #img=mpimg.imread(this_img_path)
    
    #img = mpimg.open(this_img_path)
    #img.load()
    
    
    #img = Image.open(this_img_path)
    #img.load()



    
    this_ax = fig.add_subplot(rows, columns, i)

    this_ax.set_title("{} {} {}".format(name, number, img.shape,))
    plt.imshow(img)
    plt.axis("off")
plt.show()

In [None]:
this_img_path = random.sample(test_image_paths,1)[0]
assert os.path.exists(this_img_path)
fname = os.path.split(this_img_path)[-1]
name, ext = fname.split(".")
num = int(name)

img=mpimg.imread(this_img_path)
#nop = np.array([None])
img = img[np.newaxis]
print(img.shape)
#img = np.append(nop, )
#cd_predict(model,img)
#model.predict(img, verbose=0)
#fit_img = test_datagen.fit(img)
fit_img_gen = test_datagen.flow(img)
for fit_img in fit_img_gen:
    print(fit_img.shape)
    model.predict(fit_img)



Get a sample

In [None]:
sample = random.sample(test_image_paths,1)
for p in sample:
    print(p)

In [None]:
def cd_predict(model,img):
    print("making predictions on test set...")
    predictions = model.predict(img, verbose=0)
    return predictions


In [None]:
model.evaluate(test_data, test_labels_one_hot)

In [None]:
#import Image
#

color = (17/255,17/255,17/255)


In [None]:
#model


In [None]:
predictions = model.predict(test, verbose=0)

In [None]:
for i in range(0,10):
    if predictions[i, 0] >= 0.5: 
        print('I am {:.2%} sure this is a Dog'.format(predictions[i][0]))
    else: 
        print('I am {:.2%} sure this is a Cat'.format(1-predictions[i][0]))
        
    plt.imshow(test[i].T)
    plt.show()