# Capstone Mushroom Classification 

The 

First we use beautifulSoup to scrape the website "svampguiden.com" to get the names of the most common mushrooms in Sweden. We will save these in a dictionary together with a label describing if they are edible or not.  

In [8]:
from urllib.request import urlopen, Request
from bs4 import BeautifulSoup


In [2]:
# Specify url of mushroom website
mushroom_url = 'http://svampguiden.com/databasen/lista/'

page = urlopen(mushroom_url)
soup = BeautifulSoup(page, 'html.parser')


In [3]:
latin_html = soup.find_all(class_="vnamn")

Some of the latin names have a last part to their name "lat.sv" that makes the google image searches a lot worse. Removing this makes the names yeild a lot better results on google. Also, the names are all 2 words long so we only have to look at the first two names

In [4]:
mushroom_dict = {}
for name in latin_html:
    mushroom_list = name.get_text().split()
    mushroom_dict[" ".join(mushroom_list[0:2])] = 1

In [5]:
mushroom_dict['Tricholoma sulphureum'] = mushroom_dict.pop('Tricholoma sulphureum/bryogenum')

print(mushroom_dict)
print(len(latin_html))

{'Agaricus altipes': 1, 'Agaricus arvensis': 1, 'Agaricus augustus': 1, 'Agaricus bernardii': 1, 'Agaricus bisporus': 1, 'Agaricus bitorquis': 1, 'Agaricus campestris': 1, 'Agaricus langei': 1, 'Agaricus litoralis': 1, 'Agaricus sylvaticus': 1, 'Agaricus sylvicola': 1, 'Agaricus urinascens': 1, 'Agaricus xanthoderma': 1, 'Agrocybe pediades': 1, 'Agrocybe praecox': 1, 'Albatrellus citrinus': 1, 'Albatrellus confluens': 1, 'Albatrellus ovinus': 1, 'Albatrellus subrubescens': 1, 'Albatrellus syringae': 1, 'Amanita ceciliae': 1, 'Amanita citrina': 1, 'Amanita excelsa': 1, 'Amanita gemmata': 1, 'Amanita lividopallescens': 1, 'Amanita muscaria': 1, 'Amanita pantherina': 1, 'Amanita phalloides': 1, 'Amanita rubescens': 1, 'Amanita virosa': 1, 'Armillaria mellea': 1, 'Artomyces cristatus': 1, 'Artomyces pyxidatus': 1, 'Asterophora lycoperdoides': 1, 'Aureoboletus gentilis': 1, 'Bankera fuligineoalba': 1, 'Boletopsis grisea': 1, 'Boletus edulis': 1, 'Boletus luridiformis': 1, 'Boletus pinophilu

### Scraping Google Images for pictures 

In [9]:
import requests
import re
import os
#import cookielib
import json

def get_soup(url,header):
    return BeautifulSoup(urlopen(Request(url,headers=header)),'html.parser')

def get_pictures(query):
    query = query # you can change the query for the image  here
    image_type="ActiOn"
    query= query.split()
    query='+'.join(query)
    url="https://www.google.co.in/search?q="+query+"&source=lnms&tbm=isch"
    print(url)
    #add the directory for your image here
    DIR="pictures"
    header={'User-Agent':"Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.134 Safari/537.36"
    }
    soup = get_soup(url,header)


    ActualImages=[]# contains the link for Large original images, type of  image
    for a in soup.find_all("div",{"class":"rg_meta"}):
        link , Type =json.loads(a.text)["ou"]  ,json.loads(a.text)["ity"]
        ActualImages.append((link,Type))

    print("there are total {} images".format(len(ActualImages)))

    if not os.path.exists(DIR):
        os.mkdir(DIR)
    DIR = os.path.join(DIR, query.split()[0])

    if not os.path.exists(DIR):
        os.mkdir(DIR)
        ###print images
        for i , (img , Type) in enumerate( ActualImages):

            try:
                #req = request(img, headers={'User-Agent' : header})
                raw_img = urlopen(img).read()

                cntr = len([i for i in os.listdir(DIR) if image_type in i]) + 1
                #print(cntr)
                if len(Type)==0:
                    f = open(os.path.join(DIR , image_type + "_"+ str(cntr)+".jpg"), 'wb')
                else :
                    f = open(os.path.join(DIR , image_type + "_"+ str(cntr)+"."+Type), 'wb')


                f.write(raw_img)
                f.close()
            except Exception as e:
                print("could not load : {}".format(img))
                print(e)
    else: 
        print(DIR)

In [10]:
for idx, name in enumerate(mushroom_dict): 
    print("Downloading pictures for {}".format(name))
    get_pictures(name)
    print("Done with iteration {}: {} ".format(idx, name))

Downloading pictures for Agaricus altipes
https://www.google.co.in/search?q=Agaricus+altipes&source=lnms&tbm=isch
there are total 100 images
pictures/Agaricus+altipes
Done with iteration 0: Agaricus altipes 
Downloading pictures for Agaricus arvensis
https://www.google.co.in/search?q=Agaricus+arvensis&source=lnms&tbm=isch
there are total 100 images
pictures/Agaricus+arvensis
Done with iteration 1: Agaricus arvensis 
Downloading pictures for Agaricus augustus
https://www.google.co.in/search?q=Agaricus+augustus&source=lnms&tbm=isch
there are total 100 images
pictures/Agaricus+augustus
Done with iteration 2: Agaricus augustus 
Downloading pictures for Agaricus bernardii
https://www.google.co.in/search?q=Agaricus+bernardii&source=lnms&tbm=isch
there are total 100 images
pictures/Agaricus+bernardii
Done with iteration 3: Agaricus bernardii 
Downloading pictures for Agaricus bisporus
https://www.google.co.in/search?q=Agaricus+bisporus&source=lnms&tbm=isch
there are total 100 images
pictures/

there are total 100 images
pictures/Boletus+luridiformis
Done with iteration 38: Boletus luridiformis 
Downloading pictures for Boletus pinophilus
https://www.google.co.in/search?q=Boletus+pinophilus&source=lnms&tbm=isch
there are total 100 images
pictures/Boletus+pinophilus
Done with iteration 39: Boletus pinophilus 
Downloading pictures for Boletus reticulatus
https://www.google.co.in/search?q=Boletus+reticulatus&source=lnms&tbm=isch
there are total 100 images
pictures/Boletus+reticulatus
Done with iteration 40: Boletus reticulatus 
Downloading pictures for Butyriboletus appendiculatus
https://www.google.co.in/search?q=Butyriboletus+appendiculatus&source=lnms&tbm=isch
there are total 100 images
pictures/Butyriboletus+appendiculatus
Done with iteration 41: Butyriboletus appendiculatus 
Downloading pictures for Butyriboletus fechtneri
https://www.google.co.in/search?q=Butyriboletus+fechtneri&source=lnms&tbm=isch
there are total 100 images
pictures/Butyriboletus+fechtneri
Done with iter

there are total 100 images
pictures/Cortinarius+orellanus
Done with iteration 75: Cortinarius orellanus 
Downloading pictures for Cortinarius rubellus
https://www.google.co.in/search?q=Cortinarius+rubellus&source=lnms&tbm=isch
there are total 100 images
pictures/Cortinarius+rubellus
Done with iteration 76: Cortinarius rubellus 
Downloading pictures for Cortinarius sanguineus
https://www.google.co.in/search?q=Cortinarius+sanguineus&source=lnms&tbm=isch
there are total 100 images
pictures/Cortinarius+sanguineus
Done with iteration 77: Cortinarius sanguineus 
Downloading pictures for Cortinarius traganus
https://www.google.co.in/search?q=Cortinarius+traganus&source=lnms&tbm=isch
there are total 100 images
pictures/Cortinarius+traganus
Done with iteration 78: Cortinarius traganus 
Downloading pictures for Cortinarius venetus
https://www.google.co.in/search?q=Cortinarius+venetus&source=lnms&tbm=isch
there are total 100 images
pictures/Cortinarius+venetus
Done with iteration 79: Cortinarius 

there are total 100 images
pictures/Gymnopilus+spectabilis
Done with iteration 112: Gymnopilus spectabilis 
Downloading pictures for Gymnopus dryophilus
https://www.google.co.in/search?q=Gymnopus+dryophilus&source=lnms&tbm=isch
there are total 100 images
pictures/Gymnopus+dryophilus
Done with iteration 113: Gymnopus dryophilus 
Downloading pictures for Gymnopus foetidus
https://www.google.co.in/search?q=Gymnopus+foetidus&source=lnms&tbm=isch
there are total 100 images
pictures/Gymnopus+foetidus
Done with iteration 114: Gymnopus foetidus 
Downloading pictures for Gymnopus fusipes
https://www.google.co.in/search?q=Gymnopus+fusipes&source=lnms&tbm=isch
there are total 100 images
pictures/Gymnopus+fusipes
Done with iteration 115: Gymnopus fusipes 
Downloading pictures for Gymnopus putillus
https://www.google.co.in/search?q=Gymnopus+putillus&source=lnms&tbm=isch
there are total 100 images
pictures/Gymnopus+putillus
Done with iteration 116: Gymnopus putillus 
Downloading pictures for Gyromit

there are total 100 images
pictures/Hygrophorus+cossus
Done with iteration 149: Hygrophorus cossus 
Downloading pictures for Hygrophorus discoideus
https://www.google.co.in/search?q=Hygrophorus+discoideus&source=lnms&tbm=isch
there are total 100 images
pictures/Hygrophorus+discoideus
Done with iteration 150: Hygrophorus discoideus 
Downloading pictures for Hygrophorus gliocyclus
https://www.google.co.in/search?q=Hygrophorus+gliocyclus&source=lnms&tbm=isch
there are total 100 images
pictures/Hygrophorus+gliocyclus
Done with iteration 151: Hygrophorus gliocyclus 
Downloading pictures for Hygrophorus hypothejus
https://www.google.co.in/search?q=Hygrophorus+hypothejus&source=lnms&tbm=isch
there are total 100 images
pictures/Hygrophorus+hypothejus
Done with iteration 152: Hygrophorus hypothejus 
Downloading pictures for Hygrophorus karstenii
https://www.google.co.in/search?q=Hygrophorus+karstenii&source=lnms&tbm=isch
there are total 100 images
pictures/Hygrophorus+karstenii
Done with iterat

there are total 100 images
pictures/Lactarius+sanguifluus
Done with iteration 186: Lactarius sanguifluus 
Downloading pictures for Lactarius semisanguifluus
https://www.google.co.in/search?q=Lactarius+semisanguifluus&source=lnms&tbm=isch
there are total 100 images
pictures/Lactarius+semisanguifluus
Done with iteration 187: Lactarius semisanguifluus 
Downloading pictures for Lactarius serifluus
https://www.google.co.in/search?q=Lactarius+serifluus&source=lnms&tbm=isch
there are total 100 images
pictures/Lactarius+serifluus
Done with iteration 188: Lactarius serifluus 
Downloading pictures for Lactarius torminosus
https://www.google.co.in/search?q=Lactarius+torminosus&source=lnms&tbm=isch
there are total 100 images
pictures/Lactarius+torminosus
Done with iteration 189: Lactarius torminosus 
Downloading pictures for Lactarius trivialis
https://www.google.co.in/search?q=Lactarius+trivialis&source=lnms&tbm=isch
there are total 100 images
pictures/Lactarius+trivialis
Done with iteration 190:

there are total 100 images
pictures/Mycena+amicta
Done with iteration 223: Mycena amicta 
Downloading pictures for Mycena aurantiomarginata
https://www.google.co.in/search?q=Mycena+aurantiomarginata&source=lnms&tbm=isch
there are total 100 images
pictures/Mycena+aurantiomarginata
Done with iteration 224: Mycena aurantiomarginata 
Downloading pictures for Mycena capillaripes
https://www.google.co.in/search?q=Mycena+capillaripes&source=lnms&tbm=isch
there are total 100 images
pictures/Mycena+capillaripes
Done with iteration 225: Mycena capillaripes 
Downloading pictures for Mycena capillaris
https://www.google.co.in/search?q=Mycena+capillaris&source=lnms&tbm=isch
there are total 100 images
pictures/Mycena+capillaris
Done with iteration 226: Mycena capillaris 
Downloading pictures for Mycena chlorantha
https://www.google.co.in/search?q=Mycena+chlorantha&source=lnms&tbm=isch
there are total 100 images
pictures/Mycena+chlorantha
Done with iteration 227: Mycena chlorantha 
Downloading pictur

there are total 100 images
pictures/Pluteus+cervinus
Done with iteration 261: Pluteus cervinus 
Downloading pictures for Porphyrellus porphyrosporus
https://www.google.co.in/search?q=Porphyrellus+porphyrosporus&source=lnms&tbm=isch
there are total 100 images
pictures/Porphyrellus+porphyrosporus
Done with iteration 262: Porphyrellus porphyrosporus 
Downloading pictures for Psathyrella candolleana
https://www.google.co.in/search?q=Psathyrella+candolleana&source=lnms&tbm=isch
there are total 100 images
pictures/Psathyrella+candolleana
Done with iteration 263: Psathyrella candolleana 
Downloading pictures for Pseudomerulius aureus
https://www.google.co.in/search?q=Pseudomerulius+aureus&source=lnms&tbm=isch
there are total 100 images
pictures/Pseudomerulius+aureus
Done with iteration 264: Pseudomerulius aureus 
Downloading pictures for Psilocybe semilanceata
https://www.google.co.in/search?q=Psilocybe+semilanceata&source=lnms&tbm=isch
there are total 100 images
pictures/Psilocybe+semilancea

there are total 100 images
pictures/Russula+roseipes
Done with iteration 299: Russula roseipes 
Downloading pictures for Russula sanguinea
https://www.google.co.in/search?q=Russula+sanguinea&source=lnms&tbm=isch
there are total 100 images
pictures/Russula+sanguinea
Done with iteration 300: Russula sanguinea 
Downloading pictures for Russula torulosa
https://www.google.co.in/search?q=Russula+torulosa&source=lnms&tbm=isch
there are total 100 images
pictures/Russula+torulosa
Done with iteration 301: Russula torulosa 
Downloading pictures for Russula versicolor
https://www.google.co.in/search?q=Russula+versicolor&source=lnms&tbm=isch
there are total 100 images
pictures/Russula+versicolor
Done with iteration 302: Russula versicolor 
Downloading pictures for Russula vesca
https://www.google.co.in/search?q=Russula+vesca&source=lnms&tbm=isch
there are total 100 images
pictures/Russula+vesca
Done with iteration 303: Russula vesca 
Downloading pictures for Russula vinosa
https://www.google.co.in

there are total 100 images
pictures/Suillus+variegatus
Done with iteration 337: Suillus variegatus 
Downloading pictures for Tapinella atrotomentosa
https://www.google.co.in/search?q=Tapinella+atrotomentosa&source=lnms&tbm=isch
there are total 100 images
pictures/Tapinella+atrotomentosa
Done with iteration 338: Tapinella atrotomentosa 
Downloading pictures for Thelephora palmata
https://www.google.co.in/search?q=Thelephora+palmata&source=lnms&tbm=isch
there are total 100 images
pictures/Thelephora+palmata
Done with iteration 339: Thelephora palmata 
Downloading pictures for Tremellodendropsis tuberosa
https://www.google.co.in/search?q=Tremellodendropsis+tuberosa&source=lnms&tbm=isch
there are total 100 images
pictures/Tremellodendropsis+tuberosa
Done with iteration 340: Tremellodendropsis tuberosa 
Downloading pictures for Tricholoma aestuans
https://www.google.co.in/search?q=Tricholoma+aestuans&source=lnms&tbm=isch
there are total 100 images
pictures/Tricholoma+aestuans
Done with iter

could not load : https://www.fungipedia.org/images/galerias/verpa_bohemica/verpa_bohemica3.jpg
HTTP Error 403: Forbidden
Done with iteration 364: Verpa bohemica 
Downloading pictures for Verpa conica
https://www.google.co.in/search?q=Verpa+conica&source=lnms&tbm=isch
there are total 100 images
could not load : http://i1.treknature.com/photos/14710/vinger1.jpg
HTTP Error 403: Forbidden
could not load : http://i1.treknature.com/photos/14710/vingerhoedje11-001.jpg
HTTP Error 403: Forbidden
could not load : http://www.verpa.us/wp-content/uploads/2016/10/Amanita_muscaria_3_vliegenzwammen_op_rij-1024x768.jpg
HTTP Error 403: Forbidden
Done with iteration 365: Verpa conica 
Downloading pictures for Xerocomellus chrysenteron
https://www.google.co.in/search?q=Xerocomellus+chrysenteron&source=lnms&tbm=isch
there are total 100 images
could not load : https://upload.wikimedia.org/wikipedia/commons/d/da/Xerocomus_chrysenteron_041031w.jpg
HTTP Error 404: Not Found
could not load : http://fungiflora.c

## Preprocessing 
The pictures are now downloaded and we should preprocess them to be able to feed them into the keras model 

In [14]:
import os
import imghdr
teeeest = imghdr.what("pictures/Steccherinum+robustius/ActiOn_75.jpg")

# remove 0 size files
for directory in os.listdir("pictures/"):
    
    if os.path.isdir("pictures/"+directory):
        for filename in os.listdir("pictures/"+directory):
            if imghdr.what("pictures/"+directory+"/" + filename) != "jpeg" and not filename.endswith("Store"):
                os.remove("pictures/"+directory+"/" + filename)
                print("pictures/"+directory+"/" + filename)
            elif os.stat("pictures/"+directory+"/" + filename).st_size < 2000:
                os.remove("pictures/"+directory+"/" + filename)
                print("pictures/"+directory+"/" + filename)
                

pictures/Gomphidius+roseus/ActiOn_95.jpg
pictures/Lactarius+semisanguifluus/ActiOn_51.jpg
pictures/Helvella+acetabulum/ActiOn_70.jpg
pictures/Lentinellus+vulpinus/ActiOn_89.jpg
pictures/Cystolepiota+adulterina/ActiOn_83.jpg
pictures/Suillus+grevillei/ActiOn_83.jpg
pictures/Lactarius+pyrogalus/ActiOn_62.jpg
pictures/Mycena+chlorantha/ActiOn_57.jpg
pictures/Inocybe+nitidiuscula/ActiOn_50.jpg


In [15]:
from sklearn.datasets import load_files       
from keras.utils import np_utils
import numpy as np
from glob import glob

Using TensorFlow backend.
  return f(*args, **kwds)


In [20]:
def load_dataset(path):
    data = load_files(path)
    all_files = np.array(data['filenames'])
    all_targets = np_utils.to_categorical(np.array(data['target']), 371)
    return all_files, all_targets

In [21]:
mushroom_names = [item[9:] for item in sorted(glob("pictures/*"))]

In [22]:
print(len(mushroom_names))

371


In [23]:
all_files, all_targets = load_dataset('pictures')

In [24]:
all_targets.shape

(34946, 371)

When examining the dataset downloaded from google I found that there were gifs, php files and other things that came from my scarping function. To delete all unwanted items I decided to remove all pictures that are not jpg's. 
This will remove files like png's and other formats but I also found that most pictures in the png file format were pictures of maps where the mushrooms could be found. 

In [25]:
print(all_files.shape)
print(all_targets.shape)

clean_all_files = all_files
clean_all_targets = all_targets
id_to_delete = []
#for idx, file in reversed(list(enumerate(all_files))): 
#    if file[-4:] != '.jpg':
#        id_to_delete.append(idx)
#        clean_all_files = np.delete(clean_all_files,idx, axis=0)
#        clean_all_targets = np.delete(clean_all_targets,idx, axis=0)
        



print(all_files.shape)
print(all_targets.shape)
print(clean_all_files.shape)
print(clean_all_targets.shape)

(34946,)
(34946, 371)
(34946,)
(34946, 371)
(34946,)
(34946, 371)


In [26]:
# Just to make sure that my funtion deleted the items from the np arrays correctly
print(all_targets.shape[0] - clean_all_targets.shape[0])
print(len(id_to_delete))

0
0


#### Training, testing and validation sets
We also need to divide the pictures into training, validation and test sets. To do this we will use sklearn train test split twice. First to divide the whole dataset into 80-20 training-test/validation split we then split the test/validation into two resulting in the follwoing split: 

Train: 80% 
Test: 10%
Validation: 10%

In [27]:
from sklearn.model_selection import train_test_split 


# Frist split 
train_files,test_valid_files, train_targets, test_valid_targets = train_test_split(clean_all_files, 
                                                                                    clean_all_targets, 
                                                                                    test_size=0.2, 
                                                                                    random_state=0)

test_files,  valid_files, test_targets, valid_targets = train_test_split(test_valid_files, 
                                                                                    test_valid_targets, 
                                                                                    test_size=0.5, 
                                                                                    random_state=0)


print("The train dataset is of size files: {} target: {}".format(train_files.shape, train_targets.shape))
print("The train dataset is of size files: {} target: {}".format(test_files.shape, test_targets.shape))
print("The train dataset is of size files: {} target: {}".format(valid_files.shape, valid_targets.shape))

print("The dataset is now split up into: {} {} {}".format(train_files.shape[0]/clean_all_targets.shape[0],
                                                         test_files.shape[0]/clean_all_targets.shape[0],
                                                         valid_files.shape[0]/clean_all_targets.shape[0]))


The train dataset is of size files: (27956,) target: (27956, 371)
The train dataset is of size files: (3495,) target: (3495, 371)
The train dataset is of size files: (3495,) target: (3495, 371)
The dataset is now split up into: 0.7999771075373433 0.10001144623132834 0.10001144623132834


To be able to feed our pictures into Keras we have to transform them into a 4D tensor with shape: 

$$
(\text{nb_samples}, \text{rows}, \text{columns}, \text{channels}),
$$

where nb_samples corresponds to the total number of images (or samples), and rows, columns, and channels correspond to the number of rows, columns, and channels for each image, respectively.
The path_to_tensor function below takes a string-valued file path to a color image as input and returns a 4D tensor suitable for supplying to a Keras CNN. The function first loads the image and resizes it to a square image that is $224 \times 224$ pixels. Next, the image is converted to an array, which is then resized to a 4D tensor. In this case, since we are working with color images, each image has three channels. Likewise, since we are processing a single image (or sample), the returned tensor will always have shape
$$
(1, 224, 224, 3).
$$
The paths_to_tensor function takes a numpy array of string-valued image paths as input and returns a 4D tensor with shape
$$
(\text{nb_samples}, 224, 224, 3).
$$
Here, nb_samples is the number of samples, or number of images, in the supplied array of image paths. It is best to think of nb_samples as the number of 3D tensors (where each 3D tensor corresponds to a different image) in your dataset!


In [28]:
import imghdr
teeeest = imghdr.what("pictures/Steccherinum+robustius/ActiOn_72.jpg")
print(teeeest)

jpeg


In [44]:
from tqdm import tqdm
import numpy as np
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing import image
from keras import applications
#from PIL import Image
#from IPython.display import display

print(image.load_img)

def path_to_tensor(img_path):
    # loads RGB image as PIL.Image.Image type
    try:
        if img_path == "pictures/Paxillus+filamentosus/ActiOn_82.jpg":
            img_path = "pictures/Paxillus+filamentosus/ActiOn_81.jpg"

        img = image.load_img(img_path, target_size=(224, 224))
        # convert PIL.Image.Image type to 3D tensor with shape (224, 224, 3)
        x = image.img_to_array(img)
    except ZeroDivisionError as detail:
        print(detail)
        print(img_path)
        
    
    # convert 3D tensor to 4D tensor with shape (1, 224, 224, 3) and return 4D tensor
    return np.expand_dims(x, axis=0)
    

def paths_to_tensor(img_paths):
    list_of_tensors = [path_to_tensor(img_path) for img_path in tqdm(img_paths)]
    return np.vstack(list_of_tensors)

<function load_img at 0x273a4ce18>


In [30]:
train_tensors = paths_to_tensor(train_files).astype('float32')/255
valid_tensors = paths_to_tensor(valid_files).astype('float32')/255
test_tensors = paths_to_tensor(test_files).astype('float32')/255

100%|██████████| 27956/27956 [25:25<00:00, 18.32it/s]
100%|██████████| 3495/3495 [02:39<00:00, 21.94it/s]
 80%|███████▉  | 2793/3495 [02:22<00:35, 19.54it/s]

ZeroDivisionError: division by zero

 80%|███████▉  | 2793/3495 [02:40<00:40, 17.45it/s]

In [45]:
test_tensors = paths_to_tensor(test_files).astype('float32')/255 






  0%|          | 0/3495 [00:00<?, ?it/s][A[A[A[A[A




  0%|          | 2/3495 [00:00<03:12, 18.17it/s][A[A[A[A[A




  0%|          | 5/3495 [00:00<04:12, 13.80it/s][A[A[A[A[A




  0%|          | 12/3495 [00:00<02:25, 23.99it/s][A[A[A[A[A




  0%|          | 17/3495 [00:00<02:09, 26.91it/s][A[A[A[A[A




  1%|          | 21/3495 [00:00<02:02, 28.32it/s][A[A[A[A[A




  1%|          | 24/3495 [00:01<03:18, 17.51it/s][A[A[A[A[A




  1%|          | 27/3495 [00:01<04:08, 13.95it/s][A[A[A[A[A




  1%|          | 30/3495 [00:02<03:56, 14.66it/s][A[A[A[A[A




  1%|          | 32/3495 [00:02<05:07, 11.25it/s][A[A[A[A[A




  1%|          | 37/3495 [00:03<04:44, 12.15it/s][A[A[A[A[A




  1%|          | 40/3495 [00:03<04:31, 12.71it/s][A[A[A[A[A




  1%|▏         | 44/3495 [00:03<04:16, 13.45it/s][A[A[A[A[A




  1%|▏         | 47/3495 [00:03<04:20, 13.26it/s][A[A[A[A[A




  1%|▏         | 51/3495 [00:03<04:06, 1

 14%|█▍        | 503/3495 [00:25<02:32, 19.62it/s][A[A[A[A[A




 14%|█▍        | 506/3495 [00:26<02:36, 19.14it/s][A[A[A[A[A




 15%|█▍        | 512/3495 [00:26<02:34, 19.29it/s][A[A[A[A[A




 15%|█▍        | 516/3495 [00:26<02:34, 19.34it/s][A[A[A[A[A




 15%|█▍        | 520/3495 [00:27<02:35, 19.18it/s][A[A[A[A[A




 15%|█▍        | 523/3495 [00:27<02:35, 19.09it/s][A[A[A[A[A




 15%|█▌        | 527/3495 [00:27<02:35, 19.14it/s][A[A[A[A[A




 15%|█▌        | 530/3495 [00:27<02:36, 18.95it/s][A[A[A[A[A




 15%|█▌        | 534/3495 [00:28<02:35, 19.01it/s][A[A[A[A[A




 15%|█▌        | 537/3495 [00:28<02:36, 18.93it/s][A[A[A[A[A




 16%|█▌        | 542/3495 [00:28<02:35, 18.98it/s][A[A[A[A[A




 16%|█▌        | 545/3495 [00:28<02:35, 19.01it/s][A[A[A[A[A




 16%|█▌        | 549/3495 [00:29<02:36, 18.77it/s][A[A[A[A[A




 16%|█▌        | 553/3495 [00:29<02:36, 18.83it/s][A[A[A[A[A




 16%|█▌        | 557

 29%|██▉       | 1016/3495 [00:49<02:01, 20.35it/s][A[A[A[A[A




 29%|██▉       | 1020/3495 [00:50<02:01, 20.38it/s][A[A[A[A[A




 29%|██▉       | 1024/3495 [00:50<02:01, 20.33it/s][A[A[A[A[A




 29%|██▉       | 1030/3495 [00:50<02:00, 20.41it/s][A[A[A[A[A




 30%|██▉       | 1034/3495 [00:50<02:00, 20.42it/s][A[A[A[A[A




 30%|██▉       | 1040/3495 [00:50<01:59, 20.47it/s][A[A[A[A[A




 30%|██▉       | 1044/3495 [00:50<01:59, 20.50it/s][A[A[A[A[A




 30%|██▉       | 1048/3495 [00:51<01:59, 20.53it/s][A[A[A[A[A




 30%|███       | 1052/3495 [00:51<01:58, 20.57it/s][A[A[A[A[A




 30%|███       | 1056/3495 [00:51<01:58, 20.59it/s][A[A[A[A[A




 30%|███       | 1060/3495 [00:51<01:58, 20.46it/s][A[A[A[A[A




 30%|███       | 1064/3495 [00:51<01:58, 20.50it/s][A[A[A[A[A




 31%|███       | 1068/3495 [00:52<01:58, 20.50it/s][A[A[A[A[A




 31%|███       | 1072/3495 [00:52<01:58, 20.53it/s][A[A[A[A[A




 31%|█

 44%|████▍     | 1537/3495 [01:16<01:37, 20.01it/s][A[A[A[A[A




 44%|████▍     | 1541/3495 [01:16<01:37, 20.02it/s][A[A[A[A[A




 44%|████▍     | 1545/3495 [01:17<01:37, 20.04it/s][A[A[A[A[A




 44%|████▍     | 1549/3495 [01:17<01:37, 19.93it/s][A[A[A[A[A




 44%|████▍     | 1552/3495 [01:17<01:37, 19.93it/s][A[A[A[A[A




 44%|████▍     | 1555/3495 [01:18<01:37, 19.85it/s][A[A[A[A[A




 45%|████▍     | 1557/3495 [01:18<01:38, 19.75it/s][A[A[A[A[A




 45%|████▍     | 1559/3495 [01:18<01:38, 19.75it/s][A[A[A[A[A




 45%|████▍     | 1564/3495 [01:19<01:37, 19.77it/s][A[A[A[A[A




 45%|████▍     | 1570/3495 [01:19<01:37, 19.82it/s][A[A[A[A[A




 45%|████▌     | 1576/3495 [01:19<01:36, 19.86it/s][A[A[A[A[A




 45%|████▌     | 1580/3495 [01:19<01:36, 19.86it/s][A[A[A[A[A




 45%|████▌     | 1586/3495 [01:19<01:35, 19.91it/s][A[A[A[A[A




 45%|████▌     | 1590/3495 [01:20<01:35, 19.85it/s][A[A[A[A[A




 46%|█

 58%|█████▊    | 2020/3495 [01:46<01:17, 19.06it/s][A[A[A[A[A




 58%|█████▊    | 2022/3495 [01:46<01:17, 19.06it/s][A[A[A[A[A




 58%|█████▊    | 2026/3495 [01:46<01:17, 19.07it/s][A[A[A[A[A




 58%|█████▊    | 2030/3495 [01:46<01:16, 19.08it/s][A[A[A[A[A




 58%|█████▊    | 2033/3495 [01:46<01:16, 19.08it/s][A[A[A[A[A




 58%|█████▊    | 2039/3495 [01:46<01:16, 19.12it/s][A[A[A[A[A




 58%|█████▊    | 2044/3495 [01:46<01:15, 19.14it/s][A[A[A[A[A




 59%|█████▊    | 2050/3495 [01:46<01:15, 19.18it/s][A[A[A[A[A




 59%|█████▉    | 2055/3495 [01:47<01:14, 19.20it/s][A[A[A[A[A




 59%|█████▉    | 2061/3495 [01:47<01:14, 19.24it/s][A[A[A[A[A




 59%|█████▉    | 2068/3495 [01:47<01:14, 19.28it/s][A[A[A[A[A




 59%|█████▉    | 2076/3495 [01:47<01:13, 19.34it/s][A[A[A[A[A




 60%|█████▉    | 2082/3495 [01:47<01:13, 19.35it/s][A[A[A[A[A




 60%|█████▉    | 2087/3495 [01:48<01:12, 19.29it/s][A[A[A[A[A




 60%|█

 72%|███████▏  | 2529/3495 [02:09<00:49, 19.48it/s][A[A[A[A[A




 73%|███████▎  | 2534/3495 [02:10<00:49, 19.44it/s][A[A[A[A[A




 73%|███████▎  | 2538/3495 [02:10<00:49, 19.45it/s][A[A[A[A[A




 73%|███████▎  | 2541/3495 [02:10<00:49, 19.45it/s][A[A[A[A[A




 73%|███████▎  | 2544/3495 [02:10<00:48, 19.46it/s][A[A[A[A[A




 73%|███████▎  | 2549/3495 [02:10<00:48, 19.48it/s][A[A[A[A[A




 73%|███████▎  | 2558/3495 [02:10<00:47, 19.53it/s][A[A[A[A[A




 73%|███████▎  | 2563/3495 [02:11<00:47, 19.54it/s][A[A[A[A[A




 73%|███████▎  | 2567/3495 [02:11<00:47, 19.54it/s][A[A[A[A[A




 74%|███████▎  | 2571/3495 [02:11<00:47, 19.55it/s][A[A[A[A[A




 74%|███████▎  | 2576/3495 [02:11<00:46, 19.57it/s][A[A[A[A[A




 74%|███████▍  | 2580/3495 [02:11<00:46, 19.58it/s][A[A[A[A[A




 74%|███████▍  | 2586/3495 [02:11<00:46, 19.61it/s][A[A[A[A[A




 74%|███████▍  | 2590/3495 [02:12<00:46, 19.61it/s][A[A[A[A[A




 74%|█

 88%|████████▊ | 3065/3495 [02:36<00:22, 19.53it/s][A[A[A[A[A




 88%|████████▊ | 3069/3495 [02:37<00:21, 19.53it/s][A[A[A[A[A




 88%|████████▊ | 3072/3495 [02:37<00:21, 19.54it/s][A[A[A[A[A




 88%|████████▊ | 3075/3495 [02:37<00:21, 19.51it/s][A[A[A[A[A




 88%|████████▊ | 3081/3495 [02:37<00:21, 19.54it/s][A[A[A[A[A




 88%|████████▊ | 3087/3495 [02:37<00:20, 19.56it/s][A[A[A[A[A




 88%|████████▊ | 3093/3495 [02:37<00:20, 19.58it/s][A[A[A[A[A




 89%|████████▊ | 3097/3495 [02:38<00:20, 19.59it/s][A[A[A[A[A




 89%|████████▊ | 3101/3495 [02:38<00:20, 19.60it/s][A[A[A[A[A




 89%|████████▉ | 3106/3495 [02:38<00:19, 19.62it/s][A[A[A[A[A




 89%|████████▉ | 3110/3495 [02:38<00:19, 19.62it/s][A[A[A[A[A




 89%|████████▉ | 3114/3495 [02:38<00:19, 19.64it/s][A[A[A[A[A




 89%|████████▉ | 3118/3495 [02:38<00:19, 19.64it/s][A[A[A[A[A




 89%|████████▉ | 3122/3495 [02:38<00:18, 19.65it/s][A[A[A[A[A




 89%|█

In [47]:
top_model_weights_path = 'bottleneck_fc_model.h5'
nb_train_samples = train_files.shape[0]
nb_validation_samples = valid_files.shape[0]
epochs = 50
batch_size = 16

In [46]:
def save_bottlebeck_features():
    datagen = ImageDataGenerator()

    # build the VGG16 network
    model = applications.VGG16(include_top=False, weights='imagenet')

    generator = datagen.flow(
        train_tensors,
        train_targets,
        batch_size=batch_size)
    
    bottleneck_features_train = model.predict_generator(
        generator, nb_train_samples // batch_size)
    np.save(open('bottleneck_features_train.npy', 'w'),
            bottleneck_features_train)

    generator = datagen.flow_from_directory(
        valid_tensors,
        valid_targets,
        batch_size=batch_size)
    
    bottleneck_features_validation = model.predict_generator(
        generator, nb_validation_samples // batch_size)
    np.save(open('bottleneck_features_validation.npy', 'w'),
            bottleneck_features_validation)

In [None]:
def train_top_model():
    train_data = np.load(open('bottleneck_features_train.npy'))
    train_labels = np.array(
        [0] * (nb_train_samples / 2) + [1] * (nb_train_samples / 2))

    validation_data = np.load(open('bottleneck_features_validation.npy'))
    validation_labels = np.array(
        [0] * (nb_validation_samples / 2) + [1] * (nb_validation_samples / 2))

    model = Sequential()
    model.add(Flatten(input_shape=train_data.shape[1:]))
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(1, activation='sigmoid'))

    model.compile(optimizer='rmsprop',
                  loss='binary_crossentropy', metrics=['accuracy'])

    model.fit(train_data, train_labels,
              epochs=epochs,
              batch_size=batch_size,
              validation_data=(validation_data, validation_labels))
    model.save_weights(top_model_weights_path)