In [2]:
import matplotlib.pyplot as plt
import numpy as np
import os
import tensorflow as tf

from tensorflow.keras.preprocessing import image_dataset_from_directory
from tensorflow.keras.preprocessing.image import ImageDataGenerator

! pip install -q kaggle
from google.colab import files

files.upload()
! mkdir ~/.kaggle

! cp kaggle.json ~/.kaggle/
! chmod 600 ~/.kaggle/kaggle.json
! kaggle datasets list
! kaggle datasets download -d egorgeorgiy/some-minerals
!unzip some-minerals.zip
import os, cv2
for root, dirs, files in os.walk('minerals_cropped_added/'):
    i = 0
    f = os.path.basename(root)
    for file in files:
      base = os.path.splitext(file)[0]
      image = cv2.imread(root + '/' + file)  # read the image (OpenCV)
      os.remove('minerals_cropped_added/' + f+'/'+file)
      cv2.imwrite('minerals_cropped_added/' + f + '/' + base + '.jpeg', image)
      i=i+1

!pip install split-folders
import splitfolders 
splitfolders.ratio("minerals_cropped_added/", output="output", ratio=(.8, .1, .1)) 


In [16]:
train_dir = os.path.join('/content/output/', 'train')
validation_dir = os.path.join('/content/output/', 'val')
test_dir = os.path.join('/content/output/', 'test')
BATCH_SIZE = 32
IMG_SIZE = (299, 299)


# Add our data-augmentation parameters to ImageDataGenerator
train_datagen = ImageDataGenerator(rescale = 1./255.,rotation_range = 20, width_shift_range = 0.2, height_shift_range = 0.2, shear_range = 0.2, zoom_range = 0.2, horizontal_flip = True, vertical_flip = True)

# Note that the validation data should not be augmented!
test_datagen = ImageDataGenerator( rescale = 1.0/255. )

train_generator = train_datagen.flow_from_directory(train_dir, batch_size = 32, target_size = (299, 299))

# Flow validation images in batches of 20 using test_datagen generator
validation_generator = test_datagen.flow_from_directory( validation_dir,  batch_size = 32, target_size = (299, 299))

test_generator = test_datagen.flow_from_directory( validation_dir,  batch_size = 32, target_size = (299, 299))

Found 1382 images belonging to 18 classes.
Found 165 images belonging to 18 classes.
Found 165 images belonging to 18 classes.


In [18]:
print(train_generator.class_indices.keys())

dict_keys(['agate', 'amethyst', 'aquamarine', 'azurite', 'beryl', 'calcite', 'chalcedony', 'corundum', 'diamond', 'elbaite', 'fluorite', 'gypsum', 'opal', 'pyrite', 'quartz', 'rhodochrosite', 'spinel', 'topaz'])


In [17]:
import keras
base_model = keras.applications.Xception(
    weights='imagenet',  
    input_shape=(299, 299, 3),
    include_top=False)

In [19]:
add_model = tf.keras.Sequential()
add_model.add(tf.keras.layers.Flatten())
add_model.add(tf.keras.layers.Dropout(rate = 0.5)) 
add_model.add(tf.keras.layers.Dense(units=512, activation=tf.nn.relu))
add_model.add(tf.keras.layers.Dropout(rate = 0.2)) 
add_model.add(tf.keras.layers.Dense(units=18, activation=tf.nn.softmax))

model = tf.keras.Model(inputs=base_model.input, outputs=add_model(base_model.output))
model.compile(loss='categorical_crossentropy', optimizer=tf.keras.optimizers.Adam(lr=1e-4),
              metrics=keras.metrics.CategoricalAccuracy())

In [20]:
callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=4) 

history = model.fit(
    train_generator,
    epochs=10,
    validation_data=validation_generator,
    callbacks=[callback],
    verbose = 1
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50


In [21]:
base_model.trainable = True


model.compile(loss='categorical_crossentropy', optimizer=tf.keras.optimizers.Adam(lr=1e-6),
              metrics=['accuracy'])

# Train end-to-end. Be careful to stop before you overfit!
history = model.fit(
    train_generator,
    epochs=20,
    validation_data=validation_generator,
    callbacks=[callback],
    verbose = 1
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20


In [37]:
print("Evaluate on test data")
results = model.evaluate(test_generator, batch_size=32)
print("test loss, test acc:", results)
model.save('model_on_xception.h5')
model.save_weights('model_on_xception_weights.h5')


Evaluate on test data
test loss, test acc: [2.025449514389038, 0.521212100982666]


In [28]:
model_json=model.to_json()
with open("model.json", "w") as json_file:
    json_file.write(model_json)