In [1]:
#importing the required libraries
import numpy as np 
import re
import pandas as pd
import os,glob
import tensorflow as tf
import keras
from keras.layers import Input,Dense
from keras.models import Model,load_model
import math
from keras.applications import InceptionV3,VGG19,Xception,DenseNet201
from keras.layers.pooling import GlobalAveragePooling2D
import pathlib
from matplotlib import pyplot as plt

In [2]:
#retrieve public GCS paths from a public Kaggle dataset
from kaggle_datasets import KaggleDatasets

In [3]:
# Detect a TPU. Only the resolver call is guarded: it raises ValueError when
# no TPU is available, which is the one situation where falling back to the
# default strategy is the right thing to do.
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    print("Device:", tpu.master())
except ValueError:
    tpu = None

if tpu is not None:
    # Failures while connecting to / initialising a TPU that *was* found are
    # real errors and are deliberately left to propagate instead of being
    # hidden behind a silent CPU/GPU fallback.
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.experimental.TPUStrategy(tpu)
else:
    # Default (no-op) distribution strategy.
    strategy = tf.distribute.get_strategy()
print("Number of replicas:", strategy.num_replicas_in_sync)

Device: grpc://10.0.0.2:8470
Number of replicas: 8


In [4]:
class ReadData():
    """tf.data input pipeline over the 192x192 JPEG TFRecord files of the attached Kaggle dataset.

    Class attributes (resolved once, when the class body is executed):
        GCS_DS_PATH: GCS path returned by ``KaggleDatasets().get_gcs_path()``.
        training_path / validation_path / test_path: lists of ``*.tfrec`` files.
        num_training_samples / num_validation_samples / num_testing_samples:
            image counts derived from the file names (see ``count_images``).

    Instance attributes:
        input_shape: shape every decoded image is reshaped to.
        epochs: number of training epochs.
        batch_size: global batch size (16 per replica of the module-level ``strategy``).
    """

    def count_images(filenames):
        """Return the total number of images in ``filenames``.

        The count of each file is read from its name: the digits found between
        a ``-`` and the ``.`` that follows them. A name without such a pattern
        makes ``search`` return ``None`` and therefore raises ``AttributeError``.
        """
        count_pattern = re.compile(r"-([0-9]*)\.")
        return np.sum([int(count_pattern.search(filename).group(1)) for filename in filenames])

    # The GCS lookup and the globs are done a single time here; instances read
    # these values through normal attribute lookup (self.training_path, ...),
    # so __init__ does not need to repeat the remote calls.
    GCS_DS_PATH = KaggleDatasets().get_gcs_path()
    training_path = tf.io.gfile.glob(GCS_DS_PATH + '/tfrecords-jpeg-192x192/train/*.tfrec')
    validation_path = tf.io.gfile.glob(GCS_DS_PATH + '/tfrecords-jpeg-192x192/val/*.tfrec')
    test_path = tf.io.gfile.glob(GCS_DS_PATH + '/tfrecords-jpeg-192x192/test/*.tfrec')

    num_training_samples = count_images(training_path)
    num_validation_samples = count_images(validation_path)
    num_testing_samples = count_images(test_path)

    # Wrapped only after the calls above: a staticmethod object is not directly
    # callable inside the class body on older Python versions. Wrapping makes
    # both ReadData.count_images(files) and ReadData().count_images(files) work.
    count_images = staticmethod(count_images)

    def __init__(self):
        self.input_shape = [192,192,3]
        self.epochs = 25
        # Global batch size: 16 examples for each replica of the strategy.
        self.batch_size = 16 * strategy.num_replicas_in_sync

    def _decode_image(self, image_bytes):
        """Decode a JPEG string into a float32 RGB tensor scaled by 1/255 and shaped ``input_shape``."""
        image = tf.image.decode_jpeg(image_bytes,channels=3)
        image = tf.cast(image,tf.float32)/255.0
        return tf.reshape(image,self.input_shape)

    def read_label_records(self,data):
        """Parse one serialized labeled example into ``(image, label)``; label is cast to int32."""
        label_feature = {
             "image":tf.io.FixedLenFeature([],tf.string),
             "class":tf.io.FixedLenFeature([],tf.int64)
        }
        decoded = tf.io.parse_single_example(data,label_feature)

        image = self._decode_image(decoded['image'])
        label = tf.cast(decoded['class'], tf.int32)

        return image,label

    def read_unlabeled_records(self,data):
        """Parse one serialized unlabeled example into ``(image, id)``; id is the raw string feature."""
        id_feature = {
             "image":tf.io.FixedLenFeature([],tf.string),
             "id":tf.io.FixedLenFeature([],tf.string)
        }
        decoded = tf.io.parse_single_example(data,id_feature)

        image = self._decode_image(decoded['image'])
        image_id = decoded['id']

        return image,image_id

    def load_dataset(self,filename,labeled=True,ordered=False):
        """Build an unbatched dataset of parsed records from TFRecord file(s).

        Args:
            filename: TFRecord path or list of paths.
            labeled: parse records with ``read_label_records`` when True,
                otherwise with ``read_unlabeled_records``.
            ordered: when False, determinism is switched off so tf.data may
                return elements out of order; when True the default
                (deterministic) options are kept.
        """
        option_order = tf.data.Options()
        if not ordered:
            # Only relax ordering when the caller did not ask for it.
            option_order.experimental_deterministic = False

        dataset = tf.data.TFRecordDataset(filename)
        dataset = dataset.with_options(option_order)
        dataset = dataset.map(self.read_label_records if labeled else self.read_unlabeled_records)
        return dataset

    def get_training_data(self):
        """Return the training dataset: repeated indefinitely, shuffled (buffer 2048) and batched."""
        dataset = self.load_dataset(self.training_path,labeled=True,ordered=False)
        dataset = dataset.repeat()
        dataset = dataset.shuffle(2048)
        dataset = dataset.batch(self.batch_size)
        return dataset

    def get_validation_data(self):
        """Return the validation dataset, batched and then cached."""
        dataset = self.load_dataset(self.validation_path,labeled=True,ordered=False)
        dataset = dataset.batch(self.batch_size)
        dataset = dataset.cache()
        return dataset

    def get_test_data(self):
        """Return the batched test dataset of ``(image, id)`` pairs, read with ``ordered=True``."""
        dataset = self.load_dataset(self.test_path,labeled=False,ordered=True)
        dataset = dataset.batch(self.batch_size)
        return dataset

In [5]:
class Base_Model():
    """Transfer-learning classifier: frozen ImageNet DenseNet201 + global average pooling + 104-way softmax.

    ``fit_model`` trains the model, then ``generate_result`` writes
    ``submission.csv`` and triggers the accuracy and loss plots.
    """
    # Set by fit_model(); None until training has run.
    model = None
    history = None

    def fit_model(self):
        """Build the model under the module-level ``strategy``, train it, then generate results.

        Side effects: sets ``self.model`` / ``self.history`` (also mirrored to
        the module-level names ``model`` / ``history``), and, through
        ``generate_result``, writes the submission CSV and the plot images.
        """
        global model, history

        read_cls = ReadData()
        # Take the hyper-parameters from the input pipeline so the batch size
        # used for steps_per_epoch is the one the datasets are batched with.
        self.input_shape = read_cls.input_shape
        self.epochs = read_cls.epochs
        self.batch_size = read_cls.batch_size
        self.train_images_count = read_cls.num_training_samples
        self.test_image_count = read_cls.num_testing_samples
        # The training dataset repeats forever, so an epoch length must be given.
        self.steps_per_epoch = self.train_images_count // self.batch_size

        # Only variable creation (model construction and compile) has to happen
        # inside the strategy scope.
        with strategy.scope():
            # tf.keras application so every layer of the Sequential model comes
            # from the same Keras implementation.
            self.base_model = tf.keras.applications.DenseNet201(weights='imagenet',include_top=False,
                                                                input_shape=self.input_shape)
            self.base_model.trainable = False

            self.model = tf.keras.Sequential([self.base_model,tf.keras.layers.GlobalAveragePooling2D(),
                                              tf.keras.layers.Dense(104, activation='softmax')])

            self.model.compile(optimizer='adam',loss='sparse_categorical_crossentropy',
                               metrics=['accuracy'])

        # Model.fit consumes tf.data datasets directly (fit_generator is deprecated).
        self.history = self.model.fit(read_cls.get_training_data(),
                                      steps_per_epoch=self.steps_per_epoch,
                                      epochs=self.epochs,
                                      validation_data=read_cls.get_validation_data())

        # Kept for backward compatibility with code that reads the module-level names.
        model, history = self.model, self.history

        self.generate_result()

    def generate_result(self):
        """Predict the test set, write ``submission.csv`` (columns ``id``, ``label``) and plot the curves.

        Raises:
            RuntimeError: if no model has been trained on this instance yet.
        """
        if self.model is None:
            raise RuntimeError("generate_result() requires a trained model; call fit_model() first")

        read_cls = ReadData()

        # Predictions and ids come from two separate passes over the test files
        # and are paired by position, so both passes must yield the records in
        # the same order (the test dataset is requested with ordered=True).
        test_images = read_cls.get_test_data().map(lambda image, image_id: image)
        test_predict = self.model.predict(test_images)
        test_predict = np.argmax(test_predict, axis=-1)

        test_ids_ds = read_cls.get_test_data().map(lambda image, image_id: image_id).unbatch()
        # One batch holding every id, converted from bytes to unicode strings.
        test_ids = next(iter(test_ids_ds.batch(read_cls.num_testing_samples))).numpy().astype('U')

        submission = pd.DataFrame(test_ids, columns=['id'])
        submission['label'] = test_predict

        submission.to_csv('submission.csv', index=False)

        # plotting the graph
        self.plot_acc_graph()

    def _plot_curves(self, train_values, val_values, train_label, val_label,
                     ylabel, title, legend_loc, filename):
        """Plot a training curve (red) and a validation curve (blue) per epoch, save to ``filename``, then show."""
        epochs = range(len(train_values))

        fig = plt.figure(figsize=(5,5))
        plt.plot(epochs, train_values, 'r', label=train_label)
        plt.plot(epochs, val_values, 'b', label=val_label)
        plt.xlabel('Epoch')
        plt.ylabel(ylabel)
        plt.title(title)
        plt.legend(loc=legend_loc)
        # Save before show so the file is written while the figure is still live.
        fig.savefig(filename)
        plt.show()

    def plot_acc_graph(self):
        """Plot and save training/validation accuracy ('Accuracy Plot.jpg'), then plot the loss."""
        self._plot_curves(self.history.history['accuracy'],
                          self.history.history['val_accuracy'],
                          "Training Accuracy", "Validation Accuracy",
                          'Accuracy', 'Training and validation accuracy',
                          'lower right', 'Accuracy Plot.jpg')

        self.plot_loss_graph()

    def plot_loss_graph(self):
        """Plot and save training/validation loss ('Loss Plot.jpg')."""
        self._plot_curves(self.history.history['loss'],
                          self.history.history['val_loss'],
                          "Training Loss", "Validation Loss",
                          'Loss', 'Training and validation loss',
                          'upper right', 'Loss Plot.jpg')
        
            

In [6]:
# Train the model end to end; fit_model() also calls generate_result(), which
# writes submission.csv and saves the accuracy and loss plots.
Base_Model().fit_model()

NameError: name 'ResNet152V2' is not defined