# Example: using a CNN to perform chart classification
(This code is adapted from https://www.pyimagesearch.com/2020/04/27/fine-tuning-resnet-with-keras-tensorflow-and-deep-learning/)

In [None]:
!pip install opencv-python
!pip install imutils
!pip install tensorflow

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import AveragePooling2D
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Input
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications import ResNet50
from sklearn.metrics import classification_report, accuracy_score
import numpy as np
import argparse

In [None]:
from imutils import paths

In [None]:
import tensorflow as tf

In [None]:
# True when TensorFlow can see at least one GPU. This replaces the
# deprecated tf.test.is_gpu_available() with the device-listing API.
len(tf.config.list_physical_devices('GPU')) > 0

In [None]:
# Show the GPU devices TensorFlow has detected (an empty list means none)
tf.config.list_physical_devices(device_type='GPU')

In [None]:
# import the necessary packages
import os

# base directory that holds the training, validation, and testing image
# folders used below
BASE_PATH = "../data/ImageClassification/"

# derive the training, validation, and testing directories; os.path.join
# only inserts a separator when one is needed, so the trailing slash on
# BASE_PATH does not produce a doubled "//" inside the derived paths
TRAIN_PATH = os.path.join(BASE_PATH, "training")
VAL_PATH = os.path.join(BASE_PATH, "chart_classification_validation")
TEST_PATH = os.path.join(BASE_PATH, "testing")

# define the names of the classes
CLASSES = ['Area','Bar','Box','Heatmap','Line','Scatter','Violin']

# initial learning rate, batch size, and number of epochs to train for
# (a single epoch is configured here; raise NUM_EPOCHS, e.g. to 20, for a
# longer training run)
INIT_LR = 1e-4
BS = 32
NUM_EPOCHS = 1

# define the path to the serialized output model after training
# (the file name is kept exactly as it was so existing references to the
# saved model keep resolving)
MODEL_PATH = "chart_classfication_basic.model"

In [None]:
# count how many image files live under each of the training,
# validation, and testing directories
totalTrain = sum(1 for _ in paths.list_images(TRAIN_PATH))
totalVal = sum(1 for _ in paths.list_images(VAL_PATH))
totalTest = sum(1 for _ in paths.list_images(TEST_PATH))

In [None]:
# random augmentations applied to the training images only
augmentation = {
	"rotation_range": 40,
	"zoom_range": 0.3,
	"width_shift_range": 0.3,
	"height_shift_range": 0.3,
	"shear_range": 0.3,
	"horizontal_flip": True,
	"fill_mode": "nearest",
}

# build the training data augmentation object from those settings
trainAug = ImageDataGenerator(**augmentation)

In [None]:
# ResNet-50's own input preprocessing (RGB -> BGR plus ImageNet mean
# subtraction); this is the same function the inference cells at the end
# of the notebook apply before calling model.predict
resnet_preprocess = tf.keras.applications.resnet50.preprocess_input

# initialize the validation/testing data generator object: no random
# augmentation, only the ResNet-50 preprocessing
valAug = ImageDataGenerator(preprocessing_function=resnet_preprocess)

# apply the identical preprocessing to the training generator so that
# training, validation, and testing images are all normalized the same way
trainAug.preprocessing_function = resnet_preprocess

# ImageNet per-channel means (in RGB order), kept for reference.
# NOTE: assigning this array to a generator's `.mean` attribute only has an
# effect when the generator was created with featurewise_center=True, which
# is not the case here, so the mean subtraction is delegated to
# `resnet_preprocess` above instead.
mean = np.array([123.68, 116.779, 103.939], dtype="float32")

In [None]:
# options shared by every directory iterator: one-hot labels, 224x224 RGB
# images, and BS images per batch
flowOptions = dict(
	class_mode="categorical",
	target_size=(224, 224),
	color_mode="rgb",
	batch_size=BS)

# training generator: augmented images, reshuffled
trainGen = trainAug.flow_from_directory(
	TRAIN_PATH, shuffle=True, **flowOptions)

# validation generator: un-augmented images in a fixed order
valGen = valAug.flow_from_directory(
	VAL_PATH, shuffle=False, **flowOptions)

# testing generator: reuses the validation generator object, fixed order
testGen = valAug.flow_from_directory(
	TEST_PATH, shuffle=False, **flowOptions)

In [None]:
# Display the class-name -> label-index mapping used by the training generator
trainGen.class_indices

In [None]:
# load ResNet-50 pre-trained on ImageNet, without its fully-connected
# classification head, for 224x224 RGB inputs
print("[INFO] preparing model...")
baseModel = ResNet50(
	include_top=False,
	weights="imagenet",
	input_tensor=Input(shape=(224, 224, 3)))

# layers of the new classification head, in the order they are applied:
# 7x7 average pooling, flatten, two 32-unit ReLU layers, and a softmax
# output with one unit per class
headLayers = [
	AveragePooling2D(pool_size=(7, 7)),
	Flatten(name="flatten"),
	Dense(32, activation="relu"),
	Dense(32, activation="relu"),
	Dense(len(CLASSES), activation="softmax"),
]

# stack the head layers on top of the base model's output
headModel = baseModel.output
for headLayer in headLayers:
	headModel = headLayer(headModel)

# the full network (base + new head) is the model that gets trained
model = Model(inputs=baseModel.input, outputs=headModel)

In [None]:
# loop over all layers in the base model and freeze them so they will
# *not* be updated during the training process
for layer in baseModel.layers:
	layer.trainable = False

# Learning-rate schedule equivalent to the optimizer's former `decay`
# argument: lr = INIT_LR / (1 + decay_rate * step). Current Keras
# optimizers no longer accept `decay=...`, so the decay is expressed as an
# explicit schedule instead.
lrSchedule = tf.keras.optimizers.schedules.InverseTimeDecay(
	initial_learning_rate=INIT_LR,
	decay_steps=1,
	decay_rate=INIT_LR / NUM_EPOCHS)

# compile the model
opt = Adam(learning_rate=lrSchedule)
# loss = "categorical_crossentropy" for more than two classes
# loss = "binary_crossentropy" for two classes
model.compile(loss="categorical_crossentropy", optimizer=opt,
	metrics=["accuracy"])

In [None]:
# number of full batches per epoch for training and for validation
trainSteps = totalTrain // BS
valSteps = totalVal // BS

# fit the model on the training generator, validating after every epoch;
# H keeps the object returned by fit
print("[INFO] training model...")
H = model.fit(
	x=trainGen,
	epochs=NUM_EPOCHS,
	steps_per_epoch=trainSteps,
	validation_data=valGen,
	validation_steps=valSteps)

In [None]:
print("[INFO] evaluating network...")
testGen.reset()
# Predict over *every* batch of the test generator, including the final
# partial one. Using totalTest // BS steps would drop the last partial
# batch, leaving fewer predictions than entries in testGen.classes and
# making classification_report fail on mismatched lengths.
predProbs = model.predict(testGen, steps=len(testGen))
# for each image in the testing set we need to find the index of the
# label with corresponding largest predicted probability
predIdxs = np.argmax(predProbs, axis=1)
# class names ordered by label index; passing the label indices explicitly
# keeps the names aligned even if a class has no test images
classNames = list(testGen.class_indices.keys())
# show a nicely formatted classification report
print(classification_report(testGen.classes, predIdxs,
	labels=list(range(len(classNames))), target_names=classNames))
# serialize the model to disk
# NOTE(review): newer Keras releases may reject `save_format` and/or a file
# name that does not end in .h5/.keras -- confirm against the installed
# version before relying on this call.
print("[INFO] saving model...")
model.save(MODEL_PATH, save_format="h5")

In [None]:
# Example: predict the class of a real image downloaded from a URL

In [None]:
from PIL import Image
import requests

In [None]:
# URL of the sample image that the following cells download and classify
url = 'https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcR-HV5bf6hRFSIVySrBZAKN1YtB3qDAIsaOig&usqp=CAU'

In [None]:
import io

# Download the image with a timeout so the cell cannot hang forever, and
# raise on HTTP errors instead of handing an error page to PIL. Reading
# `response.content` (rather than the raw socket stream) also applies any
# content decoding and lets the connection be released.
response = requests.get(url, timeout=10)
response.raise_for_status()
im = Image.open(io.BytesIO(response.content))

In [None]:
# Display the downloaded image inline
im

In [None]:
# Write the downloaded image to tmp.png in the current working directory
im.save("tmp.png")

In [None]:
#array = tf.keras.preprocessing.image.img_to_array(im)

In [None]:
from tensorflow.keras.preprocessing import image

In [None]:
# Reload the saved image from disk, resized to the 224x224 network input size
img = image.load_img(path="tmp.png", target_size=(224, 224))

In [None]:
# convert the PIL image to an array, then prepend a batch axis so the
# result is a batch containing this single image
img_array = image.img_to_array(img)
img_batch = img_array[np.newaxis, ...]
# optional: preprocessing the batch may improve prediction performance

In [None]:
# Predict on the batch as loaded (no preprocessing applied); the displayed
# array holds one row of per-class scores for the single image
model.predict(img_batch)

In [None]:
from tensorflow.keras.applications.resnet50 import preprocess_input

In [None]:
# preprocess_input overwrites a float NumPy array in place, so hand it a
# copy: img_batch then keeps its raw pixel values and the earlier
# prediction cell gives the same result if it is re-run
img_preprocessed = preprocess_input(img_batch.copy())

In [None]:
# Predict again, this time on the batch passed through preprocess_input
model.predict(img_preprocessed)