In [None]:
import tensorflow as tf
import numpy as np
import os
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import sqlite3

In [None]:
from settings.config import log_config, db_file, IMAGE_FOLDER, SESSION_OUTPUT_FOLDER
import db.db_funcs as dbf
# List the entries currently present in IMAGE_FOLDER.
# NOTE(review): `images_dir` is not referenced by any other cell visible in this
# notebook - confirm whether it is still needed.
images_dir = os.listdir(IMAGE_FOLDER)

In [None]:
# One record per classified image of the chosen session:
# {'filename': <path under IMAGE_FOLDER>, 'class': <classification string>}
data_list = []

with dbf.db_ops(db_file) as cursor:
    session_id = 2
    cursor.execute("select name as filename, classification from image where session_id = ? and classification != '';", (session_id,))
    rows = cursor.fetchall()

    # Column 0 is the image name, column 1 its classification (see the SELECT above).
    data_list.extend(
        {'filename': IMAGE_FOLDER+'/'+row[0], 'class': row[1]}
        for row in rows
    )
    print(data_list)

In [None]:

# Derive the class vocabulary from the data and build the lookup tables.
# Classes are sorted, so index i always refers to the i-th name in alphabetical order.
class_names = sorted({entry["class"] for entry in data_list})
number_of_classes = len(class_names)

map_index_to_label = dict(enumerate(class_names))
map_label_to_index = {label: index for index, label in map_index_to_label.items()}

# One-hot vector for every class label.
map_label_to_categorical = {
    label: tf.keras.utils.to_categorical(index, num_classes=number_of_classes)
    for label, index in map_label_to_index.items()
}
print(map_label_to_categorical)

# Attach the one-hot label to every record.
for item in data_list:
    item["label"] = map_label_to_categorical[item["class"]]

In [None]:
def load_image(filename, label):
    """Read one image file and return it as a 256x256 3-channel tensor.

    Args:
        filename: Path of the image file to read.
        label: Label belonging to the image; returned unchanged.

    Returns:
        A ``(image, label)`` tuple where ``image`` is the decoded picture
        resized to 256x256.
    """
    raw_bytes = tf.io.read_file(filename)
    # expand_animations=False keeps animated files as a single frame, so the
    # result always has three dimensions (height, width, channels).
    decoded = tf.image.decode_image(raw_bytes, channels=3, expand_animations=False)
    resized = tf.image.resize(decoded, [256, 256])
    return resized, label

def create_dataset(data_list):
    """Build a tf.data pipeline of (image, label) pairs from the record list.

    Args:
        data_list: Records carrying a 'filename' and a 'label' entry each.

    Returns:
        An unbatched ``tf.data.Dataset`` whose elements are produced by
        ``load_image``.
    """
    paths = [record['filename'] for record in data_list]
    targets = [record['label'] for record in data_list]

    # Pair each path with its label, then load the image behind every path.
    path_label_pairs = tf.data.Dataset.from_tensor_slices((paths, targets))
    return path_label_pairs.map(load_image)

# Shuffle the records once, with a fixed seed, BEFORE building the dataset.
# The query that fills data_list has no ORDER BY and the train/val/test split
# further down is positional (take/skip), so without shuffling the three
# subsets are not random samples of the data. Shuffling the Python list
# (instead of calling dataset.shuffle) keeps the element order identical on
# every pass over the dataset, so the subsets can never overlap.
shuffle_rng = np.random.default_rng(42)
shuffled_data_list = [data_list[i] for i in shuffle_rng.permutation(len(data_list))]

dataset = create_dataset(shuffled_data_list)

# Batch the dataset
batch_size = 32
dataset = dataset.batch(batch_size)


In [None]:
# Inspect the structure (shapes and dtypes) of the batched (image, label) elements.
dataset.element_spec


In [None]:
# Iterator that yields the batches as NumPy arrays, for inspection below.
data_iterator = dataset.as_numpy_iterator()

In [None]:
# Pull one (images, labels) batch from the unscaled dataset.
batch = next(data_iterator)

In [None]:
batch[1]
# One-hot labels of the batch. The position of the 1 is the class index from
# map_label_to_index, which follows the sorted order of class_names.
# NOTE(review): the index notes below are the author's; check them against the
# printed map_label_to_categorical output - confirm.
# 0 screenshots
# 1 keep
# 2 work
# 100 screenshots
# 010 keep
# 001 work

In [None]:
# Preview up to five images of the unscaled batch, titled with their one-hot label.
images, labels = batch
fig, ax = plt.subplots(ncols=5, figsize=(20, 20))
for axis, img, label in zip(ax, images[:5], labels):
    # Values are still floats at this point; cast to int so imshow treats them as 0-255.
    axis.imshow(img.astype(int))
    axis.title.set_text(label)

In [None]:
# Scale data: divide every image by 255, labels pass through unchanged.
# Note: this cell rebinds `dataset` to its own output, so running it twice
# divides by 255 twice.
def _scale_pixels(image, label):
    return image / 255.0, label

dataset = dataset.map(_scale_pixels)
scaled_iterator = dataset.as_numpy_iterator()


In [None]:
# Pull one (images, labels) batch from the scaled dataset.
batch = next(scaled_iterator)

In [None]:
# Sanity check of the scaling step: the largest pixel value in the batch.
# NOTE(review): expected to be at most 1.0 if the decoded images were in 0-255 - confirm.
batch[0].max()


In [None]:
# Shape of the image batch: (images in batch, 256, 256, 3) given the resize to
# 256x256 and channels=3 in load_image.
batch[0].shape

In [None]:
# Preview up to five images of the scaled batch, titled with their one-hot label.
scaled_images, scaled_labels = batch
fig, ax = plt.subplots(ncols=5, figsize=(20, 20))
for axis, img, label in zip(ax, scaled_images[:5], scaled_labels):
    axis.imshow(img)
    axis.title.set_text(label)

In [None]:
# Split data
# `dataset` is already batched, so every size below is a number of batches,
# not a number of images.
num_batches = len(dataset)
test_size = int(0.1 * num_batches) or 1   # at least one batch for testing
val_size = int(0.2 * num_batches) or 1    # at least one batch for validation
train_size = num_batches - test_size - val_size

# With fewer than three batches train_size would be 0 or negative, and
# Dataset.take(-1) / skip(-1) mean "all elements", which would silently
# produce wrong subsets. Fail loudly instead.
if train_size < 1:
    raise ValueError(
        f"Need at least 3 batches to split into train/val/test, got {num_batches}; "
        "use a smaller batch_size or more classified images."
    )
print(train_size, val_size, test_size)

In [None]:
# Carve the batched dataset into three consecutive, non-overlapping ranges:
# [0, train_size) -> train, then val_size batches -> val, then test_size batches -> test.
val_start = train_size
test_start = train_size + val_size

train = dataset.take(train_size)
val = dataset.skip(val_start).take(val_size)
test = dataset.skip(test_start).take(test_size)

In [None]:
# Small CNN: three convolution/pooling stages followed by a dense classifier.
model = tf.keras.models.Sequential()

# First convolution. The input shape matches the images produced by the
# pipeline: 256x256 pixels with 3 colour channels.
model.add(tf.keras.layers.Conv2D(filters=64, kernel_size=(3, 3), activation='relu', input_shape=(256, 256, 3)))
model.add(tf.keras.layers.MaxPooling2D(pool_size=2, strides=2))

# Second convolution
model.add(tf.keras.layers.Conv2D(filters=128, kernel_size=(3, 3), activation='relu'))
model.add(tf.keras.layers.MaxPooling2D(pool_size=2, strides=2))

# Third convolution
model.add(tf.keras.layers.Conv2D(filters=64, kernel_size=(3, 3), activation='relu'))
model.add(tf.keras.layers.MaxPooling2D(pool_size=2, strides=2))

# Flatten the feature maps and apply dropout before the dense layers
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dropout(0.5))

# 512-unit hidden layer
model.add(tf.keras.layers.Dense(units=512, activation='relu'))

# One softmax output per class
model.add(tf.keras.layers.Dense(units=number_of_classes, activation='softmax'))

model.summary()

In [None]:
# Labels are one-hot encoded, hence categorical cross-entropy.
model.compile(loss='categorical_crossentropy', optimizer='rmsprop', metrics=['accuracy'])

# `train` and `val` are already batched, so one epoch walks over every batch
# once: the number of steps is simply the number of batches. These values are
# printed for information only; fit() derives them from the datasets itself.
steps_per_epoch = len(train)
validation_steps = len(val)
print(f"steps_per_epoch: {steps_per_epoch}")
print(f"validation_steps: {validation_steps}")

history = model.fit(train, epochs=25, validation_data=val, verbose=1)


In [None]:

# Per-epoch metrics recorded by model.fit
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs = range(len(acc))

# Accuracy and loss side by side, so both curves are visible and no empty
# figure is left behind.
fig, (acc_ax, loss_ax) = plt.subplots(ncols=2, figsize=(14, 5))

acc_ax.plot(epochs, acc, 'r', label='Training accuracy')
acc_ax.plot(epochs, val_acc, 'b', label='Validation accuracy')
acc_ax.set_title('Training and validation accuracy')
acc_ax.set_xlabel('Epoch')
acc_ax.set_ylabel('Accuracy')
acc_ax.legend(loc=0)

loss_ax.plot(epochs, loss, 'r', label='Training loss')
loss_ax.plot(epochs, val_loss, 'b', label='Validation loss')
loss_ax.set_title('Training and validation loss')
loss_ax.set_xlabel('Epoch')
loss_ax.set_ylabel('Loss')
loss_ax.legend(loc=0)

plt.show()

In [None]:
from tensorflow.keras.metrics import Precision, Recall, CategoricalAccuracy
# Fresh metric accumulators for the test-set evaluation; they collect state
# over every update_state() call made on them.
pre, rec, cat_acc = Precision(), Recall(), CategoricalAccuracy()

In [None]:
# Feed every test batch through the model and accumulate the metrics.
for batch in test.as_numpy_iterator():
    X, y = batch
    yhat = model.predict(X)
    for metric in (pre, rec, cat_acc):
        metric.update_state(y, yhat)

In [None]:
# Read the accumulated metric values once, then report them.
precision_value = pre.result().numpy()
recall_value = rec.result().numpy()
accuracy_value = cat_acc.result().numpy()

print(f"Precision: {precision_value}")
print(f"Recall: {recall_value}")
print(f"Categorical Accuracy: {accuracy_value}")

# NOTE(review): author's notes on the one-hot layout; the actual order is the
# one in map_label_to_categorical (sorted class names) - confirm.
# 100 screenshots
# 010 keep
# 001 work

In [None]:
# Classify a single image from disk. Swap in one of the alternative paths
# below to try another sample.
img_path = './tmp/both/ssh.jpg'
# img_path = './tmp/both/wrk.jpg'
# img_path = './tmp/both/kp.jpg'
img = mpimg.imread(img_path)
plt.imshow(img)
plt.show()

# Same preprocessing as the training pipeline: resize to 256x256, divide by 255,
# and add a leading batch dimension.
# NOTE(review): dividing by 255 assumes `img` holds 0-255 values, as for these
# JPEG files; other formats may load differently - confirm before reusing.
resize = tf.image.resize(img, [256, 256])
yhat = model.predict(np.expand_dims(resize/255.0, 0))
print(yhat)

# The position of the highest probability is the class index. Translate it
# with map_index_to_label, the mapping the training labels were built from,
# rather than hard-coding which index means which class.
pred = np.argmax(yhat)
print(map_index_to_label[int(pred)])

In [None]:
# Re-run the prediction for the image currently held in `img`:
# resize to 256x256, divide by 255, add a batch dimension.
resize = tf.image.resize(img, [256, 256])
yhat = model.predict(np.expand_dims(resize/255.0, 0))

# Translate the most probable class index with map_index_to_label, the mapping
# the training labels were built from, rather than hard-coding the class order.
pred = np.argmax(yhat)
print(map_index_to_label[int(pred)])

In [None]:
# Index of the highest probability in the latest prediction, translated with
# map_index_to_label (the mapping the training labels were built from) rather
# than a hard-coded class order.
pred = np.argmax(yhat)
print(map_index_to_label[int(pred)])