In [22]:
""" Reads dataframe and uses Resnet 50 to perform species classification on Dangermond data """

' Reads dataframe and uses Resnet 50 to perform species classification on Dangermond data '

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import cv2
import os

In [2]:
# Load the image metadata table and discard the leftover "Unnamed: 0" column
# in the same expression (presumably an index saved by an earlier to_csv call).
df = (
    pd.read_csv("df.csv", index_col=False)
    .drop(columns=["Unnamed: 0"])
)

In [3]:
def get_image(img_name, image_dir="images-bboxes/jldp"):
    """Load a single image from disk with OpenCV.

    Parameters
    ----------
    img_name : str
        File name of the image without the ``.jpg`` extension.
    image_dir : str, optional
        Directory that holds the images. Defaults to the folder this
        notebook has always read from.

    Returns
    -------
    The decoded image exactly as returned by ``cv2.imread``.

    Raises
    ------
    FileNotFoundError
        If OpenCV could not read the file. ``cv2.imread`` reports a missing
        or undecodable file by returning ``None`` rather than raising, which
        would otherwise only blow up later when the image is written or
        resized.
    """
    path = f"{image_dir}/{img_name}.jpg"
    image = cv2.imread(path)
    if image is None:
        # Fail fast and name the offending file instead of propagating None.
        raise FileNotFoundError(f"Could not read image: {path}")
    return image

In [4]:
# Decode every image named in the table and store it next to its metadata.
# NOTE(review): this keeps all decoded images in memory at the same time.
df["image"] = df["image_name"].map(get_image)

In [5]:
# Preview the table after the decoded images were attached in the "image" column.
df

Unnamed: 0,image_name,species,common_name,image
0,0005ef15-2aa9-4df3-b843-0329866e57c3,latrans,Coyote,"[[[1, 1, 1], [1, 1, 1], [1, 1, 1], [1, 1, 1], ..."
1,000a7ac3-decf-40ad-83c8-eff254efc678,latrans,Coyote,"[[[20, 20, 20], [18, 18, 18], [18, 18, 18], [1..."
2,00162be1-3ae1-4703-992e-c5865e6e870a,scrofa,Wild Boar,"[[[119, 129, 139], [120, 130, 140], [120, 130,..."
3,00168b48-41c5-4c66-bef6-908d9a50175e,scrofa,Wild Boar,"[[[152, 117, 67], [152, 117, 67], [149, 114, 6..."
4,0030e116-4944-4390-9d56-094779752388,latrans,Coyote,"[[[13, 13, 13], [15, 15, 15], [17, 17, 17], [1..."
...,...,...,...,...
9462,ffeea024-47ac-477f-b670-b9dc8029ff82,scrofa,Wild Boar,"[[[105, 105, 105], [110, 110, 110], [104, 104,..."
9463,ffeff04b-2970-4be5-a966-84a3f082bdf9,latrans,Coyote,"[[[37, 37, 37], [33, 33, 33], [37, 37, 37], [3..."
9464,fff2f4e7-429f-4571-a4e4-32642ac8776e,latrans,Coyote,"[[[62, 62, 62], [58, 58, 58], [58, 58, 58], [6..."
9465,fff38e44-fc40-4a7e-a9e8-64ee14108035,latrans,Coyote,"[[[6, 6, 6], [6, 6, 6], [6, 6, 6], [7, 7, 7], ..."


In [6]:
# Images per class, most frequent first.
# 25 classes
# marked class imbalance!
class_counts = (
    df[["common_name", "image_name"]]
    .groupby("common_name", as_index=False)
    .count()
    .sort_values(by="image_name", ascending=False)
)

# Number of distinct labels; sizes the output layer of the classifier.
num_classes = len(df["common_name"].unique())

# A cell only renders its last expression, so the table has to come last;
# previously it was computed and silently thrown away.
class_counts

In [7]:
# Report how many distinct labels were found in the table.
summary = f"There are {num_classes} classes"
print(summary)

There are 25 classes


In [8]:
# Organize the data in the on-disk layout read later by `flow_from_directory`:
# one sub-folder per label, each holding that label's images as .jpg files.
path = "tf-dataset/jldp"  # loop-invariant, so set once up front

for img, img_name, cname in zip(df["image"].values, df["image_name"].values, df["common_name"].values):
    # one folder per label; makedirs also creates the missing parent folders
    # and is a no-op when the folder already exists
    full_path = os.path.join(path, str(cname))
    os.makedirs(full_path, exist_ok=True)

    full_name = os.path.join(full_path, f"{img_name}.jpg")

    # write file if it doesn't exist already
    if not os.path.isfile(full_name):
        # cv2.imwrite reports failure through its return value, not an exception
        if not cv2.imwrite(full_name, img):
            raise OSError(f"Could not write image: {full_name}")

In [9]:
# resize all images
new_shape = (224, 224)
df["image"] = df["image"].apply(lambda x: cv2.resize(x, new_shape))

### ResNet50

In [10]:
from tensorflow.keras.applications import ResNet50
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Flatten, Dense
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [11]:
# Load the pre-trained model
base_model = ResNet50(weights="imagenet", include_top=True, input_shape=(224, 224, 3))
batch_size = 32

In [12]:
# Total number of layers in the pre-trained backbone; the freezing step further
# down slices this layer list by position.
len(base_model.layers)

177

In [13]:
# Freeze everything except the last `num_unfrozen` layers of the backbone,
# so only that tail (plus the new classifier) is updated during training.
num_unfrozen = 30
frozen_part = base_model.layers[:-num_unfrozen]
for frozen_layer in frozen_part:
    frozen_layer.trainable = False

# all data
# :-70, 2 epochs, accuracy 0.65, val accuracy 0.41
# :-30, 3 epochs, accuracy 0.74, val accuracy 0.77
# :-20, 5 epochs, accuracy 0.60, val accuracy 0.60

# removing labels with <= 5 observations
# :-30, 3 epochs, accuracy 0.86, val accuracy 0.85

In [14]:
# Stack the classifier step by step: pre-trained backbone, flattening of its
# output, then one softmax unit per class.
model = Sequential()
model.add(base_model)
model.add(Flatten())
model.add(Dense(num_classes, activation="softmax"))

In [15]:
# Training setup: Adam optimizer, categorical cross-entropy loss,
# accuracy as the reported metric.
model.compile(
    loss="categorical_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)

In [16]:
# One generator definition serves both splits: it applies the ResNet50
# preprocessing and reserves 20% of the images for validation.
datagen = ImageDataGenerator(preprocessing_function=preprocess_input, validation_split=0.2)

path = "tf-dataset/jldp/"

# Settings shared by the training and validation streams.
flow_options = dict(
    target_size=(224, 224),
    batch_size=batch_size,
    class_mode="categorical",
)

train_generator = datagen.flow_from_directory(path, subset="training", **flow_options)
validation_generator = datagen.flow_from_directory(path, subset="validation", **flow_options)

Found 7586 images belonging to 25 classes.
Found 1881 images belonging to 25 classes.


In [17]:
# Train on the training split and evaluate on the validation split after each
# epoch. The returned History object is kept: it holds the per-epoch loss and
# accuracy values, which were previously discarded and only visible as an
# object repr in the cell output.
history = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=3,
    verbose=1,
)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<keras.callbacks.History at 0x1e3a14dad50>

In [21]:
# save model: architecture as JSON, learned weights as HDF5
structure_path = "tf-dataset/models/model01.json"
weights_path = "tf-dataset/models/model01.h5"

# Create the output folder(s) first; open() and save_weights() do not create
# missing directories and would fail on a fresh checkout.
for target in (structure_path, weights_path):
    os.makedirs(os.path.dirname(target), exist_ok=True)

# save structure
with open(structure_path, "w") as json_file:
    json_file.write(model.to_json())

# save weights
# NOTE(review): the label-to-index mapping of the training generator is not
# persisted here; confirm it is stored elsewhere if predictions must be decoded.
model.save_weights(weights_path)