# Refeeding data into CNN
- Going through all the FP's from the previous cnn model and adding them as NEG clips
- Retraining the model on this new data
- Testing model on whole dataset can be done with 06 by loading this new model and running the file (so new model not tested here and just saved instead)


## Imports

In [7]:
import tensorflow as tf
import numpy as np
import librosa
import pickle
import time
import pandas
import datetime
import os
from tensorflow.keras.utils import to_categorical

## Parameters

In [8]:
ML_SR = 8000 # Target sampling rate
SPECD_FFT_LEN =  512 # Real FFT length (in the M4F - we use double of this on the PC as we don't do single-sided)
ML_BIN_AGG = 14 # Number of frequency bins (vertical dimension)
ML_FLO = 600 # Low freq
ML_FHI = 1400 # High freq
ML_FFT_STRIDE=1024 # Stride length in audio samples
ML_NUM_FFT_STRIDES = 12 # How many strides make up a sample
THRESHOLDED = False # Threshold the template or not

#CNN_MODEL_PATH = "03MLouput20240811v001.keras"  
CNN_MODEL_PATH_OLD = "model.keras"  # Path of where the pretrained model is stored
CNN_MODEL_PATH_NEW = "model_new.keras" # Path of where the new model is stored
#ANNOTATION_FILE = 'AcousticAnnotations001.pb'  # CSV file where all true annotations are stored
C_CALLS_ALL_DIR = 'c_calls_all.pkl' # Where filename, LB, UB of all C_calls are stored
AUDIO_FILES_DIR = "C:\\Users\\Amogh\\OneDrive - University of Cambridge\\Programming-New\\CaracalChitalDetector\\cnn\\data"

## Magic functions
Takes a clip and returns an spectral image \
By Prof Andrew \
2 different implemetations depending on if we want to use random numbers or not (when same clip is inputed should we get the same output everytime the function is ran)

In [9]:
def chunkToBinsFixed(chunk,fLo,fHi,numbins,sr):
    """convert a chunk (window) to spectral image.
    Provide the low and high frequencies in Hz for a spectral windowing
    numbins are the number of output bins between flo and fhi
    Provide the sample rate in Hz"""
    CMPLX_FFT_LEN = len(chunk)*2
    fS = np.fft.fft(chunk,n=CMPLX_FFT_LEN) # fft - note we double it for the cmplx fft
    fRes = sr/(CMPLX_FFT_LEN)   # frequency per cmplx bin
    # Find the bin indices - map from frequency to FFT index
    binLo = int(fLo/sr*CMPLX_FFT_LEN)
    binHi = int(fHi/sr*CMPLX_FFT_LEN)
    specSize = int((binHi-binLo)/numbins)
    binTotals = np.zeros(numbins)
    for k in range(numbins):
        dbSum = 0
        for j in range(specSize):
            idx = binLo + (k * specSize) + j # NB not numbins!
            # Convert complex magnitude to absolute value
            absVal = np.abs(fS[idx])
            # We add an offset so we don't take log of tiny numbers. We can explore what a sensible offset is - 1.0 is probably too high.
            absVal += 1.0 
            # Convert to "power" by taking log
            dbVal = np.log(absVal) # NB natural (not log10) base!
            # Add up all the "powers" - again, this is probably not "correct", but we are just trying to work out some useful input features
            dbSum += dbVal
        binTotals[k] = dbSum
    return binTotals

In [4]:

# With random
def wavFileToSpecImg(aud,num_strides,target_sr=8000,random_offset=4000,THRESHOLDED=True,SCALED=True):
    tList = []
    startIdx = int(np.random.uniform(random_offset))
    for idx in range(0,num_strides*ML_FFT_STRIDE,int(ML_FFT_STRIDE)):
        clip = aud[idx+startIdx:idx+startIdx+SPECD_FFT_LEN]
        q = chunkToBinsFixed(clip,ML_FLO,ML_FHI,ML_BIN_AGG,ML_SR)
        tList.append(q)
    tList = np.array(tList)
    # Thresholding
    if THRESHOLDED:
        tList = (tList >0)*tList
    if SCALED:
        # Scale the dB mag spec to +1/-1
        maxVal = np.max(tList)
        minVal = np.min(tList)
        tList = (tList-minVal)/(maxVal-minVal)
    return np.array(tList)


# Without random
'''
def wavFileToSpecImg(aud,num_strides,target_sr=8000,THRESHOLDED=True,SCALED=True):
    tList = []
    for idx in range(0,num_strides*ML_FFT_STRIDE,int(ML_FFT_STRIDE)):
        clip = aud[idx:idx+SPECD_FFT_LEN]
        q = chunkToBinsFixed(clip,ML_FLO,ML_FHI,ML_BIN_AGG,ML_SR)
        tList.append(q)
    tList = np.array(tList)
    # Thresholding
    if THRESHOLDED:
        tList = (tList >0)*tList
    if SCALED:
        # Scale the dB mag spec to +1/-1
        maxVal = np.max(tList)
        minVal = np.min(tList)
        tList = (tList-minVal)/(maxVal-minVal)
    return np.array(tList)
'''

'\ndef wavFileToSpecImg(aud,num_strides,target_sr=8000,THRESHOLDED=True,SCALED=True):\n    tList = []\n    for idx in range(0,num_strides*ML_FFT_STRIDE,int(ML_FFT_STRIDE)):\n        clip = aud[idx:idx+SPECD_FFT_LEN]\n        q = chunkToBinsFixed(clip,ML_FLO,ML_FHI,ML_BIN_AGG,ML_SR)\n        tList.append(q)\n    tList = np.array(tList)\n    # Thresholding\n    if THRESHOLDED:\n        tList = (tList >0)*tList\n    if SCALED:\n        # Scale the dB mag spec to +1/-1\n        maxVal = np.max(tList)\n        minVal = np.min(tList)\n        tList = (tList-minVal)/(maxVal-minVal)\n    return np.array(tList)\n'

## Custom F1 Metric
- By Prof Andrew

In [5]:
#@keras.saving.register_keras_serializable()
def f1_metric(y_true, y_pred):
    y_true = K.cast(y_true, 'int32')
    y_pred = K.cast(K.round(y_pred), 'int32')
    TP = K.sum(K.cast(y_true * y_pred, 'float'), axis=0)
    TN = K.sum(K.cast((1 - y_true) * (1 - y_pred), 'float'), axis=0)
    FP = K.sum(K.cast((1 - y_true) * y_pred, 'float'), axis=0)
    FN = K.sum(K.cast(y_true * (1 - y_pred), 'float'), axis=0)

    precision = TP / (TP + FP + K.epsilon())
    recall = TP / (TP + FN + K.epsilon())

    f1 = 2 * (precision * recall) / (precision + recall + K.epsilon())
    f1 = tf.where(tf.math.is_nan(f1), tf.zeros_like(f1), f1)
    return K.mean(f1)

## Getting all FP data spectograms as new NEG data
- Go through all saved FPs
- Get aud clip
- Convert into sepctral image
- Add to a new dataset ready for training

In [10]:
with open(C_CALLS_ALL_DIR, 'rb') as f:
    C_calls_all = pickle.load(f)

print(len(C_calls_all))
all_spectograms = []
for i, (filename, LB, UB) in enumerate(C_calls_all):
    # Get path
    path = os.path.join(AUDIO_FILES_DIR, filename)
    
    # Load files
    data, sr = librosa.load(path, sr=8000)

    # Get sample
    LB_samples = int(LB * sr)
    UB_samples = int(UB * sr)

    clip = data[LB_samples:UB_samples]

    # Get spectral image using special function
    clipImg = wavFileToSpecImg(clip,num_strides=ML_NUM_FFT_STRIDES)

    all_spectograms.append(clipImg)

    print(i+1)

# Reshape to (num_samples, height, width, channels)
all_spectograms = np.array(all_spectograms)
all_spectograms_neg = all_spectograms.reshape(all_spectograms.shape[0], all_spectograms.shape[1], all_spectograms.shape[2], 1)

labels_neg = np.array([[1, 0] for _ in range(len(all_spectograms))])

1160
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276

## Get POS examples
- From the existing dataset
- To make sure balanced with some POS examples

In [11]:
with open('all_data.pkl', 'rb') as f:
    images, labels = pickle.load(f)

num = len(all_spectograms)
pos_examples = []

for img, label in zip(images, labels):
    if num > 0 and (label == [0, 1]).all():
        pos_examples.append(img)
        num -= 1
    if num == 0:
        break

pos_examples = np.array(pos_examples)
labels_pos = np.array([[0, 1] for _ in range(len(pos_examples))])

## Loading and retraining model

In [14]:
from tensorflow.keras.optimizers import Adam

model = tf.keras.models.load_model(CNN_MODEL_PATH_OLD)

optimizer = Adam(learning_rate=0.0001)  # Use a smaller learning rate

model.compile(optimizer=optimizer, loss='categorical_crossentropy', metrics=['accuracy', tf.keras.metrics.Precision()]) # use this for accuracy as loss

# Trim the NEG dataset to match the size of the POS dataset
#all_spectograms_neg = all_spectograms_neg[:int(len(pos_examples))]
#labels_neg = labels_neg[:int(len(labels_pos))]

weight_for_0 = (1 / len(all_spectograms_neg)) * ((len(all_spectograms_neg)+len(pos_examples)) / 2.0)
weight_for_1 = (1 / len(pos_examples)) * ((len(all_spectograms_neg)+len(pos_examples))/ 2.0)

data = np.concatenate((pos_examples, all_spectograms_neg))
labels = np.concatenate((labels_pos, labels_neg))

#model.fit(data, labels, epochs=200, batch_size=32, validation_split=0.2, verbose=1)
model.fit(data, labels, epochs=100, batch_size=32, validation_split=0.2, verbose=1, class_weight={0: weight_for_0, 1: weight_for_1})

Epoch 1/100
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - accuracy: 0.6008 - loss: nan - precision_1: 0.6388 - val_accuracy: 0.5136 - val_loss: 1.0350 - val_precision_1: 0.5136
Epoch 2/100
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5943 - loss: nan - precision_1: 0.6496 - val_accuracy: 0.5931 - val_loss: 0.7859 - val_precision_1: 0.5931
Epoch 3/100
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5479 - loss: nan - precision_1: 0.6051 - val_accuracy: 0.5906 - val_loss: 0.7587 - val_precision_1: 0.5906
Epoch 4/100
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5990 - loss: nan - precision_1: 0.6401 - val_accuracy: 0.5906 - val_loss: 0.7344 - val_precision_1: 0.5906
Epoch 5/100
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5905 - loss: nan - precision_1: 0.6499 - val_accuracy: 0.5881 - val_loss: 0.7

<keras.src.callbacks.history.History at 0x194b2b84fb0>

## Print some stats

In [13]:
# Evaluate and plot the confusion matrix
y_pred_prob = model.predict(X_test)
y_pred = np.argmax(y_pred_prob, axis=1)
y_test_labels = np.argmax(y_test, axis=1)

# Calculate and print the F1 score
f1 = f1_score(y_test_labels, y_pred)
print(f"F1 Score: {f1:.2f}")

NameError: name 'X_test' is not defined

## Save model 

In [93]:
model.save(CNN_MODEL_PATH_NEW)