# Running CNN on some input
- Given the model and a 1 hour file, output the locations of the calls in the file and relevant metrics \
- Then generate selection table that can be opened up in raven to see where the detections are for further manual analysis 

## Imports

In [1]:
import tensorflow as tf
import numpy as np
import librosa
import pickle
import time
import pandas
import datetime
import os

## Parameters

In [162]:
ML_SR = 8000 # Target sampling rate
SPECD_FFT_LEN =  512 # Real FFT length (in the M4F - we use double of this on the PC as we don't do single-sided)
ML_BIN_AGG = 14 # Number of frequency bins (vertical dimension)
ML_FLO = 600 # Low freq
ML_FHI = 1400 # High freq
ML_FFT_STRIDE=1024 # Stride length in audio samples
ML_NUM_FFT_STRIDES = 12 # How many strides make up a sample
THRESHOLDED = False # Threshold the template or not

ONE_HOUR_FILE_PATH = "C:\\Users\\Amogh\\OneDrive - University of Cambridge\\Programming-New\\CaracalChitalDetector\\cnn\\data\\CAR204_20240325$164500_1711364400.wav"  # Which 1 hour file you want to look at
SELECTION_TABLE_FILE_PATH = ONE_HOUR_FILE_PATH[:-3] + "Table.1.selections.txt"
SELECTION_TABLE_FILE_PATH = SELECTION_TABLE_FILE_PATH.replace("data", "annotations")
#CNN_MODEL_PATH = "03MLouput20240811v001.keras"  # Path of where the pretrained model is stored
CNN_MODEL_PATH = "model_new.keras"
CNN_OUTPUT_FILE_PATH = "output.pkl"  # Where the output of this model is stored - list of (LB, UB) times where a call is detected 
SELECTION_TABLE_OUTPUT = 'output.txt'  # Data from output.pkl stored in selection table format for analysis in raven
ANNOTATION_FILE = 'AcousticAnnotations001.pb'  # CSV file where all true annotations are stored
C_CALLS_ALL_DIR = 'c_calls_all.pkl' # Stores all the detections that are used

## "Simple magic" functions
Takes a clip and returns an spectral image \
By Prof Andrew \
2 different implemetations depending on if we want to use random numbers or not (when same clip is inputed should we get the same output everytime the function is ran)

In [3]:
def chunkToBinsFixed(chunk,fLo,fHi,numbins,sr):
    """convert a chunk (window) to spectral image.
    Provide the low and high frequencies in Hz for a spectral windowing
    numbins are the number of output bins between flo and fhi
    Provide the sample rate in Hz"""
    CMPLX_FFT_LEN = len(chunk)*2
    fS = np.fft.fft(chunk,n=CMPLX_FFT_LEN) # fft - note we double it for the cmplx fft
    fRes = sr/(CMPLX_FFT_LEN)   # frequency per cmplx bin
    # Find the bin indices - map from frequency to FFT index
    binLo = int(fLo/sr*CMPLX_FFT_LEN)
    binHi = int(fHi/sr*CMPLX_FFT_LEN)
    specSize = int((binHi-binLo)/numbins)
    binTotals = np.zeros(numbins)
    for k in range(numbins):
        dbSum = 0
        for j in range(specSize):
            idx = binLo + (k * specSize) + j # NB not numbins!
            # Convert complex magnitude to absolute value
            absVal = np.abs(fS[idx])
            # We add an offset so we don't take log of tiny numbers. We can explore what a sensible offset is - 1.0 is probably too high.
            absVal += 1.0 
            # Convert to "power" by taking log
            dbVal = np.log(absVal) # NB natural (not log10) base!
            # Add up all the "powers" - again, this is probably not "correct", but we are just trying to work out some useful input features
            dbSum += dbVal
        binTotals[k] = dbSum
    return binTotals

In [4]:

# With random
def wavFileToSpecImg(aud,num_strides,target_sr=8000,random_offset=4000,THRESHOLDED=True,SCALED=True):
    tList = []
    startIdx = int(np.random.uniform(random_offset))
    for idx in range(0,num_strides*ML_FFT_STRIDE,int(ML_FFT_STRIDE)):
        clip = aud[idx+startIdx:idx+startIdx+SPECD_FFT_LEN]
        q = chunkToBinsFixed(clip,ML_FLO,ML_FHI,ML_BIN_AGG,ML_SR)
        tList.append(q)
    tList = np.array(tList)
    # Thresholding
    if THRESHOLDED:
        tList = (tList >0)*tList
    if SCALED:
        # Scale the dB mag spec to +1/-1
        maxVal = np.max(tList)
        minVal = np.min(tList)
        tList = (tList-minVal)/(maxVal-minVal)
    return np.array(tList)


# Without random
'''
def wavFileToSpecImg(aud,num_strides,target_sr=8000,THRESHOLDED=True,SCALED=True):
    tList = []
    for idx in range(0,num_strides*ML_FFT_STRIDE,int(ML_FFT_STRIDE)):
        clip = aud[idx:idx+SPECD_FFT_LEN]
        q = chunkToBinsFixed(clip,ML_FLO,ML_FHI,ML_BIN_AGG,ML_SR)
        tList.append(q)
    tList = np.array(tList)
    # Thresholding
    if THRESHOLDED:
        tList = (tList >0)*tList
    if SCALED:
        # Scale the dB mag spec to +1/-1
        maxVal = np.max(tList)
        minVal = np.min(tList)
        tList = (tList-minVal)/(maxVal-minVal)
    return np.array(tList)
'''

'\ndef wavFileToSpecImg(aud,num_strides,target_sr=8000,THRESHOLDED=True,SCALED=True):\n    tList = []\n    for idx in range(0,num_strides*ML_FFT_STRIDE,int(ML_FFT_STRIDE)):\n        clip = aud[idx:idx+SPECD_FFT_LEN]\n        q = chunkToBinsFixed(clip,ML_FLO,ML_FHI,ML_BIN_AGG,ML_SR)\n        tList.append(q)\n    tList = np.array(tList)\n    # Thresholding\n    if THRESHOLDED:\n        tList = (tList >0)*tList\n    if SCALED:\n        # Scale the dB mag spec to +1/-1\n        maxVal = np.max(tList)\n        minVal = np.min(tList)\n        tList = (tList-minVal)/(maxVal-minVal)\n    return np.array(tList)\n'

## Analysis singular
### Load CNN and random 1 hour file

In [163]:
# Load CNN
model = tf.keras.models.load_model(CNN_MODEL_PATH)

# 1 hour file - load anything you like 
data, sr = librosa.load(ONE_HOUR_FILE_PATH, sr=8000)

### Main loop
Iterate through each 3 second window in 1 hour file \
Run special function to generate image from audio clip \
Run the pretrained CNN on this clip \
Iterating each 1.5 second which is half a window \
Results in each window having half an overlap with the previous and next iteration

In [164]:
C_calls = []

print(f'Total iterations required: {(len(data)//int(1.5*sr))}')

start = time.time()

# Iterate through each 1.5 second (half a window)
for i, LB in enumerate(range(0, len(data)-int(1.5*sr), int(1.5*sr))):

    # Get lB
    UB = LB + (3*sr)
    
    # Extract clip
    clip = data[LB:UB]
    
    # Get spectral image using special function
    clipImg = wavFileToSpecImg(clip,num_strides=ML_NUM_FFT_STRIDES)
    
    # Reshape to (num_samples, height, width, channels) for use of CNN
    clipImg = clipImg.reshape(1, clipImg.shape[0], clipImg.shape[1], 1)
    
    # Run through CNN and get prediction(
    result = model.predict(clipImg, verbose=0)
    
    # If positive prediction and passes threshold, add to list
    if np.argmax(result[0]) == 1 and result[0][1] > 0.5:
        C_calls.append((LB/sr, UB/sr))
    
    print(i+1)
    
end = time.time()

print(C_calls)
print(f'total time taken: {end-start}')

Total iterations required: 2400
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269


### Save results

In [60]:
with open(CNN_OUTPUT_FILE_PATH, 'wb') as f:
    pickle.dump(C_calls, f)

In [165]:
print(len(C_calls))
print(C_calls)
print(ONE_HOUR_FILE_PATH)
print(SELECTION_TABLE_FILE_PATH)

108
[(67.5, 70.5), (180.0, 183.0), (357.0, 360.0), (489.0, 492.0), (493.5, 496.5), (499.5, 502.5), (529.5, 532.5), (535.5, 538.5), (595.5, 598.5), (600.0, 603.0), (621.0, 624.0), (627.0, 630.0), (631.5, 634.5), (696.0, 699.0), (705.0, 708.0), (739.5, 742.5), (765.0, 768.0), (766.5, 769.5), (805.5, 808.5), (807.0, 810.0), (843.0, 846.0), (1159.5, 1162.5), (1260.0, 1263.0), (1485.0, 1488.0), (1519.5, 1522.5), (1524.0, 1527.0), (1533.0, 1536.0), (1540.5, 1543.5), (1555.5, 1558.5), (1566.0, 1569.0), (1579.5, 1582.5), (1594.5, 1597.5), (1599.0, 1602.0), (1612.5, 1615.5), (1615.5, 1618.5), (1617.0, 1620.0), (1627.5, 1630.5), (1630.5, 1633.5), (1644.0, 1647.0), (1651.5, 1654.5), (1654.5, 1657.5), (1656.0, 1659.0), (1668.0, 1671.0), (1677.0, 1680.0), (1689.0, 1692.0), (1699.5, 1702.5), (1701.0, 1704.0), (1702.5, 1705.5), (1720.5, 1723.5), (1722.0, 1725.0), (1744.5, 1747.5), (1746.0, 1749.0), (1749.0, 1752.0), (1761.0, 1764.0), (1764.0, 1767.0), (1806.0, 1809.0), (1809.0, 1812.0), (1821.0, 1824

### Sensitivity and FAR (False acceptance rate)
- Sensitivity = FN/(TP+FN)
- FAR = FP/hour 
- Go into the selection table for the 1 hour file you are looking at and count the no of TP, FP and FN

In [166]:
with open(ANNOTATION_FILE,'rb') as f:
    df = pickle.load(f)

# Filter by positive (C) calls
df = df[df['Annotation'] == 'C']

# Filter by the selection table of the 1 hour file you are looking at
df = df[df['SourceFile'] == SELECTION_TABLE_FILE_PATH]

# Filter unused columns
df = df.drop(columns=['SourceFile', 'LocationName', 'AnnotationType', 'StartTime', 'EndTime', 'FileStartTime', 'LowFreq', 'HighFreq', 'Annotation'])

TP = 0
FN = 0
FP = 0
totaltime = len(data) / sr / 3600

# Go through each actual annotation and try and find a matching detection by the CNN
# If found, incriment TP 
# If went through all annotations and didn't find, increment FN
# <start/end> means true call
# <start/end>1 means detected call
for start, end in zip(df['RelativeStartTime'], df['RelativeEndTime']):
    start = start.to_pytimedelta().total_seconds()
    end = end.to_pytimedelta().total_seconds()
    found = False
    for start1, end1 in C_calls:
        if (start1 >= start and end1 <= end) or (start >= start1 and end <= end1) or (start1 >= start and end1 >= end and start1 <= end) or (start >= start1 and end >= end1 and start <= end1):
            TP += 1
            found = True
            break
    if not found:
        FN += 1

# Go through each detection and see if an annotation exists, if not, then it is a FP
for start, end in C_calls:
    found = False
    for start1, end1 in zip(df['RelativeStartTime'], df['RelativeEndTime']):
        start1 = start1.to_pytimedelta().total_seconds()
        end1 = end1.to_pytimedelta().total_seconds()
        if (start1 >= start and end1 <= end) or (start >= start1 and end <= end1) or (start1 >= start and end1 >= end and start1 <= end) or (start >= start1 and end >= end1 and start <= end1):
            found = True
            break 
    if not found:
        FP += 1

print(f'TP: {TP}')
print(f'FP: {FP}')
print(f'FN: {FN}')

#print(list(zip(df['RelativeStartTime'], df['RelativeEndTime'])))

print(f'Sensitivity (FN/(TP+FN)): {FN/(TP+FN)}')
print(f'FAR (FP/hour): {FP/totaltime}')

TP: 0
FP: 108
FN: 0


ZeroDivisionError: division by zero

## Iteration over all 1 hour files for mean stats
- Same code as before but iterates over all 1 hour files
- Prints all the stats for all of the files - To find outliers of badly performed files
- And prints mean stats

In [6]:
# Print iterations progress
def printProgressBar (iteration, total, prefix = '', suffix = '', decimals = 1, length = 100, fill = '█', printEnd = "\r"):
    """
    Call in a loop to create terminal progress bar
    @params:
        iteration   - Required  : current iteration (Int)
        total       - Required  : total iterations (Int)
        prefix      - Optional  : prefix string (Str)
        suffix      - Optional  : suffix string (Str)
        decimals    - Optional  : positive number of decimals in percent complete (Int)
        length      - Optional  : character length of bar (Int)
        fill        - Optional  : bar fill character (Str)
        printEnd    - Optional  : end character (e.g. "\r", "\r\n") (Str)
    """
    percent = ("{0:." + str(decimals) + "f}").format(100 * (iteration / float(total)))
    filledLength = int(length * iteration // total)
    bar = fill * filledLength + '-' * (length - filledLength)
    print(f'\r{prefix} |{bar}| {percent}% {suffix}', end = printEnd)
    # Print New Line on Complete
    if iteration == total: 
        print()

In [None]:
sensitivities = []
FARs = []
filenames = os.listdir("C:\\Users\\Amogh\\OneDrive - University of Cambridge\\Programming-New\\CaracalChitalDetector\\cnn\\data")
C_calls_all = []
model = tf.keras.models.load_model(CNN_MODEL_PATH)

for filename_count, filename in enumerate(filenames):
    
    ONE_HOUR_FILE_PATH = "C:\\Users\\Amogh\\OneDrive - University of Cambridge\\Programming-New\\CaracalChitalDetector\\cnn\\data\\" + filename
    SELECTION_TABLE_FILE_PATH = ONE_HOUR_FILE_PATH[:-3] + "Table.1.selections.txt"
    SELECTION_TABLE_FILE_PATH = SELECTION_TABLE_FILE_PATH.replace("data", "annotations")
    
    # 1 hour file - load anything you like 
    data, sr = librosa.load(ONE_HOUR_FILE_PATH, sr=8000)

    total_iterations = len(data)//int(1.5*sr)
    printProgressBar(0, total_iterations, prefix = f'Progress {filename_count+1}/{len(filenames)} {i+1}/{total_iterations} {filename}:', suffix = 'Complete', length = 50)

    C_calls = []
    
    # Iterate through each 1.5 second (half a window)
    for i, LB in enumerate(range(0, len(data)-int(1.5*sr), int(1.5*sr))):
    
        # Get lB
        UB = LB + (3*sr)
        
        # Extract clip
        clip = data[LB:UB]

        # If clip is empty, skip
        if len(clip) < sr*3.0:
            continue
        
        # Get spectral image using special function
        clipImg = wavFileToSpecImg(clip,num_strides=ML_NUM_FFT_STRIDES)
        
        # Reshape to (num_samples, height, width, channels) for use of CNN
        clipImg = clipImg.reshape(1, clipImg.shape[0], clipImg.shape[1], 1)
        
        # Run through CNN and get prediction(
        result = model.predict(clipImg, verbose=0)
        
        # If positive prediction and passes threshold, add to list
        if np.argmax(result[0]) == 1 and result[0][1] > 0.5:
            C_calls.append((LB/sr, UB/sr))
            #C_calls_all.append((filename, LB/sr, UB/sr))

        printProgressBar(i + 1, total_iterations, prefix = f'Progress {filename_count+1}/{len(filenames)} {i+1}/{total_iterations} {filename}:', suffix='Complete', length=50)

    with open(ANNOTATION_FILE,'rb') as f:
        df = pickle.load(f)

    # Filter by positive (C) calls
    df = df[df['Annotation'] == 'C']
    
    # Filter by the selection table of the 1 hour file you are looking at
    df = df[df['SourceFile'] == SELECTION_TABLE_FILE_PATH]
    
    # Filter unused columns
    df = df.drop(columns=['SourceFile', 'LocationName', 'AnnotationType', 'StartTime', 'EndTime', 'FileStartTime', 'LowFreq', 'HighFreq', 'Annotation'])
    
    TP = 0
    FN = 0
    FP = 0
    totaltime = len(data) / sr / 3600
    
    # Go through each actual annotation and try and find a matching detection by the CNN
    # If found, incriment TP 
    # If went through all annotations and didn't find, increment FN
    # <start/end> means true call
    # <start/end>1 means detected call
    for start, end in zip(df['RelativeStartTime'], df['RelativeEndTime']):
        start = start.to_pytimedelta().total_seconds()
        end = end.to_pytimedelta().total_seconds()
        found = False
        for start1, end1 in C_calls:
            if (start1 >= start and end1 <= end) or (start >= start1 and end <= end1) or (start1 >= start and end1 >= end and start1 <= end) or (start >= start1 and end >= end1 and start <= end1):
                TP += 1
                found = True
                break
        if not found:
            FN += 1
    
    # Go through each detection and see if an annotation exists, if not, then it is a FP
    for start, end in C_calls:
        found = False
        for start1, end1 in zip(df['RelativeStartTime'], df['RelativeEndTime']):
            start1 = start1.to_pytimedelta().total_seconds()
            end1 = end1.to_pytimedelta().total_seconds()
            if (start1 >= start and end1 <= end) or (start >= start1 and end <= end1) or (start1 >= start and end1 >= end and start1 <= end) or (start >= start1 and end >= end1 and start <= end1):
                found = True
                break 
        if not found:
            FP += 1
            C_calls_all.append((filename, start, end))

    print()
    
    if TP+FN == 0:
        sensitivities.append(None)
        print(None)
    else:
        sensitivities.append(FN/(TP+FN))
        print(FN/(TP+FN))
    FARs.append(FP/totaltime)
    print(FP/totaltime)
    print()

print(sensitivities)
print(FARs)

Progress 1/26 1839/1840 CAR204_20240323$135900_1711181640.wav: |█████████████████████████████████████████████████-| 99.9% Complete
0.625
1.303894297635605

Progress 2/26 1639/1640 CAR204_20240323$160400_1711189140.wav: |█████████████████████████████████████████████████-| 99.9% Complete
0.13333333333333333
17.550702028081123

Progress 3/26 56/1880 CAR204_20240325$075800_1711332780.wav: |█-------------------------------------------------| 3.0% Completete

## Save the C_calls_all
Go through all C_calls and remove the true positive ones

In [149]:
with open(C_CALLS_ALL_DIR, 'wb') as f:
    pickle.dump(C_calls_all, f)

## Get means
- Remove None values (1 hour files without any C call labels in their selection table at all) and return mean
- Store of all previous results of models for easy comparison

In [155]:
# Just manually removing the None values
# OLD MODEL:
x_old = [0.75, 0.0, None, None, None, None, None, None, 0.006211180124223602, None, None, None, 0.05405405405405406, None, None, 0.09375, 0.15428571428571428, None, None, None, 0.5, None, 0.2916666666666667, 0.16666666666666666, 0.021739130434782608, 0.03260869565217391]
y_old = [5.21557719054242, 65.81513260530421, 17.866866321807787, 265.9716296928328, 43.00458715596331, 18.001920204821847, 18.997973549488055, 38.995840443686006, 146.0155749946661, 0.9998933447098975, 29.00309366332409, 10.998826791808874, 32.99648037542662, 21.997653583617748, 85.00906763388095, 41.004373799871985, 30.996693686006825, 9.456960322797578, 0.0, 10.001066780456583, 25.997226962457336, 27.002880307232772, 42.004480477917646, 12.525979809976247, 125.9865614334471, 87.00928098997227]

# NEW MODEL: 50/50 old
x_new = [0.375, 0.13333333333333333, None, None, None, None, None, None, 0.049689440993788817, None, None, None, 0.02702702702702703, None, None, 0.17708333333333334, 0.21714285714285714, None, None, None, 1.0, None, 0.375, 0.0, 0.021739130434782608, 0.07065217391304347]
y_new = [79.5375521557719, 36.56396255850234, 65.08644160087123, 56.99392064846416, 66.00704075101345, 88.00938766801792, 163.9825085324232, 141.98485494880546, 76.00810753147003, 146.98432167235495, 38.004053765735016, 44.99520051194539, 58.993707337883954, 143.98464163822524, 66.00704075101345, 26.002773629187114, 28.99690699658703, 25.21856086079354, 58.00618732664818, 110.01173458502241, 37.99594709897611, 39.00416044378067, 107.01141455088543, 36.186163895486935, 50.99456058020478, 61.00650736078515]


# NEW MODEL 2: weights old
x_new_2 = [0.25, 0.13333333333333333, None, None, None, None, None, None, 0.043478260869565216, None, None, None, 0.0, None, None, 0.13541666666666666, 0.2057142857142857, None, None, None, 1.0, None, 0.375, 0.25, 0.09420289855072464, 0.08152173913043478]
y_new_2 = [100.39986091794158, 30.713728549141965, 61.257827389055265, 43.995307167235495, 50.00533390228291, 78.00832088756134, 222.97621587030716, 171.9816552901024, 68.00725410710476, 150.98389505119454, 30.003200341369748, 39.9957337883959, 54.99413395904437, 114.98773464163823, 53.00565393641989, 17.00181352677619, 25.997226962457336, 28.370880968392733, 60.006400682739496, 96.0102410923832, 33.99637372013652, 30.003200341369748, 95.01013441433753, 33.402612826603324, 34.996267064846414, 51.00544058032857]

# NEW MODEL 3: weights new
x_new_3 = [0.625, 0.0, None, None, None, None, None, None, 0.018633540372670808, None, None, None, 0.10810810810810811, None, None, 0.1875, 0.14857142857142858, None, None, None, 0.5, None, 0.375, 0.25, 0.021739130434782608, 0.07065217391304347]
y_new_3 = [3.911682892906815, 58.50234009360375, 22.971685270895726, 248.9734428327645, 40.00426712182633, 24.0025602730958, 15.99829351535836, 48.994773890784984, 138.01472157030085, 0.9998933447098975, 27.002880307232772, 7.99914675767918, 31.99658703071672, 25.997226962457336, 90.00960102410924, 41.004373799871985, 33.99637372013652, 3.1523201075991927, 1.0001066780456582, 6.00064006827395, 28.99690699658703, 27.002880307232772, 37.003947087689355, 15.309530878859858, 122.9868813993174, 85.00906763388095]

# NEW MODEL 3: 50/50 new
bad

print(np.mean([x for x in x_old if x != None]))
print(np.mean([y for y in y_old if y != None]))
#print()
#print(np.mean([x for x in x_new if x != None]))
#print(np.mean([y for y in y_new if y != None]))
#print()
#print(np.mean([x for x in x_new_2 if x != None]))
#print(np.mean([y for y in y_new_2 if y != None]))
print()
print(np.mean([x for x in x_new_3 if x != None]))
print(np.mean([y for y in y_new_3 if y != None]))

0.1882711007167529
46.649062389308355

0.20956403467273033
45.64777429099757


## Generate Selection Table of Detected C_calls for inspection

In [68]:
file = ['Selection\tView\tChannel\tBegin Time (s)\tEnd Time (s)\tLow Freq (Hz)\tHigh Freq (Hz)\tSpecies\n']
for i, (LB, UB) in enumerate(C_calls):
    file.append(f'{i+1}\tSpectrogram 1\t1\t{LB}\t{UB}\t{ML_FLO}\t{ML_FHI}\tC\n')

with open('SELECTION_TABLE_OUTPUT.txt', 'w') as f:
    for line in file:
        f.write(line)

## Testing model with POS, NEG, CON dataset

In [103]:
from glob import glob
DIR = "C:\\Users\\Amogh\\OneDrive - University of Cambridge\\Programming-New\\CaracalChitalDetector\\cnn\\output"
model = tf.keras.models.load_model('model.keras')
filenames = glob("C:\\Users\\Amogh\\OneDrive - University of Cambridge\\Programming-New\\CaracalChitalDetector\\cnn\\output\\POS_*.wav")

for filename in filenames:
    full_path = os.path.join(DIR, filename)
    data, sr = librosa.load(full_path, sr=8000)
    
    # Get spectral image using special function
    clipImg = wavFileToSpecImg(data,num_strides=ML_NUM_FFT_STRIDES)
    
    # Reshape to (num_samples, height, width, channels) for use of CNN
    clipImg = clipImg.reshape(1, clipImg.shape[0], clipImg.shape[1], 1)
    
    # Run through CNN and get prediction(
    result = model.predict(clipImg, verbose=0)
    
    # If positive prediction and passes threshold, add to list
    if np.argmax(result[0]) == 1 and result[0][1] > 0.5:
        #C_calls.append((LB/sr, UB/sr))
        pass
    print(np.argmax(result[0]))

0
0
0
1
1
1
0
0
0
1
1
1
1
1
1
1
1
1
1
1
0
0
0
1
1
1
1
1
1
0
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1


KeyboardInterrupt: 