In [1]:
import os
from PIL import Image
import json
from datetime import datetime
import matplotlib.pyplot as plt

In [2]:
# Read the combined annotation export into `data`; later cells iterate over it
# as a sequence of per-image records (dicts).
combined_json_path = "/media/user-1/CameraTraps/NINA/combined.json"
with open(combined_json_path, encoding='utf-8') as json_file:
    data = json.load(json_file)

In [3]:
def is_valid_date(date_string):
    """Tell whether ``date_string`` is a date written as ``DD.MM.YYYY``.

    Args:
        date_string: Text to check.

    Returns:
        True when the text parses with the ``%d.%m.%Y`` format, False when
        parsing raises ``ValueError``.
    """
    try:
        datetime.strptime(date_string, '%d.%m.%Y')
    except ValueError:
        return False
    else:
        return True


In [4]:
# Fields copied unchanged from each source record into the cleaned datapoint.
_COPIED_FIELDS = (
    'Dato', 'Temperatur', 'CameraType', 'CameraModel', 'Filnavn',
    'ExposureTime', 'ISO', 'Brightness', 'Contrast', 'Sharpness', 'Saturation',
)

# Ordered (keywords, species) table used for labels that do not end in a date.
# The first row with a keyword present among the label's tokens wins.
# NOTE(review): matching is an exact, case-sensitive token comparison, and
# "ulv" is lower-case while most other keywords are capitalised -- confirm
# this is intended before relying on it for new data.
_KEYWORD_SPECIES = (
    (("Rev",), 'rev'),
    (("gaupe", "hanngaupe", "Gaupe"), 'gaupe'),
    (("Ekorn",), "ekorn"),
    (("Hare",), "hare"),
    (("Mår",), "mår"),
    (("Grevling",), "grevling"),
    (("ulv",), "ulv"),
    (("Rådyr",), "rådyr"),
    (("Elg",), "elg"),
    (("Katt",), "katt"),
)


def _species_from_label(text):
    """Derive a species name from a record's free-text ``Tekst`` label.

    Args:
        text: The raw label, or None when the record has no label.

    Returns:
        The species name, or "" when the label is missing or no species
        can be determined from it.
    """
    if text is None or len(text) == 0:  # class label is missing
        return ""

    tokens = text.split(" ")

    # Clean labels end with a DD.MM.YYYY date; everything before it is the
    # species name.
    if is_valid_date(tokens[-1]):
        return " ".join(tokens[:-1]).lower()

    # Labels without a clean format: look for a known species keyword.
    for keywords, species in _KEYWORD_SPECIES:
        if any(keyword in tokens for keyword in keywords):
            return species

    # A four-character token made of one letter followed by three digits is
    # treated as a lynx (presumably an individual's ID -- confirm).
    for token in tokens:
        if len(token) == 4 and token[0].isalpha() and token[1:].isdigit():
            return 'gaupe'

    # Most likely an invalid format or an unknown species.
    return ""


classes = {}   # species name -> number of accepted datapoints
metadata = []  # cleaned datapoints, in source order
for record in data:
    # Skip video files.
    if record['Filnavn'].split(".")[-1].lower() == "mp4":
        continue

    species = _species_from_label(record['Tekst'])

    # Drop records without a usable label (names of at most one character)
    # and those explicitly labelled "ukjent".
    if len(species) <= 1 or species == "ukjent":
        continue

    # Create the new metadata structure.
    datapoint = {field: record[field] for field in _COPIED_FIELDS}
    datapoint['species'] = species
    metadata.append(datapoint)

    classes[species] = classes.get(species, 0) + 1

# Size of the majority class (0 when nothing was accepted).
maxNum = max(classes.values(), default=0)

# Remove all classes that do not have more than 1/1000 (integer division) of
# the majority class's sample count.
newClasses = {
    name: count for name, count in classes.items() if count > maxNum // 1000
}
classes = newClasses
print(classes)

{'ekorn': 5564, 'rådyr': 54181, 'elg': 13969, 'rev': 20859, 'gaupe': 3703, 'hare': 11915, 'grevling': 18880, 'fugl': 6443, 'mår': 2199, 'rugde': 284, 'svarttrost': 512, 'ulv': 1208, 'annet pattedyr': 140, 'kjøttmeis': 263, 'katt': 1623, 'hjort': 10227, 'sau': 4836, 'skogshøns': 381, 'nøtteskrike': 59, 'ringdue': 284, 'bjørn': 115, 'jerpe': 59, 'meis': 61, 'storfugl': 446, 'villsvin': 374, 'jerv': 261, 'trost': 124, 'måltrost': 124, 'smågnager': 699, 'storfe': 456, 'skjære': 168, 'sørhare': 61}


In [5]:
# Drop the datapoints whose species is no longer a key of `classes`
# (i.e. the classes with few samples), reporting the size before and after.
new_metadata = [entry for entry in metadata if entry['species'] in classes]

print(len(metadata))
metadata = new_metadata
print(len(metadata))

161021
160478


In [6]:
def find_image_path(path, filename):
    """Locate ``filename`` somewhere below the directory ``path``.

    Args:
        path: Root directory to search recursively.
        filename: Bare file name to look for.

    Returns:
        The joined path of the first directory (in ``os.walk`` order) that
        contains a file with that name, or None if there is no such file.
    """
    candidates = (
        os.path.join(folder, filename)
        for folder, _subfolders, names in os.walk(path)
        if filename in names
    )
    return next(candidates, None)

In [7]:
input_folder = "/home/user-1/prog/CameraTraps/NINA/images/"
output_folder = "/media/user-1/CameraTraps/NINA"

# Make sure the destination exists; otherwise every save below would fail and
# be reported as an unreadable image.
images_folder = os.path.join(output_folder, "Images")
os.makedirs(images_folder, exist_ok=True)

# Walk the input tree once and remember, for every file name, the first path
# at which it occurs. This gives the same result as searching the tree from
# scratch for each datapoint, without re-walking it once per image.
image_paths = {}
for root, _dirs, files in os.walk(input_folder):
    for name in files:
        image_paths.setdefault(name, os.path.join(root, name))

final_metadata = []  # used to track which images are copied over
for datapoint in metadata:  # copy images over to SSD
    filename = datapoint['Filnavn']
    full_path = image_paths.get(filename)
    if full_path is None:
        continue  # no file with that name under input_folder

    try:
        # The context manager closes the source file even if processing fails.
        with Image.open(full_path) as im:
            w, h = im.size
            im = im.crop((50, 50, w - 50, h - 50))  # crop out metadata bands
            im = im.resize((512, 512))  # reshape to smaller square image
            im.save(os.path.join(images_folder, filename))
        final_metadata.append(datapoint)
    except Exception:
        # Catch Exception rather than everything so Ctrl-C / kernel interrupt
        # still stops the loop. Any failure while opening, processing or
        # saving ends up here, and the datapoint is left out.
        print("Could not Read image:", full_path)

metadata = final_metadata
print(len(metadata))

Could not Read image: /home/user-1/prog/CameraTraps/NINA/images/Rådyr/7e1f0854-3c78-401a-b55c-e9f069307bb1.jpg
Could not Read image: /home/user-1/prog/CameraTraps/NINA/images/Rev/b121d084-cdcc-4548-ba39-39dc12a2b926.jpg
Could not Read image: /home/user-1/prog/CameraTraps/NINA/images/117_20121226_0642_M314_L (2)_20150504134314.jpg
Could not Read image: /home/user-1/prog/CameraTraps/NINA/images/Sanns./361_20160601_0101_ULV (2)_20160728192414.jpg
Could not Read image: /home/user-1/prog/CameraTraps/NINA/images/Sanns./361_20160502_2140_ULV (1)_20160728192225.jpg
160473


In [17]:
# A species' ID is its position in `classNames`, which follows the key order
# of `classes`.
classNames = list(classes)
for entry in metadata:
    entry['speciesID'] = classNames.index(entry['species'])

In [22]:
# Bundle the class-name list and the per-image annotations into one object.
d = dict(categories=classNames, annotations=metadata)

In [23]:
# Save the metadata file inside the output folder.
metadata_json_path = output_folder + "/metadata.json"
with open(metadata_json_path, "w") as out_file:
    json.dump(d, out_file)