### This notebook creates a report about the breeding terns in the colony. It takes a day's report of tracked breeding terns and uses a pre-trained classifier to determine the type of each tern. Finally, we show a drone image of the colony with the locations of the nesting terns.

In [1]:
!pip install streamlit==1.31.1 scikit-learn==1.3.2 joblib pandas

Collecting streamlit==1.31.1
  Downloading streamlit-1.31.1-py2.py3-none-any.whl.metadata (8.1 kB)
Collecting scikit-learn==1.3.2
  Downloading scikit_learn-1.3.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)
Collecting importlib-metadata<8,>=1.4 (from streamlit==1.31.1)
  Downloading importlib_metadata-7.2.1-py3-none-any.whl.metadata (4.6 kB)
Collecting numpy<2,>=1.19.3 (from streamlit==1.31.1)
  Downloading numpy-1.26.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.0/61.0 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting packaging<24,>=16.8 (from streamlit==1.31.1)
  Downloading packaging-23.2-py3-none-any.whl.metadata (3.2 kB)
Collecting pillow<11,>=7.1.0 (from streamlit==1.31.1)
  Downloading pillow-10.4.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (9.2 kB)
Collecting protobuf<5,>=3.20 (from streamlit==1.31.1)
  Downloading protobuf-4.25.8-

In [2]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [3]:
import os

# Project folder on the mounted Google Drive
working_directory = '/content/drive/MyDrive/tern_project/Eyal/ClassifyTerns'

# Make the project folder the current directory so that relative paths
# (such as './classify_terns.ini') resolve against it
os.chdir(working_directory)

### Read the breeding-tern tracks path, the label distribution statistics, and the date to classify terns on from the config file

In [4]:
import configparser

config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed. Check that list so a missing config file fails
# here with a clear message instead of a NoSectionError on the first get().
loaded_config_files = config.read('./classify_terns.ini', encoding="utf8")
if not loaded_config_files:
    raise FileNotFoundError(
        "Could not read './classify_terns.ini' from the current working directory"
    )

# Date to classify terns on, written as year_month_day (e.g. 2025_05_29)
date = config.get('General', 'date')
# Path to the classifier model file
classifier_model = config.get('General', 'classifier_model')
# Directory where the one-scan tracks are located
OneScanTracks = config.get('General', 'one_scan_result_dir')
# Directory where the track classifications will be saved
classification_result = config.get('General', 'classification_result_dir')
# Path to the file holding the label distribution of each flag
labels_distribution = config.get('General', 'labels_distribution')

In [5]:
date

'2025_05_29'

Read all scan directories on specific date

In [6]:
import glob
import datetime

# Split the configured date into its numeric parts and build a date object
# from them (this rejects impossible calendar dates)
year, month, day = date.split('_')
specific_date = datetime.date(*map(int, (year, month, day)))

# Zero-padded text form of the date, used to match scan directory paths
specific_date_str = specific_date.strftime("%Y_%m_%d")

# Every path located two levels below the one-scan tracks directory
scan_candidates = glob.glob(os.path.join(OneScanTracks, '*', '*'))

# Keep only the directories whose path contains the requested date
scans_dirs_on_date = [
    candidate
    for candidate in scan_candidates
    if os.path.isdir(candidate) and specific_date_str in candidate
]

In [7]:
scans_dirs_on_date

['/content/drive/MyDrive/tern_project/Eyal/TrackingTerns/TrackerResults/OneScanTracks/2025/atlitcam181.stream_2025_05_29_10_01_50/tour0',
 '/content/drive/MyDrive/tern_project/Eyal/TrackingTerns/TrackerResults/OneScanTracks/2025/atlitcam181.stream_2025_05_29_10_01_50/tour1',
 '/content/drive/MyDrive/tern_project/Eyal/TrackingTerns/TrackerResults/OneScanTracks/2025/atlitcam181.stream_2025_05_29_15_01_50/tour0',
 '/content/drive/MyDrive/tern_project/Eyal/TrackingTerns/TrackerResults/OneScanTracks/2025/atlitcam181.stream_2025_05_29_15_01_50/tour1',
 '/content/drive/MyDrive/tern_project/Eyal/TrackingTerns/TrackerResults/OneScanTracks/2025/atlitcam191.stream_2025_05_29_08_59_50/tour0',
 '/content/drive/MyDrive/tern_project/Eyal/TrackingTerns/TrackerResults/OneScanTracks/2025/atlitcam191.stream_2025_05_29_08_59_50/tour1',
 '/content/drive/MyDrive/tern_project/Eyal/TrackingTerns/TrackerResults/OneScanTracks/2025/atlitcam191.stream_2025_05_29_13_59_50/tour0',
 '/content/drive/MyDrive/tern_proj

Read statistics distribution of terns type in flags

In [8]:
import json

# Load the per-flag label distribution mapping from its JSON file
with open(labels_distribution, 'r') as distribution_file:
    labels_distribution_in_flags_json = json.load(distribution_file)

# Preview the first four entries of the mapping
for preview_key in list(labels_distribution_in_flags_json)[:4]:
    print(preview_key, labels_distribution_in_flags_json[preview_key])

4 {'Little-Stand': 0, 'Little-Sit': 0.29244808658910104, 'Common-Stand': 0.5669390274895842, 'Common-Sit': 99.14061288592131, 'Common-Fly': 0, 'Little-Fly': 0, 'Common-Stand-Tag': 0, 'Little-Stand-Tag': 0, 'Chick': 0, 'Other': 0}
24 {'Little-Stand': 0, 'Little-Sit': 0.6459420577439776, 'Common-Stand': 5.757713775924624, 'Common-Sit': 93.5963441663314, 'Common-Fly': 0, 'Little-Fly': 0, 'Common-Stand-Tag': 0, 'Little-Stand-Tag': 0, 'Chick': 0, 'Other': 0}
25 {'Little-Stand': 0, 'Little-Sit': 5.851566808555579, 'Common-Stand': 0.15662861081312188, 'Common-Sit': 93.9918045806313, 'Common-Fly': 0, 'Little-Fly': 0, 'Common-Stand-Tag': 0, 'Little-Stand-Tag': 0, 'Chick': 0, 'Other': 0}
26 {'Little-Stand': 0, 'Little-Sit': 16.455371816634585, 'Common-Stand': 0.21135178774633928, 'Common-Sit': 83.33327639561907, 'Common-Fly': 0, 'Little-Fly': 0, 'Common-Stand-Tag': 0, 'Little-Stand-Tag': 0, 'Chick': 0, 'Other': 0}


In [9]:
def calcAverageCoordinate(boxes_sequence):
    """Average the box corners over a sequence of predictions.

    Args:
        boxes_sequence: sequence of predictions; each one has a 'box' mapping
            with the keys 'x1', 'y1', 'x2' and 'y2'.

    Returns:
        dict with the keys 'x1', 'y1', 'x2', 'y2' holding the mean of each
        coordinate over the sequence.

    Raises:
        ZeroDivisionError: if boxes_sequence is empty.
    """
    totals = {'x1': 0, 'y1': 0, 'x2': 0, 'y2': 0}

    # Accumulate each coordinate separately, one prediction at a time
    for prediction in boxes_sequence:
        box = prediction['box']
        for corner in totals:
            totals[corner] += box[corner]

    count = len(boxes_sequence)
    return {corner: total / count for corner, total in totals.items()}

In [10]:
def sumClassesPrediction(boxes_sequence):
    """Summarise how often, and how confidently, each class was predicted.

    Args:
        boxes_sequence: sequence of predictions; each one has a 'name' (one of
            the ten class names listed below) and a numeric 'confidence'.

    Returns:
        Flat dict with two entries per class, in class order:
        '<class>_y_freq' - fraction of the predictions carrying that class, and
        '<class>_y_conf' - mean confidence of those predictions (0 when the
        class never appears).

    Raises:
        KeyError: if a prediction carries a class name outside the list below.
        ZeroDivisionError: if boxes_sequence is empty.
    """
    class_names = (
        "Little-Stand",
        "Little-Sit",
        "Common-Stand",
        "Common-Sit",
        "Common-Fly",
        "Little-Fly",
        "Common-Stand-Tag",
        "Little-Stand-Tag",
        "Chick",
        "Other",
    )
    classes_prediction = {name: {'freq': 0, 'conf': 0} for name in class_names}

    # Count the occurrences and sum the confidences per class
    for prediction in boxes_sequence:
        class_stats = classes_prediction[prediction["name"]]
        class_stats['freq'] += 1
        class_stats['conf'] += prediction['confidence']

    total_predictions = len(boxes_sequence)
    for class_stats in classes_prediction.values():
        # Turn the confidence sum into a mean before the count is normalised
        if class_stats['freq'] > 0:
            class_stats['conf'] = class_stats['conf'] / class_stats['freq']
        else:
            class_stats['conf'] = 0
        class_stats['freq'] /= total_predictions

    # Built once, outside of the normalisation loop, and flattened to
    # '<class>_y_<stat>' keys
    flat_classes_prediction = {}
    for class_name, class_stats in classes_prediction.items():
        for stat_name, stat_value in class_stats.items():
            flat_classes_prediction[class_name + '_y_' + stat_name] = stat_value

    return flat_classes_prediction

In [11]:
date

'2025_05_29'

In [12]:
import re

def get_flag_number(file_name):
    """Extract the digits that directly follow 'flag' in a file name or path.

    Args:
        file_name: file name or path expected to contain 'flag<digits>'.

    Returns:
        The digits as a string (e.g. '24'), or None when no flag number is
        found; in that case a message is printed.
    """
    # No trailing wildcard after the digits: r'flag(\d+).' demanded one extra
    # character, so a name ending right after the number lost its last digit
    # ('flag24' -> '2') or did not match at all ('flag5').
    match = re.search(r'flag(\d+)', file_name)
    if match is None:
        print("No flag number found in the file name.")
        return None
    return match.group(1)


def get_cam_number(file_name):
    """Extract the digits that directly follow 'atlitcam' in a file name or path.

    Args:
        file_name: file name or path expected to contain 'atlitcam<digits>'.

    Returns:
        The digits as a string (e.g. '181'), or None when no camera number is
        found; in that case a message is printed.
    """
    # No trailing wildcard after the digits: r'atlitcam(\d+).' demanded one
    # extra character, so a name ending right after the number lost its last
    # digit ('atlitcam181' -> '18').
    match = re.search(r'atlitcam(\d+)', file_name)
    if match is None:
        print("No camera number found in the file name.")
        return None
    return match.group(1)


def extract_file_path(path, parent_dir_num):
    """Return the tail of a '/'-separated path.

    Args:
        path: path string whose components are separated by '/'.
        parent_dir_num: how many parent directories to keep in front of the
            last component.

    Returns:
        The last component preceded by up to parent_dir_num parents, joined
        with '/'. The whole path is returned when it has fewer components.
    """
    segments = path.split('/')
    # Negative start index: the last component plus the requested parents
    first_kept = -(parent_dir_num + 1)
    kept_segments = segments[first_kept:]
    return '/'.join(kept_segments)

In [13]:
tracks_details = []

for tour_dir in scans_dirs_on_date:
    # The tracking reports are the JSON files directly inside the tour directory
    tour_entries = glob.glob(os.path.join(tour_dir, '*'))
    files = [entry for entry in tour_entries if entry.endswith('.json')]

    for tracking_on_flag_file in files:
        # Flag and camera numbers are encoded in the report's path
        flag_number = get_flag_number(tracking_on_flag_file)
        # NOTE(review): cam_number is not used below in this cell
        cam_number = get_cam_number(tracking_on_flag_file)
        # Distribution of tern classes recorded for this flag
        flag_labels_distribution = labels_distribution_in_flags_json[flag_number]

        # Load the tracking report of this flag
        with open(tracking_on_flag_file, 'r') as file:
            tracking_result_report = json.load(file)

        # One feature record per tracked object
        for tracked_object in tracking_result_report["object_boxes"]:
            predictions = tracked_object['predictions']
            tracks_details.append({
                'id': tracked_object['id'],
                **calcAverageCoordinate(predictions),
                **sumClassesPrediction(predictions),
                **flag_labels_distribution,
                # Number of predictions relative to the frames in the report
                'detection_rate': len(predictions) / tracking_result_report['frames_number'],
                'movement_rate': tracked_object['iou'],
                'flag': flag_number,
                # Report path relative to its two parent directories
                'file_path': extract_file_path(tracking_on_flag_file, 2),
            })

In [14]:
import pandas as pd

# Build a DataFrame with one row per tracked object from the collected records
track_details_df = pd.DataFrame(tracks_details)

# Show the resulting column names
track_details_df.columns

Index(['id', 'x1', 'y1', 'x2', 'y2', 'Little-Stand_y_freq',
       'Little-Stand_y_conf', 'Little-Sit_y_freq', 'Little-Sit_y_conf',
       'Common-Stand_y_freq', 'Common-Stand_y_conf', 'Common-Sit_y_freq',
       'Common-Sit_y_conf', 'Common-Fly_y_freq', 'Common-Fly_y_conf',
       'Little-Fly_y_freq', 'Little-Fly_y_conf', 'Common-Stand-Tag_y_freq',
       'Common-Stand-Tag_y_conf', 'Little-Stand-Tag_y_freq',
       'Little-Stand-Tag_y_conf', 'Chick_y_freq', 'Chick_y_conf',
       'Other_y_freq', 'Other_y_conf', 'Little-Stand', 'Little-Sit',
       'Common-Stand', 'Common-Sit', 'Common-Fly', 'Little-Fly',
       'Common-Stand-Tag', 'Little-Stand-Tag', 'Chick', 'Other',
       'detection_rate', 'movement_rate', 'flag', 'file_path'],
      dtype='object')

Drop columns of terns class distribution that are not helpful as inputs for our model.


*   Chick
*   Common-Fly
*   Little-Fly
*   Common-Stand-Tag
*   Little-Stand-Tag

We also rename the remaining distribution columns to more informative names.

In [15]:
import pandas as pd

# Remove the flag-level distribution columns that are not used as model inputs.
# errors='ignore' keeps this cell re-runnable: on a second run the columns are
# already gone, and a plain drop() would raise KeyError.
track_details_df = track_details_df.drop(
    columns=['Chick', 'Common-Fly', 'Little-Fly', 'Common-Stand-Tag', 'Little-Stand-Tag'],
    errors='ignore',
)

# Give the remaining flag-level distribution columns an '_f_freq' suffix.
# rename() skips labels that are absent, so this step is re-runnable as well.
track_details_df = track_details_df.rename(columns={
    'Little-Stand': 'Little-Stand_f_freq',
    'Little-Sit': 'Little-Sit_f_freq',
    'Common-Stand': 'Common-Stand_f_freq',
    'Common-Sit': 'Common-Sit_f_freq',
    'Other': 'Other_f_freq',
})

Calculate the width and height of each box in cm. The calculations involve camera calibration, 3D-to-2D transformations, and the use of PTZ (Pan-Tilt-Zoom) camera parameters. This is done using a library written to map box pixel coordinates to real-world locations.

In [16]:
import sys

# Make the project's RealCoordinatesCalculator package importable from Drive
sys.path.append('/content/drive/MyDrive/tern_project/Eyal/RealCoordinatesCalculator')
from real_coordinates_calculator import RealCoordinatesCalculator


real_coordinates_calculator = RealCoordinatesCalculator()
# Group by 'flag' and apply the box size calculation function
# NOTE(review): calc_box_size is defined outside this notebook; presumably it
# receives each flag's rows and returns the size columns for them — confirm
boxes_size_df = track_details_df.groupby('flag').apply(real_coordinates_calculator.calc_box_size)

# Drop flag as index from dataframe
# (removes the first index level so the remaining index can be matched against
# track_details_df in the merge below)
boxes_size_df.index = boxes_size_df.index.droplevel(0)

# Merge the calculated results back into the original DataFrame
# (index-on-index join; track_details_df is rebound to the merged frame, so
# running this cell a second time merges onto the already merged frame)
track_details_df = pd.merge(track_details_df, boxes_size_df, left_index=True, right_index=True)

# Display the resulting DataFrame
track_details_df.head()

Loading file /content/drive/MyDrive/tern_project/Eyal/RealCoordinatesCalculator/PTZCamValues181_mod.txt...
Loading file /content/drive/MyDrive/tern_project/Eyal/RealCoordinatesCalculator/PTZCamValues191_mod.txt...


  boxes_size_df = track_details_df.groupby('flag').apply(real_coordinates_calculator.calc_box_size)


Unnamed: 0,id,x1,y1,x2,y2,Little-Stand_y_freq,Little-Stand_y_conf,Little-Sit_y_freq,Little-Sit_y_conf,Common-Stand_y_freq,...,dy/dx_uv,pix_x1,pix_y1,pix_x2,pix_y2,DX_cm,DY_cm,dx_pix_drone,dy_pix_drone,Area
0,0,1039.008898,500.503003,1123.325004,543.119891,0.0,0.0,0.0,0.0,1.0,...,0.505442,10312,17512,10407,17307,52.70545,26.639539,95,205,1404.048894
1,3,999.097692,540.184454,1074.571221,567.523336,0.0,0.0,0.0,0.0,0.0,...,0.362231,10451,17353,10498,17223,25.88958,9.378018,47,130,242.792956
2,4,1145.479708,324.220394,1206.911758,346.709637,0.0,0.0,0.0,0.0,0.0,...,0.366083,9552,18453,9628,18282,42.676051,15.622986,76,171,666.727341
3,5,900.286996,642.813416,951.510546,680.418599,0.0,0.0,0.0,0.0,0.0,...,0.734139,10760,16997,10826,16862,36.781519,27.002732,66,135,993.201485
4,8,970.959067,346.520132,1026.505751,373.260723,0.0,0.0,0.0,0.0,0.111111,...,0.481408,9753,18366,9844,18179,50.17829,24.156206,91,187,1212.117105


## Classify all sequences of boxes representation by a pre-trained Random Forest model.

Classify every tracked box sequence with a pre-trained Random Forest model, recording the predicted class and its confidence.

In [17]:
import joblib

# Load the trained Random Forest model
# joblib.load unpickles the file, which can execute arbitrary code: only load
# model files that come from a trusted source.
# NOTE(review): the model presumably has to be loaded with the scikit-learn
# version it was saved with (1.3.2 is pinned in the install cell) — confirm
rf_model = joblib.load(classifier_model)

https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations
https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


In [18]:
track_details_df.columns

Index(['id', 'x1', 'y1', 'x2', 'y2', 'Little-Stand_y_freq',
       'Little-Stand_y_conf', 'Little-Sit_y_freq', 'Little-Sit_y_conf',
       'Common-Stand_y_freq', 'Common-Stand_y_conf', 'Common-Sit_y_freq',
       'Common-Sit_y_conf', 'Common-Fly_y_freq', 'Common-Fly_y_conf',
       'Little-Fly_y_freq', 'Little-Fly_y_conf', 'Common-Stand-Tag_y_freq',
       'Common-Stand-Tag_y_conf', 'Little-Stand-Tag_y_freq',
       'Little-Stand-Tag_y_conf', 'Chick_y_freq', 'Chick_y_conf',
       'Other_y_freq', 'Other_y_conf', 'Little-Stand_f_freq',
       'Little-Sit_f_freq', 'Common-Stand_f_freq', 'Common-Sit_f_freq',
       'Other_f_freq', 'detection_rate', 'movement_rate', 'flag', 'file_path',
       'tern_x', 'tern_y', 'dy/dx_uv', 'pix_x1', 'pix_y1', 'pix_x2', 'pix_y2',
       'DX_cm', 'DY_cm', 'dx_pix_drone', 'dy_pix_drone', 'Area'],
      dtype='object')

In [19]:
# Select the model's input features, in the order stored on the model
# (feature_names_in_); a KeyError here means a feature column is missing
X = track_details_df[rf_model.feature_names_in_]

In [20]:
# Probability of every class, for every track
y_pred_proba = rf_model.predict_proba(X)

# Predicted class label of every track
y_pred = rf_model.predict(X)

# Store the label next to the track's features
track_details_df['y_pred'] = y_pred

# Confidence of a prediction = its highest class probability, rounded to 3 decimals
track_details_df['confidence'] = y_pred_proba.max(axis=1).round(3)

### Create dictionary of classifications

In [21]:
# Maps report file path -> {track id: [predicted class, confidence]}
classifications = {}

# NOTE(review): iterrows() is kept on purpose. It appears to yield plain Python
# scalars for this mixed-dtype frame, which the JSON dump in the save step may
# rely on — confirm before vectorising.
for row_index, track_details in track_details_df.iterrows():
    # Fetch the per-file mapping, creating it the first time a file is seen
    file_classifications = classifications.setdefault(track_details['file_path'], {})
    file_classifications[track_details['id']] = [
        track_details['y_pred'],
        track_details['confidence'],
    ]

### Save the classification results to JSON files

In [22]:
import shutil

# Write one JSON file per tracking report, mirroring the report's relative path
# below the classification results directory
for flag_path, flag_classifications in classifications.items():
    # os.path.join supplies the separator when classification_result has no
    # trailing '/'; plain string concatenation would glue the directory name
    # and the relative path together in that case.
    output_path = os.path.join(classification_result, flag_path)

    # Ensure the directory exists
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    # Write the classifications of this report
    with open(output_path, 'w') as f:
        json.dump(flag_classifications, f, indent=4)