#### Download all images from FTP to local filesystem

In [2]:
from object_detection import object_detection_runpod
from tqdm import tqdm
from datetime import datetime
from file_helper import write_file_to_s3, get_presigned_url
import json
import logging
from datetime import datetime
from logging.handlers import RotatingFileHandler
import pickle
import os
import astral
from astral.sun import sun
from pytz import timezone
from datetime import datetime
import os
import pickle

from dotenv import load_dotenv
from ftp import SingletonFTP

In [4]:
log_filename = datetime.now().strftime('scraper-%Y%m%d-%H%M.log')
file_handler = RotatingFileHandler(
    log_filename, maxBytes=10000, backupCount=1)
file_handler.setLevel(logging.INFO)
formatter = logging.Formatter(
    '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
file_handler.setFormatter(formatter)

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger('PlateMatch-Report')

class PrettyPrintDictFormatter(logging.Formatter):
    def format(self, record):
        if isinstance(record.msg, dict):
            record.msg = json.dumps(record.msg, indent=4)
        return super().format(record)

pretty_formatter = PrettyPrintDictFormatter(
    '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
file_handler.setFormatter(pretty_formatter)

logger.addHandler(file_handler)

In [None]:


load_dotenv()
host = os.getenv('FTP_HOST')
username = os.getenv('FTP_USERNAME')
password = os.getenv('FTP_PASSWORD')


ftp = SingletonFTP(host, username, password)
ftp.download_all('/files/files-mailseth', 'tmp')

### extract metadata from filename

In [None]:

location = astral.LocationInfo(
    "Oakland", "USA", "US/Pacific", 37.8044, -122.2712
)

In [None]:

image_dir = 'tmp'


image_files_list = [{'local_path': os.path.join(image_dir, f)}
               for f in os.listdir(image_dir) if f.endswith('.jpg')]

for output in image_files_list:
    filename = output['local_path'].split('/')[-1]
    camera_name, date, hour, minutes = filename.split('_')[3], filename.split(
        '_')[4], filename.split('_')[7], filename.split('_')[8].split('.')[0]
    seconds = filename.split('_')[8].split('.')[1][0:2]
    year, month, day = date.split('-')
    output['camera_name'] = camera_name
    datetime_obj = datetime(int(year), int(month), int(
        day), int(hour), int(minutes), int(seconds))
    output['datetime'] = datetime_obj
    # Use astral library to get sun information for the specific date and location
    s = sun(location.observer, date=output['datetime'].date(
    ), tzinfo=timezone('US/Pacific'))

    output['S3_key'] = camera_name + '_' + datetime_obj.strftime('%Y%m%d_%H%M%S') + '.jpg'

    # Classify the lighting based on the sun information
    if s['dawn'].time() <= output['datetime'].time() < s['sunrise'].time():
        output['lighting'] = 'dawn'
    elif s['sunrise'].time() <= output['datetime'].time() < s['sunset'].time():
        output['lighting'] = 'day'
    elif s['sunset'].time() <= output['datetime'].time() < s['dusk'].time():
        output['lighting'] = 'dusk'
    else:
        output['lighting'] = 'night'

### Detect objects in frames


In [None]:
image_files_list[0]

In [None]:


object_counts = {}
for image_file in image_files_list:
    status = image_file.get('detection_result', {}).get('status')
    if status:
        object_counts[status] = object_counts.get(status, 0) + 1
logger.info(object_counts)

detection_counts = 0
for image_file in image_files_list:
    status = image_file.get('detection_result', None)
    if status:
        detection_counts = detection_counts + 1
logger.info(detection_counts)


In [None]:
for iteration in tqdm(image_files_list):
    if 'detection_result' not in iteration:
        write_file_to_s3(iteration['S3_key'], iteration['local_path'])
        image_url = get_presigned_url(iteration['S3_key'])

        detection_result = object_detection_runpod(image_url)
        logger.info('detection_result',detection_result)
        # detection_result_json = detection_result.pandas().xyxy[0].to_json()
        logger.info('image_file', iteration['local_path'])
        logger.info('detection_result', detection_result)
        iteration['detection_result'] = detection_result
        # output_list.append({
        #     'image_file': image_file,
        #     'detection_result': detection_result['output']
        # })
        # Explicitly delete the detection_result and call the garbage collector
        del detection_result

# PICKLE files

with open('image_files_list_partial_' + datetime.now().strftime("%Y%m%d%H%M%S") + '.pkl', 'wb') as f:
    pickle.dump(image_files_list, f)

In [5]:

file_to_read = 'image_files_list_partial_202312011622.pkl'

with open('output_list.pkl', 'rb') as f:
    output_list = pickle.load(f)

logger.info("loaded pickle file with {} records".format(len(output_list)))

INFO:PlateMatch-Report:loaded pickle file with 5112 records


In [6]:
for item in tqdm(output_list[0:10]):
    # print(item['detection_result'])
    if(item['detection_result'] is None):
        logger.info('skipping')
        continue
    # detection_results = 
    # print(detection_results)

    logger.info(json.loads(item['detection_result']))

  0%|                                              | 0/10 [00:00<?, ?it/s]INFO:PlateMatch-Report:{
    "xmin": {},
    "ymin": {},
    "xmax": {},
    "ymax": {},
    "confidence": {},
    "class": {},
    "name": {}
}
INFO:PlateMatch-Report:{
    "xmin": {
        "0": 3239.08984375,
        "1": 3388.2124023438
    },
    "ymin": {
        "0": 883.1495361328,
        "1": 343.2238769531
    },
    "xmax": {
        "0": 3834.1628417969,
        "1": 3832.9594726562
    },
    "ymax": {
        "0": 1698.775390625,
        "1": 761.1273803711
    },
    "confidence": {
        "0": 0.7338661551,
        "1": 0.3350137174
    },
    "class": {
        "0": 2,
        "1": 2
    },
    "name": {
        "0": "car",
        "1": "car"
    }
}


INFO:PlateMatch-Report:{
    "xmin": {
        "0": 1495.6081542969,
        "1": 1137.8552246094,
        "2": 1145.3131103516
    },
    "ymin": {
        "0": 378.7402954102,
        "1": 913.7869873047,
        "2": 914.1953125
    },
    "xmax": {
        "0": 1836.0078125,
        "1": 1328.3980712891,
        "2": 1326.0579833984
    },
    "ymax": {
        "0": 1103.6357421875,
        "1": 1094.6160888672,
        "2": 1088.2670898438
    },
    "confidence": {
        "0": 0.9138361216,
        "1": 0.6103878617,
        "2": 0.4238918722
    },
    "class": {
        "0": 0,
        "1": 15,
        "2": 16
    },
    "name": {
        "0": "person",
        "1": "cat",
        "2": "dog"
    }
}
INFO:PlateMatch-Report:{
    "xmin": {
        "0": 2652.6628417969
    },
    "ymin": {
        "0": 841.4736938477
    },
    "xmax": {
        "0": 3480.345703125
    },
    "ymax": {
        "0": 1529.9069824219
    },
    "confidence": {
        "0": 0.25237602
    },
    "cla

In [7]:
for item in tqdm(output_list):

    detection_results = json.loads(item['detection_result'])
    logger.info('detection_results', detection_results)
    # Remove heavily overlapping bounding boxes, keeping the one with higher confidence
    i = 0
    while i < len(detection_results['name']):
        j = i + 1
        while j < len(detection_results['name']):
            # Calculate the intersection over union (IoU) of the two bounding boxes
            # logger.info(detection_results)
            if str(i) in detection_results['xmin'] and str(j) in detection_results['xmin']:
                xi1 = max(detection_results['xmin'][str(i)],
                          detection_results['xmin'][str(j)])
                yi1 = max(detection_results['ymin'][str(i)],
                          detection_results['ymin'][str(j)])
                xi2 = min(detection_results['xmax'][str(i)],
                          detection_results['xmax'][str(j)])
                yi2 = min(detection_results['ymax'][str(i)],
                          detection_results['ymax'][str(j)])
                inter_area = max(0, xi2 - xi1 + 1) * max(0, yi2 - yi1 + 1)
                box1_area = (detection_results['xmax'][str(i)] - detection_results['xmin'][str(i)] + 1) * (
                    detection_results['ymax'][str(i)] - detection_results['ymin'][str(i)] + 1)
                box2_area = (detection_results['xmax'][str(j)] - detection_results['xmin'][str(j)] + 1) * (
                    detection_results['ymax'][str(j)] - detection_results['ymin'][str(j)] + 1)
                iou = inter_area / float(box1_area + box2_area - inter_area)

                # If the IoU is greater than 0.5, they are heavily overlapping
                if iou > 0.5:
                    logger.info(
                        f"Processing image {item['image_file']}")
                    logger.info(iou)
                    logger.info("detection_results['confidence'][str(i)]",
                          detection_results['confidence'][str(i)])
                    logger.info("detection_results['confidence'][str(j)]",
                          detection_results['confidence'][str(j)])
                    logger.info(
                        f"Removed bounding box for {detection_results} in image {item['image_file']}")
                    # Remove the bounding box with lower confidence
                    if detection_results['confidence'][str(i)] > detection_results['confidence'][str(j)]:
                        detection_results['name'].pop(str(j))
                        detection_results['xmin'].pop(str(j))
                        detection_results['ymin'].pop(str(j))
                        detection_results['xmax'].pop(str(j))
                        detection_results['ymax'].pop(str(j))
                        detection_results['confidence'].pop(str(j))
                    else:

                        detection_results['name'].pop(str(i))
                        detection_results['xmin'].pop(str(i))
                        detection_results['ymin'].pop(str(i))
                        detection_results['xmax'].pop(str(i))
                        detection_results['ymax'].pop(str(i))
                        detection_results['confidence'].pop(str(i))

                        i -= 1
                    break
            j += 1
        i += 1

  0%|                                            | 0/5112 [00:00<?, ?it/s]INFO:PlateMatch-Report:detection_results
INFO:PlateMatch-Report:detection_results
INFO:PlateMatch-Report:detection_results
INFO:PlateMatch-Report:Processing image tmp/_files_NorthStreetCam_2023-11-09_001_jpg_13_26.04[M][0@0][0].jpg
INFO:PlateMatch-Report:0.913585038805267
--- Logging error ---
Traceback (most recent call last):
  File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/logging/handlers.py", line 73, in emit
    if self.shouldRollover(record):
  File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/logging/handlers.py", line 196, in shouldRollover
    msg = "%s\n" % self.format(record)
  File "/Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/logging/__init__.py", line 943, in format
    return fmt.format(record)
  File "/var/folders/x5/72g55pyx0ynbhrkjqm208fcc0000gn/T/ipykernel_15042/3507044314.py", line 16, in format
    return super().format(reco