In [None]:
# Print the CUDA compiler and Python versions of the current runtime.
!nvcc --version
!python --version


In [None]:

#Install Detectron2 from the source
!pip install 'git+https://github.com/facebookresearch/detectron2.git'
print(torch.__version__)
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.structures import BoxMode

In [None]:
#import libraries
import glob
import json
import os
import cv2
import numpy as np
import torch




## 1. Resize images and create the COCO annotations JSON file


In [None]:
# Mount Google Drive so the dataset and model folders under /content/drive are reachable.
import os
from google.colab import drive
drive.mount('/content/drive')

# Disabled: with this chdir commented out, relative file names used later in the
# notebook resolve against the runtime's working directory, not this Drive folder.
#os.chdir('/content/drive/MyDrive/Project/Kaggle/Water bodies resized/')

# Dataset source:
#DATASET https://www.kaggle.com/datasets/franciscoescobar/satellite-images-of-water-bodies

In [None]:
#we need to resize images before creating annotated dataset so that the annotations contain the correct dimensions

from PIL import Image

def resize_image_and_mask(image_dir, mask_dir, output_image_dir, output_mask_dir, new_size):
    """Resize every image/mask pair and write the results to the output folders.

    A mask is expected under ``mask_dir`` with the same file name as its image.
    Pairs that cannot be read or written are reported and skipped.

    Args:
        image_dir: Folder containing the source images (``.jpg`` / ``.png``).
        mask_dir: Folder containing the masks, named like their images.
        output_image_dir: Folder for the resized images (created if missing).
        output_mask_dir: Folder for the resized masks (created if missing).
        new_size: Target ``(width, height)`` passed to ``cv2.resize``.

    Returns:
        None. Progress and errors are printed.
    """
    # Create output directories if they don't exist
    os.makedirs(output_image_dir, exist_ok=True)
    os.makedirs(output_mask_dir, exist_ok=True)

    # Match extensions case-insensitively so files such as 'x.JPG' are not silently ignored.
    image_files = [f for f in os.listdir(image_dir) if f.lower().endswith(('.jpg', '.png'))]

    for image_file in image_files:
        image_path = os.path.join(image_dir, image_file)
        mask_path = os.path.join(mask_dir, image_file)

        image = cv2.imread(image_path)
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)  # Read the mask in grayscale

        # cv2.imread signals failure by returning None instead of raising.
        if image is None or mask is None:
            print(f"Error reading file {image_file}, skipping...")
            continue

        # INTER_AREA averages pixels when shrinking the image; INTER_NEAREST keeps
        # the mask free of interpolated in-between values.
        resized_image = cv2.resize(image, new_size, interpolation=cv2.INTER_AREA)
        resized_mask = cv2.resize(mask, new_size, interpolation=cv2.INTER_NEAREST)

        output_image_path = os.path.join(output_image_dir, image_file)
        output_mask_path = os.path.join(output_mask_dir, image_file)

        # cv2.imwrite returns False on failure; do not report success in that case.
        image_saved = cv2.imwrite(output_image_path, resized_image)
        mask_saved = cv2.imwrite(output_mask_path, resized_mask)
        if not (image_saved and mask_saved):
            print(f"Error writing file {image_file}, skipping...")
            continue

        print(f"Processed and saved: {image_file}")

# Configuration: source folders (original dataset) and destination folders (resized copies).
image_dir = '/content/drive/MyDrive/Project/Kaggle/Water Bodies Dataset/Images/'
mask_dir = '/content/drive/MyDrive/Project/Kaggle/Water Bodies Dataset/Masks/'
output_image_dir = '/content/drive/MyDrive/Project/Kaggle/Water bodies resized/images/'
output_mask_dir = '/content/drive/MyDrive/Project/Kaggle/Water bodies resized/masks/'
new_size = (512, 512)  # New width and height as per paper

# Process all images and masks (reads from and writes to Drive; prints one line per file).
resize_image_and_mask(image_dir, mask_dir, output_image_dir, output_mask_dir, new_size)



In [None]:
#Attempt 2
###FROM: https://github.com/waspinator/pycococreator/blob/master/pycococreatortools/pycococreatortools.py
#!/usr/bin/env python3

import os
import re
import json
import datetime
import numpy as np
from itertools import groupby
from skimage import measure
from skimage.io import imread
from PIL import Image
from pycocotools import mask
from pycocotools import mask as coco_mask

# Locations of the resized images and of their masks
images_path = "/content/drive/MyDrive/Project/Kaggle/Water bodies resized/images/"
masks_path = "/content/drive/MyDrive/Project/Kaggle/Water bodies resized/masks/"


def convert(text):
    """Return ``text`` as an int when it consists of digits, otherwise lower-cased."""
    if text.isdigit():
        return int(text)
    return text.lower()


def natural_key(key):
    """Sort key that orders embedded numbers numerically ('img2' before 'img10')."""
    # The capturing group keeps the digit runs in the split result.
    return [convert(chunk) for chunk in re.split('([0-9]+)', key)]

def resize_binary_mask(array, new_size):
    """Resize a binary mask with nearest-neighbour sampling and return it as a boolean array.

    Args:
        array: Mask array; it is cast to uint8 and scaled by 255 before resizing.
        new_size: Target size handed to ``PIL.Image.resize``.
    """
    scaled = array.astype(np.uint8) * 255
    resized = Image.fromarray(scaled).resize(new_size, Image.NEAREST)
    return np.asarray(resized).astype(np.bool_)

def close_contour(contour):
    """Return ``contour`` with its first point repeated at the end if it is not already closed.

    A contour whose first and last points are equal is returned unchanged.
    """
    first_point, last_point = contour[0], contour[-1]
    if np.array_equal(first_point, last_point):
        return contour
    return np.vstack((contour, first_point))

def binary_mask_to_polygon(binary_mask, tolerance=0):
    """Trace the outlines of a binary mask and return them as flat coordinate lists.

    Args:
        binary_mask: 2D mask array.
        tolerance: Tolerance passed to ``measure.approximate_polygon``.

    Returns:
        A list of polygons, each a flat list of coordinates with negative
        values clamped to 0. Outlines with fewer than 3 points are dropped.
    """
    # A one-pixel zero border lets contours that touch the mask edge be traced.
    padded = np.pad(binary_mask, pad_width=1, mode='constant', constant_values=0)

    polygons = []
    for raw_contour in measure.find_contours(padded, 0.5):
        # Shift by one to undo the padding offset, then make sure the outline is closed.
        outline = close_contour(raw_contour - 1)
        outline = measure.approximate_polygon(outline, tolerance)
        if len(outline) >= 3:
            # Swap the two coordinate columns before flattening.
            flat = np.flip(outline, axis=1).ravel().tolist()
            polygons.append([0 if value < 0 else value for value in flat])
    return polygons


def create_image_info(image_id, file_name, image_size, date_captured=None,
                      license_id=1, coco_url="", flickr_url=""):
    """Build one entry for the ``images`` list of a COCO dataset.

    Args:
        image_id: Id of the image.
        file_name: File name stored in the entry.
        image_size: ``(width, height)`` sequence. Index 0 is stored as the width
            and index 1 as the height, so a numpy ``shape`` (height, width, ...)
            must be reordered by the caller.
        date_captured: Timestamp string. When ``None`` (the default), the current
            UTC time is used, formatted like ``2024-01-31 12:00:00.000000``.
        license_id: License id stored in the entry.
        coco_url: Stored as-is.
        flickr_url: Stored as-is.

    Returns:
        dict with the keys ``id``, ``file_name``, ``width``, ``height``,
        ``date_captured``, ``license``, ``coco_url`` and ``flickr_url``.
    """
    if date_captured is None:
        # Computed per call: a timestamp in the signature default would be evaluated
        # only once, when the function is defined. The tzinfo is dropped so the
        # string keeps the same naive-UTC format as before.
        now_utc = datetime.datetime.now(datetime.timezone.utc)
        date_captured = now_utc.replace(tzinfo=None).isoformat(' ')

    return {
        "id": image_id,
        "file_name": file_name,
        "width": image_size[0],
        "height": image_size[1],
        "date_captured": date_captured,
        "license": license_id,
        "coco_url": coco_url,
        "flickr_url": flickr_url
    }

def create_annotation_info(annotation_id, image_id, category_info, binary_mask, tolerance=2):
    """Build one COCO annotation entry from a binary mask.

    Args:
        annotation_id: Id of the annotation.
        image_id: Id of the image the annotation belongs to.
        category_info: dict providing ``"id"`` and ``"is_crowd"``.
        binary_mask: 2D mask array.
        tolerance: Polygon simplification tolerance forwarded to
            ``binary_mask_to_polygon``.

    Returns:
        The annotation dict, or ``None`` when the mask area is below 1 or no
        polygon could be extracted.
    """
    rle = coco_mask.encode(np.asfortranarray(binary_mask.astype(np.uint8)))
    area = coco_mask.area(rle)
    if area < 1:
        return None

    bounding_box = coco_mask.toBbox(rle)
    segmentation = binary_mask_to_polygon(binary_mask, tolerance)
    if not segmentation:
        return None

    mask_height = binary_mask.shape[0]
    mask_width = binary_mask.shape[1]
    annotation = {
        "id": annotation_id,
        "image_id": image_id,
        "category_id": category_info["id"],
        "iscrowd": category_info["is_crowd"],
        "area": float(area),
        "bbox": bounding_box.tolist(),
        "segmentation": segmentation,
        "width": mask_width,
        "height": mask_height
    }
    return annotation

# Initialize COCO dataset structure
coco_output = {
    "info": {},
    "licenses": [],
    "images": [],
    "annotations": [],
    "categories": [{"id": 0, "name": "water_body", "supercategory": "water_body"}]
}

# Written next to the resized images/masks folders, which is where the train/test
# split step reads it from. A bare relative file name would end up in the runtime's
# working directory because the chdir into the Drive folder is commented out.
coco_json_path = "/content/drive/MyDrive/Project/Kaggle/Water bodies resized/coco_dataset.json"

image_id = 1
annotation_id = 1

# Read images and masks in natural (numeric-aware) file name order
for img_file in sorted(os.listdir(images_path), key=natural_key):
    if not img_file.endswith('.jpg'):
        continue

    mask_file = img_file  # Use the same file name for mask as the image
    image_path = os.path.join(images_path, img_file)
    mask_path = os.path.join(masks_path, mask_file)

    # Load image and mask. The mask gets its own name so that the `mask` module
    # imported from pycocotools is not overwritten by an array.
    image = imread(image_path)
    binary_mask = imread(mask_path) > 128  # Directly load JPG mask and apply threshold

    # numpy shapes are (height, width[, channels]) while create_image_info expects
    # (width, height); passing image.shape directly would swap the two.
    image_height, image_width = image.shape[:2]

    # Create COCO data
    image_info = create_image_info(image_id, img_file, (image_width, image_height))
    annotation_info = create_annotation_info(annotation_id, image_id, {"id": 0, "is_crowd": 0}, binary_mask)

    # Images without a usable annotation are left out and do not consume an id.
    if annotation_info:
        coco_output["images"].append(image_info)
        coco_output["annotations"].append(annotation_info)
        image_id += 1
        annotation_id += 1

# Save to JSON
with open(coco_json_path, 'w') as outfile:
    json.dump(coco_output, outfile, indent=4)





## 2. Create a train/test split for the annotations

In [None]:

# Install funcy for the (currently disabled) cocosplit script referenced below.
!pip install funcy


# NOTE(review): presumably scikit-multilearn backs the script's --multi-class option — confirm.
!pip install scikit-multilearn



# Script source:
#from: https://github.com/akarazniewicz/cocosplit/blob/master/cocosplit.py

# Disabled command-line invocation of that script, kept for reference:
#!python /content/drive/MyDrive/Project/Kaggle/split_coco_annotations.py /content/drive/MyDrive/Project/Kaggle/Water\ bodies\ resized/annotations.json /content/drive/MyDrive/Project/Kaggle/Water\ bodies\ resized/train_annotations.json /content/drive/MyDrive/Project/Kaggle/Water\ bodies\ resized/test_annotations.json -s 0.8 --having-annotations --multi-class

In [None]:


# Resized image and mask folders.
# NOTE(review): neither name is read by the split code in this cell — confirm they are still needed here.
images_path = "/content/drive/MyDrive/Project/Kaggle/Water bodies resized/images/"
mask_path = "/content/drive/MyDrive/Project/Kaggle/Water bodies resized/masks/"


def create_train_test_split(annotations_path, train_ratio=0.8, seed=42):
    """Split a COCO annotation file into a train file and a test file.

    Images are shuffled and the first ``train_ratio`` share goes to the train
    set; each annotation follows the image it belongs to. The two files are
    written next to ``annotations_path`` as ``annotations_train.json`` and
    ``annotations_test.json``.

    Args:
        annotations_path: Path of the COCO JSON file to split.
        train_ratio: Fraction of the images assigned to the train set.
        seed: Seed for the shuffle, so re-running gives the same split.
            Pass ``None`` for a different split on every call.

    Returns:
        Tuple ``(train_annotations_path, test_annotations_path)``.
    """
    # Load annotations
    with open(annotations_path, 'r') as f:
        data = json.load(f)

    # Shuffle a copy with a dedicated, seeded generator: the split is reproducible
    # and neither the loaded data nor numpy's global random state is touched.
    images = list(data['images'])
    np.random.default_rng(seed).shuffle(images)

    # Split into train and test
    num_train = int(len(images) * train_ratio)
    train_images = images[:num_train]
    test_images = images[num_train:]

    def build_subset(subset_images):
        """Assemble a COCO dict holding ``subset_images`` and their annotations."""
        image_ids = {image['id'] for image in subset_images}
        # Carry over the optional header sections so the outputs keep the same
        # top-level layout as the input file.
        subset = {key: data[key] for key in ('info', 'licenses') if key in data}
        subset['images'] = subset_images
        subset['annotations'] = [anno for anno in data['annotations'] if anno['image_id'] in image_ids]
        subset['categories'] = data['categories']
        return subset

    train_dataset = build_subset(train_images)
    test_dataset = build_subset(test_images)

    # Save new annotation files
    output_dir = os.path.dirname(annotations_path)
    train_annotations_path = os.path.join(output_dir, 'annotations_train.json')
    test_annotations_path = os.path.join(output_dir, 'annotations_test.json')

    with open(train_annotations_path, 'w') as f:
        json.dump(train_dataset, f)
    with open(test_annotations_path, 'w') as f:
        json.dump(test_dataset, f)

    return train_annotations_path, test_annotations_path

# Split the full COCO file; the two output files are written into the same folder as the input.
annotations_path = '/content/drive/MyDrive/Project/Kaggle/Water bodies resized/coco_dataset.json'
train_annotations_path, test_annotations_path = create_train_test_split(annotations_path)
print("Train annotations saved to:", train_annotations_path)
print("Test annotations saved to:", test_annotations_path)

## 3. Register the dataset for use with Detectron2
### Adapted from the tutorials on the Detectron2 GitHub:
### https://detectron2.readthedocs.io/en/latest/tutorials/datasets.html
### https://colab.research.google.com/drive/16jcaJoc6bCFAQ96jDe2HwtXj7BMD_-m5#scrollTo=tVJoOm6LVJwW

In [None]:
from detectron2.data import DatasetCatalog, MetadataCatalog
from detectron2.structures import BoxMode


def get_waterbodies_dicts(img_dir, annotations_json):
    """Load a COCO-style annotation file into a list of per-image dataset records.

    Args:
        img_dir: Folder containing the images named in the annotation file.
        annotations_json: Path of the annotation file. An absolute path is used
            as-is; a relative one is resolved against ``img_dir``.

    Returns:
        One dict per image with ``file_name``, ``image_id`` (position in the
        file's image list), ``height``, ``width`` and ``annotations`` (each with
        ``bbox``, ``bbox_mode``, ``segmentation`` and ``category_id``).

    Raises:
        FileNotFoundError: If an image listed in the annotation file cannot be read.
    """
    # os.path.join drops img_dir when annotations_json is absolute.
    json_file = os.path.join(img_dir, annotations_json)
    with open(json_file) as f:
        imgs_anns = json.load(f)

    # Group the annotations by image once instead of rescanning the full list per image.
    annos_by_image = {}
    for anno in imgs_anns['annotations']:
        annos_by_image.setdefault(anno['image_id'], []).append(anno)

    dataset_dicts = []
    for idx, v in enumerate(imgs_anns['images']):
        filename = os.path.join(img_dir, v['file_name'])

        # The size is taken from the file itself rather than from the JSON entry.
        image = cv2.imread(filename)
        if image is None:
            # cv2.imread returns None for a missing or unreadable file.
            raise FileNotFoundError(f"Could not read image: {filename}")
        height, width = image.shape[:2]

        objs = [
            {
                "bbox": anno['bbox'],
                "bbox_mode": BoxMode.XYWH_ABS,
                "segmentation": anno['segmentation'],
                "category_id": anno['category_id'],
            }
            for anno in annos_by_image.get(v['id'], [])
        ]

        dataset_dicts.append({
            "file_name": filename,
            "image_id": idx,
            "height": height,
            "width": width,
            "annotations": objs,
        })
    return dataset_dicts

# Paths to the train and test annotation files
train_annotations_path = '/content/drive/MyDrive/Project/Kaggle/Water bodies resized/annotations_train.json'
test_annotations_path = '/content/drive/MyDrive/Project/Kaggle/Water bodies resized/annotations_test.json'

waterbodies_image_dir = "/content/drive/MyDrive/Project/Kaggle/Water bodies resized/images/"


def _register_waterbodies(name, annotations_json):
    """Register ``name`` in the Detectron2 catalogs, replacing an earlier registration."""
    # Registering an existing name fails, so drop the old entry first; this keeps
    # the cell re-runnable within one session.
    if name in DatasetCatalog.list():
        DatasetCatalog.remove(name)
    DatasetCatalog.register(name, lambda: get_waterbodies_dicts(waterbodies_image_dir, annotations_json))
    MetadataCatalog.get(name).set(thing_classes=["waterbody"])


# Register the datasets. The test split previously set a misspelled metadata key
# ("tfhing_classes"), which left it without class names.
_register_waterbodies("waterbodies_train", train_annotations_path)
_register_waterbodies("waterbodies_test", test_annotations_path)




In [None]:
# Fetch the registered training set from both catalogs to verify that registration worked.

# NOTE(review): `nuts_metadata` looks like a name carried over from another notebook;
# it holds the metadata of "waterbodies_train".
nuts_metadata = MetadataCatalog.get('waterbodies_train')
# NOTE(review): this presumably runs the registered loader, which reads every image — confirm the cost is acceptable.
dataset_dicts = DatasetCatalog.get("waterbodies_train")

## 4. Configure model

In [None]:
from detectron2.config import get_cfg
from detectron2 import model_zoo
from detectron2.engine import DefaultTrainer
from detectron2.utils.logger import setup_logger
from detectron2.data import MetadataCatalog, DatasetCatalog
import detectron2.data.transforms as T


#FROM: https://colab.research.google.com/drive/16jcaJoc6bCFAQ96jDe2HwtXj7BMD_-m5#scrollTo=7unkuuiqLdqd

#Defaults: https://github.com/facebookresearch/detectron2/blob/main/detectron2/config/defaults.py

# Start from Detectron2's default config and overlay the Mask R-CNN R50-FPN (3x) settings from the model zoo.
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = ("waterbodies_train",)
# TODO: consider leaving TEST empty here, since no evaluator is configured for training
# and evaluation is done in a later section.
cfg.DATASETS.TEST = ("waterbodies_test",)
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")  # Initialize from model zoo weights

cfg.SOLVER.IMS_PER_BATCH = 2
# Learning rate override, currently disabled (the base config's value is used):
#cfg.SOLVER.BASE_LR = 0.02
cfg.SOLVER.MAX_ITER = 100    # TODO: adjust to the dataset size and the desired training time
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 128
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1  # One class: the datasets are registered with thing_classes=["waterbody"]

# The images were resized to 512x512 beforehand, so all input sizes are pinned to 512.
cfg.INPUT.MIN_SIZE_TRAIN = (512,)  # Only one size, no choice needed
cfg.INPUT.MIN_SIZE_TRAIN_SAMPLING = "choice"
cfg.INPUT.MAX_SIZE_TRAIN = 512  # No need to allow any larger sizes
cfg.INPUT.MIN_SIZE_TEST = 512  # Same as training
cfg.INPUT.MAX_SIZE_TEST = 512  # Same as training


# TODO: print the resolved configuration here for inspection.
#cfg.display()????????????????

# Learning-rate step schedules tried for longer runs, currently disabled:
#cfg.SOLVER.STEPS = [15000, 35000]  # for 50,000 iterations

#cfg.SOLVER.STEPS = [5000, 15000]



# Output folder for this training run.
# NOTE(review): the evaluation cells load weights from MaskR_CNN_Modelv6, not from this
# Modelv7 folder — confirm which run is meant to be evaluated.

cfg.OUTPUT_DIR = "/content/drive/MyDrive/Project/MaskR_CNN_Modelv7/"
os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)



In [None]:
# Check that a GPU is connected.
# is_available must be called: the bare function object is always truthy, so
# without the parentheses the warning branch could never run.
if torch.cuda.is_available():
  print('GPU available')
else:
  print('Please set GPU via Edit -> Notebook Settings.')

## 5. train!

In [None]:

# NOTE(review): per Detectron2's config defaults VIS_PERIOD is the number of iterations
# between training visualizations, so 1 would visualize on every iteration — confirm this is intended.
cfg.VIS_PERIOD = 1


# Initialize the trainer and start training
trainer = DefaultTrainer(cfg)
# resume=False: start from cfg.MODEL.WEIGHTS rather than continuing from a checkpoint in OUTPUT_DIR.
trainer.resume_or_load(resume=False)
trainer.train()


## 6. Evaluate!

In [None]:
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import build_detection_test_loader

cfg = get_cfg()
# Add model configuration settings (the same settings used during training)
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.MODEL.WEIGHTS = "/content/drive/MyDrive/Project/MaskR_CNN_Modelv6/model_final.pth"  # Path to the model weights

# Must match training (one class). Left at the base config's value, the prediction
# heads would be built with a different number of classes than the fine-tuned
# checkpoint, so its head weights would not fit the model.
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1

# Confidence threshold for reported detections; set before the model is built.
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5

cfg.DATASETS.TRAIN = ("waterbodies_train",)
cfg.DATASETS.TEST = ("waterbodies_test",)

# Data loader and evaluator for the test split
val_loader = build_detection_test_loader(cfg, "waterbodies_test")
evaluator = COCOEvaluator("waterbodies_test", cfg, False, output_dir="/content/drive/MyDrive/Project/MaskR_CNN_Modelv6/output/")

# The trainer is only used to build the model and load the weights.
trainer = DefaultTrainer(cfg)
trainer.resume_or_load(resume=False)  # Load the model weights
print(inference_on_dataset(trainer.model, val_loader, evaluator))

Use this cell to load the trained model in a new session (for example, after the previous runtime has been deleted).

In [None]:
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

from detectron2 import model_zoo
from detectron2.engine import DefaultTrainer
from detectron2.config import get_cfg
from detectron2.data import build_detection_test_loader, MetadataCatalog, DatasetCatalog
from detectron2.data.datasets import register_coco_instances
from detectron2.evaluation import COCOEvaluator, inference_on_dataset

# Register the datasets only if they are not already registered
dataset_name = "waterbodies_test"
annotation_file = '/content/drive/MyDrive/Project/Kaggle/Water bodies resized/annotations_test.json'
image_dir = '/content/drive/MyDrive/Project/Kaggle/Water bodies resized/images/'

if dataset_name not in DatasetCatalog.list():
    register_coco_instances(dataset_name, {}, annotation_file, image_dir)

# DefaultTrainer builds a training data loader from cfg.DATASETS.TRAIN, so in a
# fresh session the training split has to be registered too; otherwise creating
# the trainer fails because "waterbodies_train" is unknown.
train_dataset_name = "waterbodies_train"
train_annotation_file = '/content/drive/MyDrive/Project/Kaggle/Water bodies resized/annotations_train.json'

if train_dataset_name not in DatasetCatalog.list():
    register_coco_instances(train_dataset_name, {}, train_annotation_file, image_dir)


# Configuration for the model
cfg = get_cfg()
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.MODEL.WEIGHTS = "/content/drive/MyDrive/Project/MaskR_CNN_Modelv6/model_final.pth"

#cfg.MODEL.WEIGHTS = "/content/drive/MyDrive/Project/MaskR_CNN_Modelnoresize/model_final.pth"

cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 1
# Point both dataset lists at the water-body splits so no default COCO dataset is referenced.
cfg.DATASETS.TEST = ("waterbodies_test",)
cfg.DATASETS.TRAIN = ("waterbodies_train",)
# NOTE: DATASETS.VAL is not one of Detectron2's default config keys; it is kept only
# as a custom entry and does not influence which data Detectron2 loads.
cfg.DATASETS.VAL = ("waterbodies_test",)
# Input-size overrides used for training, currently disabled here:
# cfg.INPUT.MIN_SIZE_TRAIN = (512,)  # Only one size, no choice needed
# cfg.INPUT.MIN_SIZE_TRAIN_SAMPLING = "choice"
# cfg.INPUT.MAX_SIZE_TRAIN = 512  # No need to allow any larger sizes
# cfg.INPUT.MIN_SIZE_TEST = 512  # Same as training
# cfg.INPUT.MAX_SIZE_TEST = 512  # Same as training

# Setting up the test data loader and evaluator
val_loader = build_detection_test_loader(cfg, "waterbodies_test")
evaluator = COCOEvaluator("waterbodies_test", cfg, False, output_dir="/content/drive/MyDrive/Project/MaskR_CNN_Modelv6/output/")

# Load the model. resume=False always loads cfg.MODEL.WEIGHTS; with resume=True a
# leftover checkpoint in cfg.OUTPUT_DIR would be picked up instead of these weights.
trainer = DefaultTrainer(cfg)
trainer.resume_or_load(resume=False)
#results = inference_on_dataset(trainer.model, val_loader, evaluator)
#print(results)

## 7. Get predicted masks and pixel count

In [None]:
import os
import cv2
import numpy as np
import pandas as pd
import json
import re
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg


#Sources: https://stackoverflow.com/questions/64210521/compute-precision-and-accuracy-using-numpy
# sources: https://github.com/facebookresearch/detectron2/issues/984
#Sources: https://stackoverflow.com/questions/46689428/convert-np-array-of-type-float64-to-type-uint8-scaling-values
#sources: https://stackoverflow.com/questions/60227833/how-to-filter-coco-dataset-classes-annotations-for-custom-dataset



predictor = DefaultPredictor(cfg)

# Define paths
# NOTE(review): the metrics below are computed against the *train* annotations —
# switch to annotations_test.json if held-out performance is what should be reported.
annotation_path = '/content/drive/MyDrive/Project/Kaggle/Water bodies resized/annotations_train.json'
image_dir = '/content/drive/MyDrive/Project/Kaggle/Water bodies resized/images/'
actual_masks_dir = '/content/drive/MyDrive/Project/MaskR_CNN_Modelv6/actual masks/'
predicted_masks_dir = '/content/drive/MyDrive/Project/MaskR_CNN_Modelv6/predicted masks/'
output_csv_path = '/content/drive/MyDrive/Project/MaskR_CNN_Modelv6/water_pixel_counts44.csv'

# Create directories if they do not exist
os.makedirs(actual_masks_dir, exist_ok=True)
os.makedirs(predicted_masks_dir, exist_ok=True)

# Load annotations
with open(annotation_path) as f:
    annotations = json.load(f)

# Look images up by file name. The ids in the annotation file are a running
# counter, not the number embedded in the file name, so parsing the id out of
# the name can pair an image with another image's annotations.
image_id_by_file = {img['file_name']: img['id'] for img in annotations['images']}

# Group annotations by image once instead of scanning the full list per image.
anns_by_image_id = {}
for ann in annotations['annotations']:
    anns_by_image_id.setdefault(ann['image_id'], []).append(ann)

image_files = os.listdir(image_dir)
results = []

for image_file in image_files:
    image_path = os.path.join(image_dir, image_file)
    # The image stays in OpenCV's BGR layout, which is what DefaultPredictor expects.
    image = cv2.imread(image_path)
    if image is None:
        print(f"Could not read image {image_path}")
        continue

    image_id = image_id_by_file.get(image_file)
    if image_id is None:
        # Not part of this annotation file (e.g. it belongs to the other split).
        print(f"No annotation found for image {image_file}")
        continue

    print(f"Processing image_id: {image_id}")

    ann_list = anns_by_image_id.get(image_id, [])
    if not ann_list:
        print(f"No annotation found for image_id {image_id}")
        continue

    # Create the actual mask by combining all annotations for this image
    mask_image = np.zeros((image.shape[0], image.shape[1]), dtype=np.uint8)

    for ann in ann_list:
        segmentation = ann['segmentation']
        if not isinstance(segmentation, list):
            print(f"Unknown segmentation format for image_id {image_id}")
            continue
        if not segmentation:
            # Nothing to draw; also avoids indexing into an empty list below.
            continue
        # Either a list of polygons or a single flat polygon.
        rings = segmentation if isinstance(segmentation[0], list) else [segmentation]
        polygons = [np.array(ring).reshape(-1, 2).astype(np.int32) for ring in rings]
        # Fill the polygons on the mask
        cv2.fillPoly(mask_image, polygons, 255)

    actual_water_pixels = np.sum(mask_image == 255)

    # Generate predictions and merge all predicted instances into one mask
    outputs = predictor(image)
    pred_masks = outputs['instances'].pred_masks.cpu().numpy()
    if pred_masks.size == 0:
        predicted_mask = np.zeros_like(mask_image)
    else:
        predicted_mask = (np.sum(pred_masks, axis=0) >= 1).astype(np.uint8) * 255
    predicted_water_pixels = np.sum(predicted_mask == 255)

    # Pixel-wise comparison of the two masks
    actual_water = mask_image == 255
    predicted_water = predicted_mask == 255

    # True Positives (TP), False Positives (FP), False Negatives (FN), True Negatives (TN)
    TP = np.sum(actual_water & predicted_water)
    FP = np.sum(~actual_water & predicted_water)
    FN = np.sum(actual_water & ~predicted_water)
    TN = np.sum(~actual_water & ~predicted_water)

    # Calculate metrics (0 when the denominator is empty)
    precision = TP / (TP + FP) if (TP + FP) > 0 else 0
    recall = TP / (TP + FN) if (TP + FN) > 0 else 0
    f1_score = (2 * precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
    iou = TP / (TP + FP + FN) if (TP + FP + FN) > 0 else 0

    results.append({
        'image_id': image_id,
        'actual_water_pixels': actual_water_pixels,
        'predicted_water_pixels': predicted_water_pixels,
        'TP': TP,
        'FP': FP,
        'FN': FN,
        'TN': TN,
        'precision': precision,
        'recall': recall,
        'f1_score': f1_score,
        'iou': iou,
        'file_name': image_file
    })

    # Save mask images to specified directories
    actual_mask_path = os.path.join(actual_masks_dir, f'actual_mask_{image_id}.png')
    predicted_mask_path = os.path.join(predicted_masks_dir, f'predicted_mask_{image_id}.png')
    cv2.imwrite(actual_mask_path, mask_image)
    cv2.imwrite(predicted_mask_path, predicted_mask)

# Save results to CSV
df = pd.DataFrame(results)
df.to_csv(output_csv_path, index=False)
print(df)

# Calculate and print average metrics over all images
if df.empty:
    # Without rows the metric columns do not exist, so there is nothing to average.
    average_metrics = pd.Series(dtype=float)
    print("\nNo images were evaluated; no average metrics available.")
else:
    average_metrics = df[['precision', 'recall', 'f1_score', 'iou']].mean()
    print("\nAverage metrics over all images:")
    print(average_metrics)

## 8. Test on video

In [None]:

#Taken from https://stackoverflow.com/questions/60663073/how-can-i-properly-run-detectron2-on-videos

import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()
# import libraries
import numpy as np
import tqdm
import cv2
# import detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.video_visualizer import VideoVisualizer
from detectron2.utils.visualizer import ColorMode, Visualizer
from detectron2.data import MetadataCatalog
import time

# Open the input video and read its frame size, frame rate and frame count.
video = cv2.VideoCapture('/content/drive/MyDrive/Project/Google Timelapse/Mareb3.mp4')
width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
frames_per_second = video.get(cv2.CAP_PROP_FPS)
# Note: num_frames is overwritten with a fixed debugging cut-off further down.
num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))

# Initialize video writer with the same frame size and frame rate as the input.
video_writer = cv2.VideoWriter('/content/drive/MyDrive/Project/Google Timelapse/mareb3out.mp4', fourcc=cv2.VideoWriter_fourcc(*"mp4v"), fps=float(frames_per_second), frameSize=(width, height), isColor=True)

# Initialize predictor. The config block below is disabled, so the predictor is
# built from the `cfg` object left by an earlier cell.
#cfg = get_cfg()
#cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
#cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.1  # set threshold for this model
#cfg.MODEL.WEIGHTS = os.path.join("/content/drive/MyDrive/Project/MaskR_CNN_Modelv6", "model_final.pth")
predictor = DefaultPredictor(cfg)

# Initialize visualizer with the metadata of the first training dataset in cfg.
v = VideoVisualizer(MetadataCatalog.get(cfg.DATASETS.TRAIN[0]), ColorMode.IMAGE)

def runOnVideo(video, maxFrames):
    """Run the predictor on successive frames of ``video`` and yield the annotated frames.

    Args:
        video: Frame source whose ``read()`` returns ``(has_frame, frame)``,
            e.g. an opened ``cv2.VideoCapture``.
        maxFrames: Maximum number of frames to process. ``None`` processes
            frames until the video ends.

    Yields:
        The frame with the predictions drawn on it, in OpenCV's BGR layout.
    """
    readFrames = 0
    # Checking the limit before reading yields exactly maxFrames frames; checking
    # after the yield with `>` produced one frame too many.
    while maxFrames is None or readFrames < maxFrames:
        hasFrame, frame = video.read()
        if not hasFrame:
            break

        # Get prediction results for this frame (the predictor takes the BGR frame)
        outputs = predictor(frame)

        # Swap the channel order to RGB for drawing
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        # Draw a visualization of the predictions using the video visualizer
        visualization = v.draw_instance_predictions(frame, outputs["instances"].to("cpu"))

        # Convert Matplotlib RGB format to OpenCV BGR format
        visualization = cv2.cvtColor(visualization.get_image(), cv2.COLOR_RGB2BGR)

        yield visualization

        readFrames += 1

# Create a cut-off for debugging (replaces the frame count read from the video)
num_frames = 120

try:
    # Enumerate the frames of the video
    for visualization in tqdm.tqdm(runOnVideo(video, num_frames), total=num_frames):

        # Write test image (overwritten on every frame, so it ends up holding the last one)
        cv2.imwrite('/content/drive/MyDrive/Project/Google Timelapse/POSE detectron2.png', visualization)

        # Write to video file
        video_writer.write(visualization)
finally:
    # Release resources even if inference fails part-way, so the capture is
    # closed and the output file is finalized.
    video.release()
    video_writer.release()
    cv2.destroyAllWindows()