## Inference | Tiny-Mamba Yolo


In [1]:
# Import all dependencies.
import cv2
import numpy as np
from PIL import Image, ImageEnhance
from ultralytics import YOLO
import supervision as sv

'''
    We evaluate our model on a transformed YouTube video.
    This code contains the following steps:
        1. Transform the downloaded YouTube video into the same preprocessed style as the training data.
        2. Run the model on the video.
'''


#### Transformation function

In [8]:
def transform(image, *, size=(1024, 1024), sharpness=6, contrast=6):
    """Resize a frame, then boost its sharpness and contrast.

    With the default arguments this reproduces the original fixed pipeline:
    a LANCZOS resize to 1024x1024, followed by a sharpness enhancement with
    factor 6 and then a contrast enhancement with factor 6 (in that order).

    Args:
        image: Input frame as a PIL image.
        size: Target size handed to ``image.resize``.
        sharpness: Factor handed to ``ImageEnhance.Sharpness(...).enhance``.
        contrast: Factor handed to ``ImageEnhance.Contrast(...).enhance``.

    Returns:
        The transformed PIL image.
    """
    resized = image.resize(size, Image.Resampling.LANCZOS)
    sharpened = ImageEnhance.Sharpness(resized).enhance(sharpness)
    return ImageEnhance.Contrast(sharpened).enhance(contrast)


Video saved as videos/video_transformed.mp4


#### Define input_video and transformed_video's path

In [None]:

# The source video was downloaded from YouTube.
# It is transformed the same way the training dataset was transformed,
# and the transformed copy is stored at output_video_path.
input_video_path = 'videos/video.mp4'
output_video_path = 'videos/video_transformed.mp4'



#### Transform each frame and store it

In [None]:

# Read the input video frame by frame, apply transform() to every frame and
# write the result to output_video_path.
cap = cv2.VideoCapture(input_video_path)
if not cap.isOpened():
    # Without this check cap.read() simply returns ret=False, the loop ends
    # immediately and an empty output file is reported as "saved".
    cap.release()
    raise IOError(f"Could not open input video: {input_video_path}")

fps = cap.get(cv2.CAP_PROP_FPS)
# The writer is created for 1024x1024 frames, the size transform() resizes to.
out = cv2.VideoWriter(output_video_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (1024, 1024))

try:
    if not out.isOpened():
        raise IOError(f"Could not open output video for writing: {output_video_path}")

    while True:
        ret, frame = cap.read()
        if not ret:
            # No more frames (or the read failed): stop.
            break
        # OpenCV delivers BGR arrays; transform() works on PIL (RGB) images,
        # so convert on the way in and back again on the way out.
        frame_pil = Image.fromarray(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))
        transformed_frame = transform(frame_pil)
        transformed_frame_cv = cv2.cvtColor(np.array(transformed_frame), cv2.COLOR_RGB2BGR)
        out.write(transformed_frame_cv)
finally:
    # Release the capture and the writer even if a frame fails mid-way.
    cap.release()
    out.release()

# Close any OpenCV windows (this cell itself opens none).
cv2.destroyAllWindows()

print(f"Video saved as {output_video_path}")


In [None]:
# Location of the model weights file used for inference.
MODEL_PATH = "models/last.pt"

# Build the YOLO object from that weights file.
model = YOLO(MODEL_PATH)

# Print the loaded model.
# NOTE(review): the original comment stated this is the same as model.info()
# (a model summary) -- confirm against the installed Ultralytics version.
print(model)

In [None]:

### A function that gets a frame generator from model

def predict(source, model):
    """Run ``model`` on ``source`` and hand back its streamed results.

    ``source`` can be a local path or a YouTube link (raw YouTube input
    requires pytube). For a model trained on transformed data, the local
    transformed video (``output_video_path``) is the expected source.

    Inference is requested with ``stream=True`` and ``imgsz=(1024, 1024)``.
    The value returned by ``model.predict`` is passed through unchanged; the
    caller iterates over it to get the frames one at a time.
    """
    inference_options = {"stream": True, "imgsz": (1024, 1024)}
    return model.predict(source, **inference_options)

### Display each frame from "results" generator.

In [None]:
# Run inference on the transformed video and walk through the results.
results = predict(output_video_path, model)
for result in results:
    # Keep the individual outputs of the current result in their own names
    # (detection boxes, masks, keypoints, class probabilities, oriented boxes
    # -- per the Ultralytics Results API; which ones are populated depends on
    # the model's task).
    boxes, masks = result.boxes, result.masks
    keypoints, probs, obb = result.keypoints, result.probs, result.obb
    result.show()  # Display each frame