In [None]:
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.models.detection.faster_rcnn import FastRCNNPredictor
import torch.optim as optim
import torch

classes = ["shuttle_main"]
# torchvision detection models count the background as a class, hence the +1.
num_classes = len(classes) + 1
model = fasterrcnn_resnet50_fpn(pretrained=False, num_classes=num_classes)

# Resolve the target device *before* reading the checkpoint so its tensors can
# be remapped onto it. Without map_location, a checkpoint that was saved from a
# GPU fails to deserialize on a CPU-only machine.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# NOTE(review): torch.load unpickles the file; only load checkpoints you trust.
model.load_state_dict(torch.load("./faster_rcnn_model_2.pth", map_location=device))
model = model.to(device)

In [None]:
import torch.nn as nn
import torchvision.models as models


class QuaternionModel(nn.Module):
    """ResNet-50 backbone with a small MLP head that regresses four values.

    The original classifier (`fc`) is replaced by Linear -> ReLU -> Linear(128, 4).
    The four outputs are returned raw (no normalisation is applied here);
    presumably they are quaternion components -- confirm the component order
    against the training code.

    Args:
        pretrained: Forwarded to `models.resnet50`. Defaults to True to keep the
            historical behaviour; pass False when a checkpoint is loaded right
            afterwards, since the downloaded weights would be overwritten anyway.
    """

    def __init__(self, pretrained=True):
        super().__init__()
        self.base_model = models.resnet50(pretrained=pretrained)
        in_features = self.base_model.fc.in_features
        # Keep the Sequential layout (indices 0 and 2 hold the Linear layers) so
        # existing checkpoints' state-dict keys still match.
        self.base_model.fc = nn.Sequential(
            nn.Linear(in_features, 128),
            nn.ReLU(),
            nn.Linear(128, 4),
        )

    def forward(self, x):
        """Run the backbone and head on a batch of images; returns a (N, 4) tensor."""
        return self.base_model(x)


rot_model = QuaternionModel()

# Pick the device first and remap the checkpoint onto it while loading, so a
# checkpoint saved from a GPU still loads on a CPU-only machine.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# NOTE(review): torch.load unpickles the file; only load checkpoints you trust.
rot_model.load_state_dict(
    torch.load("./fine_tuned_quaternion_model.pth", map_location=device)
)
rot_model = rot_model.to(device)

# Both networks are used for inference only. The rotation model was previously
# left in training mode, which keeps BatchNorm updating its running statistics.
rot_model.eval()
model.eval()

print("Model loaded successfully!")

In [3]:
from torchvision import transforms, models
class ResNetRotationModel(nn.Module):
    """ResNet-18 whose final layer is swapped for a 3-output regression head.

    The three outputs are the x, y and z rotations predicted for each image.
    """

    def __init__(self, pretrained=True):
        super().__init__()
        resnet = models.resnet18(pretrained=pretrained)
        # Replace the ImageNet classifier with a linear layer of matching input width.
        resnet.fc = nn.Linear(resnet.fc.in_features, 3)
        self.backbone = resnet

    def forward(self, x):
        """Return the backbone's (N, 3) rotation prediction for a batch of images."""
        rotations = self.backbone(x)
        return rotations

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")


In [None]:

# pretrained=False: every parameter and buffer is overwritten by the checkpoint
# below, so downloading the ImageNet weights first is wasted work (and breaks
# the cell when there is no network access).
res18_model = ResNetRotationModel(pretrained=False).to(device)
# map_location lets a checkpoint saved from a GPU load on a CPU-only machine.
res18_model.load_state_dict(
    torch.load('./rotation_resnet18_fine_tuned.pth', map_location=device)
)
res18_model.eval()

In [35]:
def convert_quaternion_to_euler(quaternion):
    """Convert one quaternion to roll/pitch/yaw Euler angles in degrees.

    Args:
        quaternion: Four numbers ordered (qx, qy, qz, qw). May be a list, a
            NumPy array or a tensor (on any device). It does not need to be
            unit length; it is normalised before conversion.

    Returns:
        numpy.ndarray of shape (3,): [roll, pitch, yaw] in degrees.
    """
    q = torch.as_tensor(quaternion).detach().cpu()
    if not q.is_floating_point():
        q = q.to(torch.get_default_dtype())
    q = q.reshape(1, 4)

    # The conversion formulas below are only valid for unit quaternions, and a
    # regression network's raw output is generally not unit length.
    # F.normalize leaves an all-zero input at zero instead of dividing by zero.
    q = torch.nn.functional.normalize(q, dim=1)
    qx, qy, qz, qw = q[:, 0], q[:, 1], q[:, 2], q[:, 3]

    roll = torch.atan2(2 * (qw * qx + qy * qz), 1 - 2 * (qx**2 + qy**2))

    # Rounding can push the argument marginally outside [-1, 1] near gimbal
    # lock, which would make asin return NaN.
    sin_pitch = torch.clamp(2 * (qw * qy - qz * qx), -1.0, 1.0)
    pitch = torch.asin(sin_pitch)

    yaw = torch.atan2(2 * (qw * qz + qx * qy), 1 - 2 * (qy**2 + qz**2))

    euler_degrees = torch.rad2deg(torch.stack([roll, pitch, yaw], dim=1))
    return euler_degrees[0].numpy()

In [15]:
import cv2
import torch
import numpy as np
from cv2 import imshow
from torchvision import transforms
from PIL import Image

# Preprocessing applied to each detected crop before it is fed to the rotation
# network: fixed 224x224 size, tensor conversion, then per-channel
# normalisation with the standard ImageNet mean and standard deviation.
transform = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)


def preprocess_frame(frame):
    """Turn a BGR OpenCV frame into a (1, 3, 640, 640) float32 tensor on `device`.

    The frame is converted to RGB, resized to 640x640 (aspect ratio is not
    preserved) and scaled to the [0, 1] range before being moved to the
    module-level `device`.
    """
    rgb = cv2.resize(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB), (640, 640))
    # HWC -> CHW after scaling the 8-bit values down to [0, 1].
    chw = (rgb / 255.0).transpose(2, 0, 1)
    batch = torch.tensor(chw, dtype=torch.float32)[None]
    return batch.to(device)


def draw_boxes(frame, predictions, confidence_threshold=0.90):
    """Annotate a frame with detections and the rotation predicted for each one.

    Args:
        frame: BGR image as returned by OpenCV. It is resized to 640x640, so
            box coordinates are expected in that 640x640 space.
        predictions: Mapping with "boxes" (iterable of 4-value tensors
            xmin, ymin, xmax, ymax) and "scores" (iterable of confidences).
        confidence_threshold: Only detections scoring strictly above this value
            are processed.

    Returns:
        The resized 640x640 frame with a rectangle and the rotation output
        drawn for every accepted detection. The caller's array is not modified.
    """
    frame = cv2.resize(frame, (640, 640))
    # Crops are cut from an untouched copy so rectangles and text drawn for
    # earlier detections never end up in a later crop fed to the rotation model.
    clean_frame = frame.copy()
    frame_h, frame_w = frame.shape[:2]

    for box, score in zip(predictions["boxes"], predictions["scores"]):
        if not score > confidence_threshold:
            continue

        xmin, ymin, xmax, ymax = map(int, box.cpu().numpy())
        # Keep the box inside the image and skip boxes that collapse to zero
        # area after integer truncation; an empty crop would crash cvtColor.
        xmin, xmax = max(0, xmin), min(frame_w, xmax)
        ymin, ymax = max(0, ymin), min(frame_h, ymax)
        if xmax <= xmin or ymax <= ymin:
            continue

        # Same preprocessing as before: BGR -> RGB -> PIL -> `transform` -> batch of one.
        crop_rgb = cv2.cvtColor(clean_frame[ymin:ymax, xmin:xmax], cv2.COLOR_BGR2RGB)
        crop_tensor = transform(Image.fromarray(crop_rgb)).unsqueeze(0).to(device)

        with torch.no_grad():
            output = res18_model(crop_tensor)[0].cpu().numpy()
        # NOTE(review): assumes the model predicts angles scaled by 1/180, so
        # this converts to degrees -- confirm against the training code.
        output = output * 180

        cv2.rectangle(frame, (xmin, ymin), (xmax, ymax), (0, 0, 255), 2)
        # Keep the label on-screen when the box touches the top edge.
        text_y = max(ymin - 10, 15)
        cv2.putText(frame, f"{output}", (xmin, text_y),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 1)
    return frame


In [None]:
import cv2
import torch


# Open the capture *before* entering the try block: if this line fails, the
# finally clause must not run against an undefined (or stale, left over from an
# earlier cell) `cap`.
cap = cv2.VideoCapture('/dev/video0')
# Switching the detector to inference mode is needed once, not on every frame.
model.eval()

try:
    if not cap.isOpened():
        print("Error: Could not access the webcam.")
    else:
        print("Press 'q' to stop the webcam feed or use Ctrl+C.")
        while True:
            ret, frame = cap.read()
            if not ret:
                print("Error: Failed to capture frame.")
                break

            input_tensor = preprocess_frame(frame)
            # Get bounding box predictions for this frame (batch of one).
            with torch.no_grad():
                prediction = model(input_tensor)[0]
            # Draw the boxes and the rotation predicted for each detection.
            frame_with_boxes = draw_boxes(frame, prediction, confidence_threshold=0.99)

            cv2.imshow("Webcam Feed", frame_with_boxes)

            # waitKey also services the GUI event loop; 'q' ends the feed.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break

except KeyboardInterrupt:
    print("\nKeyboard Interrupt detected. Exiting...")

finally:
    # Always free the camera and close the preview window, however the loop ended.
    cap.release()
    cv2.destroyAllWindows()
    print("Released video capture and closed all windows.")


In [19]:
# Manual cleanup cell: releases the `cap` capture object created by an earlier
# cell and closes any OpenCV windows. Requires `cap` to already exist in the
# kernel namespace.
cap.release()
cv2.destroyAllWindows()

In [None]:
import cv2

# Quick sanity check: open the camera, try to grab a single frame, report the
# outcome, and release the device again.
cap = cv2.VideoCapture('/dev/video0')

if cap.isOpened():
    print("Camera opened successfully.")
    ret, frame = cap.read()
    print("Frame captured successfully." if ret else "Error: Frame not captured.")
else:
    print("Error: Could not open the camera.")
cap.release()


Camera opened successfully.
Frame captured successfully.
