In [19]:
import pybullet as p
import pybullet_data
import cv2
import numpy as np
import os
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input, decode_predictions
from tensorflow.keras.preprocessing.image import load_img, img_to_array

# Setup directories
base_dir = "E:/UC-12564"
urdf_dir = os.path.join(base_dir, "urdf")
capture_dir = os.path.join(base_dir, "captures")
os.makedirs(capture_dir, exist_ok=True)

# Connect to PyBullet in DIRECT mode
p.connect(p.DIRECT)
p.setAdditionalSearchPath(pybullet_data.getDataPath())
p.resetSimulation()
p.setGravity(0, 0, -9.8)
p.loadURDF("plane.urdf")

# Load objects
positions = [(0, 0, 0.1), (0.3, 0, 0.1), (-0.3, 0, 0.1), (0.15, 0.3, 0.1),
             (-0.15, -0.3, 0.1), (0.3, -0.3, 0.1), (-0.3, 0.3, 0.1)]
shapes = ["cube"] * len(positions)

for idx, pos in enumerate(positions):
    urdf_file = os.path.join(urdf_dir, f"{shapes[idx]}.urdf")
    if os.path.exists(urdf_file):
        try:
            p.loadURDF(urdf_file, basePosition=pos)
        except Exception as e:
            print(f"[ERROR] Could not load {urdf_file}: {e}")
    else:
        print(f"[WARNING] URDF not found: {urdf_file}")

# Capture 10 diverse images
for i in range(10):
    yaw = i * 36
    pitch = -10 + (i % 3) * -20
    distance = 0.8 + (i % 2) * 0.3

    view_matrix = p.computeViewMatrixFromYawPitchRoll(
        cameraTargetPosition=[0.1 * (i % 3), -0.1 * (i % 2), 0.1],
        distance=distance,
        yaw=yaw,
        pitch=pitch,
        roll=0,
        upAxisIndex=2
    )
    proj_matrix = p.computeProjectionMatrixFOV(
        fov=60, aspect=1.0, nearVal=0.1, farVal=2.0
    )

    _, _, rgb_img, _, _ = p.getCameraImage(
        width=640, height=480,
        viewMatrix=view_matrix,
        projectionMatrix=proj_matrix
    )

    img = np.reshape(rgb_img, (480, 640, 4))[:, :, :3]
    img_path = os.path.join(capture_dir, f"cam_{i}.png")
    cv2.imwrite(img_path, img)

# Classify first image using MobileNetV2
img_path = os.path.join(capture_dir, "cam_0.png")
model = MobileNetV2(weights="imagenet")
img = load_img(img_path, target_size=(224, 224))
img_array = img_to_array(img)
img_array = np.expand_dims(img_array, axis=0)
img_array = preprocess_input(img_array)

predictions = model.predict(img_array)
decoded = decode_predictions(predictions, top=3)[0]

print("Predicted classes:")
for pred in decoded:
    print(f"{pred[1]} ({pred[2]*100:.2f}%)")

# Properly disconnect the server at the end
p.disconnect()


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
Predicted classes:
desk (18.59%)
pool_table (6.19%)
envelope (4.48%)
