# Code to create a 3D digital twin from a live, real-time camera feed

In [None]:
# Install necessary dependencies
!pip install torch torchvision opencv-python matplotlib open3d numpy ultralytics transformers timm pillow

# Import required libraries
import os
import cv2
import torch
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import open3d as o3d
import time
from ultralytics import YOLO
from transformers import DPTFeatureExtractor, DPTForDepthEstimation
from google.colab import files
from IPython.display import display, HTML
from tqdm.notebook import tqdm

# Select the compute device: CUDA when PyTorch can see a GPU, otherwise the CPU.
_device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(_device_name)
print(f"Using device: {device}")

# Define a simple depth estimation model
class DepthEstimator(torch.nn.Module):
    """Minimal convolutional encoder/decoder producing a one-channel map.

    The encoder applies two 3x3 convolutions with a 2x2 max-pool in between
    (halving the spatial resolution); the decoder applies a 3x3 convolution,
    a 2x upsampling step and a final 3x3 convolution down to one channel.

    This class only defines the layers. It loads no pretrained weights, so
    a freshly constructed instance produces output from randomly initialised
    parameters.
    """

    def __init__(self):
        super().__init__()
        self.encoder = torch.nn.Sequential(
            torch.nn.Conv2d(3, 64, 3, padding=1),
            torch.nn.ReLU(),
            torch.nn.MaxPool2d(2),
            torch.nn.Conv2d(64, 128, 3, padding=1),
            torch.nn.ReLU()
        )
        self.decoder = torch.nn.Sequential(
            torch.nn.Conv2d(128, 64, 3, padding=1),
            torch.nn.ReLU(),
            torch.nn.Upsample(scale_factor=2),
            torch.nn.Conv2d(64, 1, 3, padding=1)
        )

    def forward(self, x):
        """Run the network on a batch of images.

        Args:
            x: Float tensor of shape (N, 3, H, W).

        Returns:
            Tensor of shape (N, 1, H, W), i.e. the same spatial size as ``x``.
        """
        out = self.decoder(self.encoder(x))

        # MaxPool2d(2) floors odd sizes, so pool + 2x upsample yields H-1 or
        # W-1 for odd inputs. Resize back so every output pixel lines up with
        # an input pixel; even-sized inputs are returned untouched.
        if out.shape[-2:] != x.shape[-2:]:
            out = torch.nn.functional.interpolate(
                out, size=tuple(x.shape[-2:]), mode="nearest"
            )
        return out

# Video to Point Cloud Converter
class VideoToTwinConverter:
    """Convert batches of video frames into coloured Open3D point clouds.

    Each frame is normalised, passed through ``DepthEstimator`` to obtain a
    per-pixel depth value, and back-projected into 3D with a simplified
    pinhole camera model.

    Attributes:
        device: torch device the depth model runs on.
        depth_model: the ``DepthEstimator`` instance used for inference.
        transform: callable mapping one frame to a normalised (3, H, W)
            float tensor.
        batch_size: number of frames the caller is expected to pass per
            ``process_frames`` call (not enforced by this class).
    """

    # Per-channel normalisation statistics (the widely used ImageNet values).
    _NORM_MEAN = (0.485, 0.456, 0.406)
    _NORM_STD = (0.229, 0.224, 0.225)

    # Focal length expressed as a fraction of the image width / height.
    _FOCAL_RATIO = 0.8
    # Multiplier applied to the raw model output to obtain the z coordinate.
    _DEPTH_SCALE = 100

    def __init__(self, batch_size=32):
        """Create the depth model on the best available device.

        Args:
            batch_size: number of frames per processing batch.
        """
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.depth_model = DepthEstimator().to(self.device)
        self.depth_model.eval()  # inference only
        # Implemented with torch/numpy directly: the file never imports
        # torchvision's ``transforms``, so referencing it raised NameError.
        self.transform = self._to_normalized_tensor
        self.batch_size = batch_size

    @staticmethod
    def _to_normalized_tensor(frame):
        """Convert one (H, W, 3) frame to a normalised (3, H, W) float tensor.

        uint8 input is scaled to [0, 1] first; other dtypes are only cast to
        float. The result is then normalised per channel with
        ``_NORM_MEAN`` / ``_NORM_STD``.
        """
        array = np.asarray(frame)
        # ascontiguousarray also copes with negatively-strided views, which
        # torch.from_numpy rejects.
        tensor = torch.from_numpy(np.ascontiguousarray(array.transpose(2, 0, 1)))
        if tensor.dtype == torch.uint8:
            tensor = tensor.float().div(255.0)
        else:
            tensor = tensor.float()

        mean = torch.tensor(VideoToTwinConverter._NORM_MEAN, dtype=torch.float32).view(-1, 1, 1)
        std = torch.tensor(VideoToTwinConverter._NORM_STD, dtype=torch.float32).view(-1, 1, 1)
        return (tensor - mean) / std

    def _generate_pointcloud(self, rgb, depth):
        """Back-project one depth map into a coloured point cloud.

        Args:
            rgb: (H, W, 3) colour image with values in 0..255; its channel
                values are handed to Open3D as-is, so it should be in RGB
                order.
            depth: (H, W) array of depth values with the same H and W.

        Returns:
            ``o3d.geometry.PointCloud`` with one point per pixel.
        """
        h, w = depth.shape
        fx, fy = self._FOCAL_RATIO * w, self._FOCAL_RATIO * h  # Simplified focal length

        yy, xx = np.meshgrid(np.arange(h), np.arange(w), indexing='ij')
        z = depth * self._DEPTH_SCALE
        # Pinhole model with the principal point at the image centre.
        x = (xx - w / 2) * z / fx
        y = (yy - h / 2) * z / fy

        points = np.column_stack((x.ravel(), y.ravel(), z.ravel()))
        colors = np.asarray(rgb).reshape(-1, 3) / 255.0

        pcd = o3d.geometry.PointCloud()
        pcd.points = o3d.utility.Vector3dVector(points)
        pcd.colors = o3d.utility.Vector3dVector(colors)
        return pcd

    def _predict_depths(self, frames):
        """Return an (N, H, W) numpy array of depth maps for ``frames``.

        Expects ``self.depth_model`` to return a (N, C, H', W') tensor; the
        first channel is used and resized to the frame resolution if needed.
        """
        batch = torch.stack([self.transform(frame) for frame in frames]).to(self.device)

        with torch.no_grad():
            depths = self.depth_model(batch)

        # Keep depth and colour pixel-aligned even if the model changes the
        # spatial resolution (e.g. pooling on odd-sized frames).
        if depths.shape[-2:] != batch.shape[-2:]:
            depths = torch.nn.functional.interpolate(
                depths, size=tuple(batch.shape[-2:]), mode="nearest"
            )

        # Select the channel explicitly instead of a bare squeeze(), which
        # would also drop the batch axis when only one frame is passed.
        return depths[:, 0].cpu().numpy()

    def process_frames(self, frames):
        """Convert a sequence of frames into point clouds.

        Args:
            frames: sequence of (H, W, 3) frames of identical size.

        Returns:
            List with one ``o3d.geometry.PointCloud`` per input frame; an
            empty list when ``frames`` is empty.
        """
        if len(frames) == 0:
            return []

        depths = self._predict_depths(frames)
        return [
            self._generate_pointcloud(frame, depth)
            for frame, depth in zip(frames, depths)
        ]

# Main Execution
if __name__ == "__main__":
    # Script entry point: read a video file, convert its frames to point
    # clouds in batches, and display each cloud with Open3D.

    video_path = '/path/to/your/video.mp4'  # Update path

    def _system_memory_percent():
        """Return the percentage of physical memory in use, or None.

        Uses ``os.sysconf`` page counts, which are not available on every
        platform; None is returned when they cannot be queried.
        """
        try:
            total_pages = os.sysconf("SC_PHYS_PAGES")
            free_pages = os.sysconf("SC_AVPHYS_PAGES")
        except (AttributeError, ValueError, OSError):
            return None
        if total_pages <= 0:
            return None
        return 100.0 * (total_pages - free_pages) / total_pages

    # Initialize components
    converter = VideoToTwinConverter(batch_size=32)

    # Open the video file
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        # Fail loudly instead of silently processing zero frames.
        cap.release()
        raise SystemExit(f"Could not open video: {video_path}")

    frames = []
    start_time = time.time()
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break

            # OpenCV decodes to BGR; the converter normalises and colours the
            # point cloud per channel assuming RGB order.
            frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

            # Process frames in batches
            if len(frames) >= converter.batch_size:
                pointclouds = converter.process_frames(frames)
                frames = []

                # Visualize point clouds using Open3D (each call blocks until
                # its window is closed).
                for pcd in pointclouds:
                    o3d.visualization.draw_geometries([pcd])

                # Monitor system and GPU memory usage
                memory_percent = _system_memory_percent()
                if memory_percent is None:
                    print("System Memory Usage: unavailable")
                else:
                    print(f"System Memory Usage: {memory_percent:.1f}%")
                # Only query the CUDA allocator when a GPU is present.
                gpu_memory = torch.cuda.memory_allocated() if torch.cuda.is_available() else 0
                print(f"GPU Memory Usage: {gpu_memory / (1024 ** 2)} MB")

        # Process remaining frames
        if frames:
            pointclouds = converter.process_frames(frames)
            for pcd in pointclouds:
                o3d.visualization.draw_geometries([pcd])
    finally:
        # Release the capture even if processing or visualisation raised.
        cap.release()

    end_time = time.time()
    # Note: this includes the time spent in the blocking viewer windows.
    print(f"Total processing time: {end_time - start_time} seconds")