In [None]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.optimizers import Adam

def load_and_preprocess_videos(video_folder, label, max_frames=30, frame_size=(224, 224)):
    videos = []
    labels = []
    
    if not os.path.exists(video_folder):
        raise FileNotFoundError(f"Directory not found: {video_folder}")
    
    for video_file in os.listdir(video_folder):
        video_path = os.path.join(video_folder, video_file)
        
        # Read video
        cap = cv2.VideoCapture(video_path)
        if not cap.isOpened():
            print(f"Warning: Could not open video {video_path}")
            continue
            
        frames = []
        frame_count = 0
        
        while True:
            ret, frame = cap.read()
            if not ret or frame_count >= max_frames:
                break
                
            # Convert BGR to RGB
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            # Preprocess frame
            frame = cv2.resize(frame, frame_size)
            frame = frame / 255.0  # Normalize
            frames.append(frame)
            frame_count += 1
            
        # Pad if video is shorter than max_frames
        if len(frames) < max_frames:
            padding = [np.zeros_like(frames[0]) for _ in range(max_frames - len(frames))]
            frames.extend(padding)
            
        videos.append(np.array(frames))
        labels.append(label)
        
        cap.release()
    
    return np.array(videos, dtype=np.float32), np.array(labels)

# Load datasets - replace with your actual folder paths
real_videos, real_labels = load_and_preprocess_videos('Real', 0)
fake_videos, fake_labels = load_and_preprocess_videos('Fake', 1)

# Combine datasets
X = np.concatenate((real_videos, fake_videos), axis=0)
y = np.concatenate((real_labels, fake_labels), axis=0)

# Split into train and test sets with shuffling
X_train, X_test, y_train, y_test = train_test_split(
    X, y, 
    test_size=0.2, 
    random_state=42,
    shuffle=True,
    stratify=y
)

class PositionalEmbedding(layers.Layer):
    def __init__(self, sequence_length, embed_dim, **kwargs):
        super().__init__(**kwargs)
        self.position_embeddings = layers.Embedding(
            input_dim=sequence_length,
            output_dim=embed_dim
        )
        self.sequence_length = sequence_length
        self.embed_dim = embed_dim
    
    def call(self, inputs):
        positions = tf.range(start=0, limit=self.sequence_length, delta=1)
        embedded_positions = self.position_embeddings(positions)
        return inputs + embedded_positions
    
    def compute_mask(self, inputs, mask=None):
        return mask

class TransformerEncoder(layers.Layer):
    def __init__(self, num_heads, embed_dim, dense_dim, dropout=0.1, **kwargs):
        super().__init__(**kwargs)
        self.attention = layers.MultiHeadAttention(
            num_heads=num_heads,
            key_dim=embed_dim,
            dropout=dropout
        )
        self.dense_proj = models.Sequential([
            layers.Dense(dense_dim, activation='relu'),
            layers.Dense(embed_dim)
        ])
        self.layernorm_1 = layers.LayerNormalization()
        self.layernorm_2 = layers.LayerNormalization()
        self.dropout_1 = layers.Dropout(dropout)
        self.dropout_2 = layers.Dropout(dropout)
    
    def call(self, inputs, mask=None):
        attention_output = self.attention(
            inputs, inputs, attention_mask=mask
        )
        attention_output = self.dropout_1(attention_output)
        proj_input = self.layernorm_1(inputs + attention_output)
        
        proj_output = self.dense_proj(proj_input)
        proj_output = self.dropout_2(proj_output)
        return self.layernorm_2(proj_input + proj_output)

def create_video_transformer_model(input_shape=(30, 224, 224, 3), num_classes=1):
    # Input layer
    inputs = layers.Input(shape=input_shape)
    
    # CNN backbone for spatial feature extraction (using EfficientNet)
    cnn_backbone = tf.keras.applications.EfficientNetV2B0(
        include_top=False,
        weights='imagenet',
        input_shape=input_shape[1:],
        pooling='avg'
    )
    cnn_backbone.trainable = True
    
    # Process each frame through CNN
    # More efficient implementation using TimeDistributed
    x = layers.TimeDistributed(cnn_backbone)(inputs)
    
    # Positional embedding
    positional_embedding = PositionalEmbedding(
        sequence_length=input_shape[0],
        embed_dim=x.shape[-1]
    )
    x = positional_embedding(x)
    
    # Transformer encoder
    x = TransformerEncoder(
        num_heads=8,
        embed_dim=x.shape[-1],
        dense_dim=512,
        dropout=0.1
    )(x)
    
    # Global average pooling
    x = layers.GlobalAveragePooling1D()(x)
    
    # Classification head
    x = layers.Dense(128, activation='relu')(x)
    x = layers.Dropout(0.3)(x)
    outputs = layers.Dense(num_classes, activation='sigmoid')(x)
    
    model = models.Model(inputs, outputs)
    return model

# Create model
model = create_video_transformer_model()
model.summary()

# Compile model with better settings
model.compile(
    optimizer=Adam(learning_rate=1e-4),
    loss='binary_crossentropy',
    metrics=['accuracy', tf.keras.metrics.AUC(name='auc')]
)

# Add callbacks
callbacks = [
    tf.keras.callbacks.ModelCheckpoint(
        'best_model.h5',
        save_best_only=True,
        monitor='val_accuracy',
        mode='max'
    ),
    tf.keras.callbacks.EarlyStopping(
        patience=5,
        restore_best_weights=True,
        monitor='val_accuracy'
    ),
    tf.keras.callbacks.ReduceLROnPlateau(
        monitor='val_loss',
        factor=0.2,
        patience=3,
        min_lr=1e-6
    )
]

# Train model with proper batch size
history = model.fit(
    X_train,
    y_train,
    batch_size=8,
    epochs=30,
    validation_data=(X_test, y_test),
    callbacks=callbacks,
    verbose=1
)

# Evaluate model
loss, accuracy, auc = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy*100:.2f}%")
print(f"Test AUC: {auc*100:.2f}%")

# Improved inference function
def predict_deepfake(video_path, model, max_frames=30, frame_size=(224, 224)):
    # Preprocess video
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        return "Error: Could not open video file"
        
    frames = []
    frame_count = 0
    
    while True:
        ret, frame = cap.read()
        if not ret or frame_count >= max_frames:
            break
            
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame = cv2.resize(frame, frame_size)
        frame = frame / 255.0
        frames.append(frame)
        frame_count += 1
        
    cap.release()
    
    # Pad if needed
    if len(frames) < max_frames:
        padding = [np.zeros_like(frames[0]) for _ in range(max_frames - len(frames))]
        frames.extend(padding)
        
    frames = np.array(frames)
    frames = np.expand_dims(frames, axis=0)  # Add batch dimension
    
    # Predict
    prediction = model.predict(frames, verbose=0)[0][0]
    probability = prediction * 100
    
    if probability > 50:
        return f"FAKE ({probability:.2f}% confidence)"
    else:
        return f"REAL ({100 - probability:.2f}% confidence)"




Epoch 1/30
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22s/step - accuracy: 0.5691 - auc: 0.5849 - loss: 0.9904 

In [None]:


import os
import cv2
import torch
import numpy as np
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from tqdm import tqdm
from facenet_pytorch import MTCNN
from einops import rearrange
from torch.nn import TransformerEncoder, TransformerEncoderLayer

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Frame extraction and face detection class
class FaceExtractor:
    def __init__(self, face_size=224):
        self.face_size = face_size
        # Initialize MTCNN for face detection
        self.mtcnn = MTCNN(
            image_size=face_size,
            margin=40,
            min_face_size=50,
            thresholds=[0.6, 0.7, 0.7],
            factor=0.709,
            post_process=True,
            device=device
        )
        
    def extract_faces_from_video(self, video_path, max_frames=32):
        """Extract faces from video frames"""
        faces = []
        cap = cv2.VideoCapture(video_path)
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        
        # Calculate sampling rate to get evenly distributed frames
        if frame_count <= max_frames:
            sampling_rate = 1
        else:
            sampling_rate = frame_count // max_frames
            
        frame_indices = range(0, frame_count, sampling_rate)[:max_frames]
        
        for i in frame_indices:
            cap.set(cv2.CAP_PROP_POS_FRAMES, i)
            ret, frame = cap.read()
            if not ret:
                break
                
            # Convert BGR to RGB
            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            
            # Detect face and get the cropped face and bounding box
            try:
                face = self.mtcnn(frame)
                if face is not None:
                    faces.append(face)
            except Exception as e:
                print(f"Error detecting face: {e}")
                continue
                
        cap.release()
        
        # Ensure we have faces
        if not faces:
            return None
        
        # Stack frames - only take up to max_frames
        face_frames = torch.stack(faces[:max_frames])
        
        # Pad if we have fewer frames
        if face_frames.shape[0] < max_frames:
            padding = torch.zeros(max_frames - face_frames.shape[0], 3, self.face_size, self.face_size)
            face_frames = torch.cat([face_frames, padding], dim=0)
            
        return face_frames

# Dataset class
class DeepfakeDataset(Dataset):
    def __init__(self, real_dir, fake_dir, max_frames=32, face_size=224, transform=None, max_videos=None):
        self.real_videos = [os.path.join(real_dir, f) for f in os.listdir(real_dir) 
                           if f.endswith(('.mp4', '.avi', '.mov'))]
        self.fake_videos = [os.path.join(fake_dir, f) for f in os.listdir(fake_dir) 
                           if f.endswith(('.mp4', '.avi', '.mov'))]
        
        # Limit dataset size if specified
        if max_videos:
            self.real_videos = self.real_videos[:max_videos]
            self.fake_videos = self.fake_videos[:max_videos]
            
        self.videos = self.real_videos + self.fake_videos
        self.labels = [0] * len(self.real_videos) + [1] * len(self.fake_videos)  # 0 for real, 1 for fake
        
        self.max_frames = max_frames
        self.face_extractor = FaceExtractor(face_size)
        self.transform = transform
        
        # Create a cache to store extracted faces
        self.cache = {}
        
    def __len__(self):
        return len(self.videos)
    
    def __getitem__(self, idx):
        video_path = self.videos[idx]
        label = self.labels[idx]
        
        # Check if we have this video in cache
        if video_path in self.cache:
            faces = self.cache[video_path]
        else:
            faces = self.face_extractor.extract_faces_from_video(video_path, self.max_frames)
            
            # If no faces found, create a dummy batch
            if faces is None:
                faces = torch.zeros(self.max_frames, 3, 224, 224)
                
            # Cache the extracted faces
            self.cache[video_path] = faces
        
        if self.transform:
            # Apply transforms to each frame
            transformed_faces = torch.stack([self.transform(face) for face in faces])
            return transformed_faces, torch.tensor(label, dtype=torch.long)
        else:
            return faces, torch.tensor(label, dtype=torch.long)

# Transformer-based model for deepfake detection
class TransformerDeepfakeDetector(nn.Module):
    def __init__(self, num_frames=32, hidden_dim=768, num_heads=8, num_layers=6, dropout=0.1):
        super(TransformerDeepfakeDetector, self).__init__()
        
        # ResNet feature extractor (using a smaller ResNet to save VRAM)
        self.feature_extractor = torch.hub.load('pytorch/vision:v0.10.0', 'resnet18', pretrained=True)
        # Remove the final classification layer
        self.feature_extractor = nn.Sequential(*list(self.feature_extractor.children())[:-1])
        
        # Get the output dimension from the feature extractor
        self.feature_dim = 512  # ResNet18's output dim
        
        # Project features to hidden dimension
        self.projection = nn.Linear(self.feature_dim, hidden_dim)
        
        # Position encoding
        self.pos_encoder = PositionalEncoding(hidden_dim, dropout)
        
        # Transformer encoder layers
        encoder_layers = TransformerEncoderLayer(d_model=hidden_dim, nhead=num_heads, 
                                              dim_feedforward=hidden_dim*4, dropout=dropout)
        self.transformer_encoder = TransformerEncoder(encoder_layers, num_layers)
        
        # Classification head
        self.classifier = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim // 2),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim // 2, 1)
        )
        
        self.num_frames = num_frames
        self.hidden_dim = hidden_dim
        
    def forward(self, x):
        batch_size, frames, channels, height, width = x.shape
        
        # Process each frame with the feature extractor
        # Reshape to process all frames at once
        x = x.view(batch_size * frames, channels, height, width)
        features = self.feature_extractor(x)
        features = features.view(batch_size * frames, self.feature_dim)
        
        # Project and reshape back to [batch, frames, hidden]
        features = self.projection(features)
        features = features.view(batch_size, frames, self.hidden_dim)
        
        # Transpose for transformer input [frames, batch, hidden]
        features = features.transpose(0, 1)
        
        # Add positional encoding
        features = self.pos_encoder(features)
        
        # Pass through transformer
        encoded = self.transformer_encoder(features)
        
        # Use global representation (mean of all frames)
        encoded = encoded.mean(dim=0)
        
        # Classification
        output = self.classifier(encoded)
        return output.squeeze(-1)

# Positional encoding for transformer
class PositionalEncoding(nn.Module):
    def __init__(self, d_model, dropout=0.1, max_len=1000):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=dropout)
        
        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-np.log(10000.0) / d_model))
        pe = torch.zeros(max_len, 1, d_model)
        pe[:, 0, 0::2] = torch.sin(position * div_term)
        pe[:, 0, 1::2] = torch.cos(position * div_term)
        self.register_buffer('pe', pe)
        
    def forward(self, x):
        """
        Arguments:
            x: Tensor, shape [seq_len, batch_size, embedding_dim]
        """
        x = x + self.pe[:x.size(0)]
        return self.dropout(x)

# Function to train the model
def train_model(model, train_loader, val_loader, num_epochs=10, lr=1e-4):
    criterion = nn.BCEWithLogitsLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)
    scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=2)
    
    best_val_loss = float('inf')
    
    for epoch in range(num_epochs):
        # Training phase
        model.train()
        train_loss = 0.0
        train_correct = 0
        train_total = 0
        
        for frames, labels in tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs} (Training)"):
            frames = frames.to(device)
            labels = labels.float().to(device)
            
            optimizer.zero_grad()
            outputs = model(frames)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
            train_loss += loss.item()
            pred = torch.sigmoid(outputs) >= 0.5
            train_correct += (pred == labels).sum().item()
            train_total += labels.size(0)
        
        train_loss /= len(train_loader)
        train_acc = train_correct / train_total
        
        # Validation phase
        model.eval()
        val_loss = 0.0
        val_correct = 0
        val_total = 0
        
        with torch.no_grad():
            for frames, labels in tqdm(val_loader, desc=f"Epoch {epoch+1}/{num_epochs} (Validation)"):
                frames = frames.to(device)
                labels = labels.float().to(device)
                
                outputs = model(frames)
                loss = criterion(outputs, labels)
                
                val_loss += loss.item()
                pred = torch.sigmoid(outputs) >= 0.5
                val_correct += (pred == labels).sum().item()
                val_total += labels.size(0)
        
        val_loss /= len(val_loader)
        val_acc = val_correct / val_total
        
        # Update scheduler
        scheduler.step(val_loss)
        
        print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {train_loss:.4f}, Train Acc: {train_acc:.4f}, "
              f"Val Loss: {val_loss:.4f}, Val Acc: {val_acc:.4f}")
        
        # Save the best model
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            torch.save(model.state_dict(), 'best_deepfake_detector.pth')
            print("Saved best model checkpoint.")

# Function to evaluate on test set
def evaluate_model(model, test_loader):
    model.eval()
    test_correct = 0
    test_total = 0
    all_preds = []
    all_labels = []
    
    with torch.no_grad():
        for frames, labels in tqdm(test_loader, desc="Testing"):
            frames = frames.to(device)
            labels = labels.to(device)
            
            outputs = model(frames)
            pred = torch.sigmoid(outputs) >= 0.5
            
            test_correct += (pred == labels).sum().item()
            test_total += labels.size(0)
            
            all_preds.extend(pred.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
    
    test_acc = test_correct / test_total
    print(f"Test Accuracy: {test_acc:.4f}")
    
    # Calculate confusion matrix and other metrics
    from sklearn.metrics import confusion_matrix, precision_recall_fscore_support
    
    cm = confusion_matrix(all_labels, all_preds)
    precision, recall, f1, _ = precision_recall_fscore_support(all_labels, all_preds, average='binary')
    
    print(f"Confusion Matrix:\n{cm}")
    print(f"Precision: {precision:.4f}")
    print(f"Recall: {recall:.4f}")
    print(f"F1 Score: {f1:.4f}")

# Main function to orchestrate the training and evaluation
def main():
    # Parameters
    batch_size = 4  # Smaller batch size to fit in VRAM
    max_frames = 32
    face_size = 224
    max_videos_per_class = None  # Set to a number if you want to limit dataset size
    
    # Data augmentation and transformation
    transform = transforms.Compose([
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
    
    # Create dataset
    print("Creating dataset...")
    dataset = DeepfakeDataset(
        real_dir="Real",
        fake_dir="Fake",
        max_frames=max_frames,
        face_size=face_size,
        transform=transform,
        max_videos=max_videos_per_class
    )
    
    # Split dataset
    dataset_size = len(dataset)
    train_size = int(0.7 * dataset_size)
    val_size = int(0.15 * dataset_size)
    test_size = dataset_size - train_size - val_size
    
    train_dataset, val_dataset, test_dataset = torch.utils.data.random_split(
        dataset, [train_size, val_size, test_size]
    )
    
    # Create data loaders
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=0)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=0)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=0)
    
    # Initialize model
    print("Initializing model...")
    model = TransformerDeepfakeDetector(
        num_frames=max_frames,
        hidden_dim=512,  # Reduced dimension to save VRAM
        num_heads=8,
        num_layers=4,    # Reduced layers to save VRAM
        dropout=0.1
    ).to(device)
    
    # Count parameters
    total_params = sum(p.numel() for p in model.parameters())
    print(f"Total model parameters: {total_params:,}")
    
    # Train model
    print("Starting training...")
    train_model(model, train_loader, val_loader, num_epochs=15)
    
    # Load best model and evaluate
    print("Loading best model for evaluation...")
    model.load_state_dict(torch.load('best_deepfake_detector.pth'))
    evaluate_model(model, test_loader)

if __name__ == "__main__":
    main()

  from .autonotebook import tqdm as notebook_tqdm
Using cache found in C:\Users\araut1/.cache\torch\hub\pytorch_vision_v0.10.0


Using device: cpu
Creating dataset...
Initializing model...
Total model parameters: 24,180,289
Starting training...


Epoch 1/15 (Training):   0%|          | 0/14 [00:00<?, ?it/s]

Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscr

Epoch 1/15 (Training):   7%|▋         | 1/14 [00:07<01:39,  7.66s/it]

Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscr

Epoch 1/15 (Training):  14%|█▍        | 2/14 [00:14<01:29,  7.45s/it]

Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscr

Epoch 1/15 (Training):  21%|██▏       | 3/14 [00:22<01:23,  7.62s/it]

Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscr

Epoch 1/15 (Training):  29%|██▊       | 4/14 [00:30<01:17,  7.71s/it]

Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscriptable
Error detecting face: 'int' object is not subscr

In [4]:
pip install facenet_pytorch 

Defaulting to user installation because normal site-packages is not writeable
Collecting facenet_pytorch
  Using cached facenet_pytorch-2.6.0-py3-none-any.whl.metadata (12 kB)
Collecting numpy<2.0.0,>=1.24.0 (from facenet_pytorch)
  Using cached numpy-1.26.4.tar.gz (15.8 MB)
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Installing backend dependencies: started
  Installing backend dependencies: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'error'
Note: you may need to restart the kernel to use updated packages.


  error: subprocess-exited-with-error
  
  × Preparing metadata (pyproject.toml) did not run successfully.
  │ exit code: 1
  ╰─> [14 lines of output]
      + c:\Program Files\Python313\python.exe C:\Users\araut1\AppData\Local\Temp\pip-install-p15cgjld\numpy_0957b6ba99db41f09d52b835d29fb4c6\vendored-meson\meson\meson.py setup C:\Users\araut1\AppData\Local\Temp\pip-install-p15cgjld\numpy_0957b6ba99db41f09d52b835d29fb4c6 C:\Users\araut1\AppData\Local\Temp\pip-install-p15cgjld\numpy_0957b6ba99db41f09d52b835d29fb4c6\.mesonpy-_ron4h9j -Dbuildtype=release -Db_ndebug=if-release -Db_vscrt=md --native-file=C:\Users\araut1\AppData\Local\Temp\pip-install-p15cgjld\numpy_0957b6ba99db41f09d52b835d29fb4c6\.mesonpy-_ron4h9j\meson-python-native-file.ini
      The Meson build system
      Version: 1.2.99
      Source dir: C:\Users\araut1\AppData\Local\Temp\pip-install-p15cgjld\numpy_0957b6ba99db41f09d52b835d29fb4c6
      Build dir: C:\Users\araut1\AppData\Local\Temp\pip-install-p15cgjld\numpy_0957b6ba99