<a href="https://colab.research.google.com/github/Zakeerullah/Zaki1/blob/main/Prediction_on_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import torch
import cv2  # for video processing
from torchvision import transforms
import torch.nn as nn
from torchvision import models
import torch.nn.functional as F
import numpy as np


In [7]:
class YourModelClass(nn.Module):
    """AlexNet convolutional backbone with a freshly defined classifier head.

    The feature extractor is taken from torchvision's pretrained AlexNet; the
    fully connected head is rebuilt here so that its last layer produces
    ``num_classes`` outputs.

    Args:
        num_classes: Number of outputs of the final linear layer.
    """

    def __init__(self, num_classes=2):
        super().__init__()

        # Only the convolutional stack of the pretrained network is reused.
        backbone = models.alexnet(pretrained=True)
        self.features = backbone.features

        # Custom head. The layer order fixes the parameter names
        # (classifier.0 / classifier.3 / classifier.6) used by saved checkpoints.
        head_layers = [
            nn.Linear(9216, 4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(4096, num_classes),
        ]
        self.classifier = nn.Sequential(*head_layers)

    def forward(self, x):
        """Run a batch through the feature extractor and the classifier head.

        Args:
            x: Input batch; its first dimension is treated as the batch size.

        Returns:
            The output of the classifier head for each item in the batch.
        """
        feature_maps = self.features(x)
        # Collapse everything except the batch dimension into one vector per item.
        flattened = feature_maps.view(feature_maps.size(0), -1)
        return self.classifier(flattened)


In [8]:
# Step 1: Load the trained model
model = YourModelClass(num_classes=2)
# map_location='cpu' lets a checkpoint saved from a GPU session load on a
# CPU-only runtime; the model is never moved to another device in this notebook.
model.load_state_dict(
    torch.load(
        '/content/drive/MyDrive/Colab Notebooks/Model/Alexnet_model.pth',
        map_location='cpu',
    )
)
model.eval()  # Set the model to evaluation mode (disables dropout)

YourModelClass(
  (features): Sequential(
    (0): Conv2d(3, 64, kernel_size=(11, 11), stride=(4, 4), padding=(2, 2))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 192, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(192, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(384, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (11): ReLU(inplace=True)
    (12): MaxPool2d(kernel_size=3, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classifier): Sequential(
    (0): Linear(in_features=9216, out_features=4096, bias=True)
    (1): ReLU()
    (2): Dropout(p=0.5, inplace=False)
    (3): Linear(in_features=4

In [10]:
# Preprocessing pipeline applied to each RGB frame before it is fed to the model.
preprocess = transforms.Compose([
    # ndarray (or tensor) -> PIL image, which the resize/crop steps operate on
    transforms.ToPILImage(),
    # Scale so the shorter side is 256 px; the aspect ratio is kept
    transforms.Resize(size=256),
    # Keep the central 224x224 region
    transforms.CenterCrop(size=224),
    # PIL image -> tensor with pixel values in the range [0, 1]
    transforms.ToTensor(),
    # Per-channel normalisation with ImageNet's mean and std
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

# Step 2: Preprocess video frames (assuming you have a function for this)
def preprocess_frame(frame):
    """Convert one OpenCV frame into a single-item batch for the model.

    Args:
        frame: Frame as returned by ``cv2.VideoCapture.read`` (BGR channel order).

    Returns:
        The frame after ``preprocess``, with a leading batch dimension added.
    """
    rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)  # OpenCV decodes to BGR
    frame_tensor = preprocess(rgb_frame)
    batched = frame_tensor.unsqueeze(0)  # add the batch dimension
    return batched

# Step 3: Predict each frame
def predict_video(video_path):
    """Classify every frame of a video with the module-level ``model``.

    Args:
        video_path: Path of the video file to read.

    Returns:
        A list with one predicted class index per decoded frame. The list is
        empty when no frame could be read.
    """
    cap = cv2.VideoCapture(video_path)
    predictions = []

    # try/finally guarantees the capture handle is released even if
    # preprocessing or the model raises part-way through the video.
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break
            processed_frame = preprocess_frame(frame)
            with torch.no_grad():  # Ensure no gradients are calculated
                outputs = model(processed_frame)
                probabilities = F.softmax(outputs, dim=1)  # Convert logits to probabilities
                _, predicted = torch.max(probabilities, 1)  # Index of the most probable class
                predictions.append(predicted.item())
    finally:
        cap.release()

    return predictions


# Step 4: Majority voting to determine video label
def get_video_label(predictions):
    """Reduce per-frame predictions to one video-level label by majority vote.

    Args:
        predictions: Sequence of per-frame class indices, assuming 1 for real
            and 0 for fake.

    Returns:
        ``'real'`` when strictly more than half of the frames were predicted
        as 1, otherwise ``'fake'`` (an exact tie therefore yields ``'fake'``).

    Raises:
        ValueError: If ``predictions`` is empty. With no frames there is no
            evidence to vote on, so returning a label would be misleading.
    """
    if len(predictions) == 0:
        raise ValueError(
            "predictions is empty: no frames were classified, cannot label the video"
        )

    # Assuming 1 for real and 0 for fake
    threshold = len(predictions) / 2
    return 'real' if sum(predictions) > threshold else 'fake'



# Step 5: Tagging video with label
def tag_video(video_path, label, output_path='tagged_video_01.mp4'):
    """Write a copy of a video with ``label`` drawn at the centre of every frame.

    Args:
        video_path: Path of the video file to read.
        label: Text to overlay on each frame.
        output_path: Destination of the tagged video. Defaults to
            ``'tagged_video_01.mp4'`` (relative to the working directory).

    Raises:
        IOError: If no frame could be read from ``video_path``; nothing is
            written in that case.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 4
    thickness = 8

    cap = cv2.VideoCapture(video_path)
    out = None

    # try/finally releases the reader (and the writer, once created) even if
    # drawing or writing fails part-way through the video.
    try:
        # The text box depends only on the label and font settings, so it is
        # measured once instead of once per frame.
        (text_width, text_height), _ = cv2.getTextSize(label, font, font_scale, thickness)

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            frame_height, frame_width = frame.shape[:2]

            if out is None:
                # The writer is created lazily so that its frame size matches
                # the first decoded frame.
                fourcc = cv2.VideoWriter_fourcc(*'mp4v')
                out = cv2.VideoWriter(output_path, fourcc, cap.get(cv2.CAP_PROP_FPS),
                                      (frame_width, frame_height))

            # Starting coordinates that centre the text in the frame
            startX = (frame_width - text_width) // 2
            startY = (frame_height + text_height) // 2

            # Add label to frame
            cv2.putText(frame, label, (startX, startY), font,
                        font_scale, (0, 255, 0), thickness, cv2.LINE_AA)

            out.write(frame)
    finally:
        cap.release()
        if out is not None:
            out.release()

    if out is None:
        # Previously this situation surfaced as an AttributeError on None.
        raise IOError(f"No frames could be read from {video_path!r}; nothing was written")


# Example usage: classify every frame of one video, reduce the per-frame
# predictions to a single label, then write a copy with that label drawn on it.
video_path = '/content/drive/MyDrive/Colab Notebooks/0test1.mp4'
predictions = predict_video(video_path)  # one class index per decoded frame
video_label = get_video_label(predictions)  # 'real' or 'fake'
tag_video(video_path, video_label)  # writes 'tagged_video_01.mp4'

# Save model
#torch.save(model.state_dict(), 'Prediction_model.pth')
#model_save_path = '/content/drive/MyDrive/Colab Notebooks/Model/Prediction_model.pth'
#torch.save(model.state_dict(), model_save_path)





Using both VGG16 and AlexNet

In [None]:
# @title
##(import torch
import cv2  # for video processing
from torchvision import transforms
import torch.nn as nn
from torchvision import models

# Define your AlexNet-based model class
class YourAlexNetModelClass(YourModelClass):
    """AlexNet-based classifier, same as ``YourModelClass`` defined earlier.

    The original placeholder body contained only a comment, which is a syntax
    error in Python. Inheriting from ``YourModelClass`` makes the
    "same as before" intent concrete: the constructor signature, layers and
    state-dict keys are those of ``YourModelClass``.
    """

# Define your VGG16-based model class
class YourVGG16ModelClass(nn.Module):
    """VGG16 convolutional backbone with a user-defined classifier head.

    NOTE(review): the classifier is still an empty ``nn.Sequential``
    placeholder, and ``num_classes`` is not used yet. Until real layers are
    added, ``forward`` returns the flattened features.

    Args:
        num_classes: Intended number of output classes (currently unused).
    """

    def __init__(self, num_classes=2):
        super().__init__()

        # Only the convolutional stack of the pretrained network is reused.
        backbone = models.vgg16(pretrained=True)
        self.features = backbone.features

        self.classifier = nn.Sequential(
            # ... (define your classifier layers here)
        )

    def forward(self, x):
        """Run a batch through the feature extractor and the classifier head.

        Args:
            x: Input batch; its first dimension is treated as the batch size.

        Returns:
            The output of ``self.classifier`` for the flattened features.
        """
        feature_maps = self.features(x)
        # Collapse everything except the batch dimension into one vector per item.
        flattened = feature_maps.view(feature_maps.size(0), -1)
        return self.classifier(flattened)

# Load both trained models
# map_location='cpu' lets checkpoints saved from a GPU session load on a
# CPU-only runtime; neither model is moved to another device in this cell.
alexnet_model = YourAlexNetModelClass(num_classes=2)
alexnet_model.load_state_dict(torch.load('Alexnet_model.pth', map_location='cpu'))
alexnet_model.eval()  # Set to evaluation mode

vgg16_model = YourVGG16ModelClass(num_classes=2)
vgg16_model.load_state_dict(torch.load('VGG16_model.pth', map_location='cpu'))
vgg16_model.eval()  # Set to evaluation mode

# Define preprocessing steps for each model if they differ.
# An empty Compose would hand the raw ndarray back unchanged, and the
# `.unsqueeze(0)` call in preprocess_frame would then fail, so both pipelines
# are spelled out here.
preprocess_alexnet = transforms.Compose([
    transforms.ToPILImage(),  # ndarray -> PIL image
    transforms.Resize(256),  # shorter side scaled to 256 px, aspect ratio kept
    transforms.CenterCrop(224),  # central 224x224 region
    transforms.ToTensor(),  # tensor with pixel values in [0, 1]
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),  # ImageNet mean/std
])

# NOTE(review): assumes the VGG16 model was trained with the same ImageNet
# preprocessing as the AlexNet model — confirm against the training code.
preprocess_vgg16 = transforms.Compose([
    transforms.ToPILImage(),
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

# Modify preprocess_frame function to handle both models
def preprocess_frame(frame, model_name):
    """Convert one OpenCV frame into a single-item batch for the chosen model.

    This definition replaces the single-argument ``preprocess_frame`` defined
    earlier in the notebook once this cell is run.

    Args:
        frame: Frame as returned by ``cv2.VideoCapture.read`` (BGR channel order).
        model_name: ``'alexnet'`` or ``'vgg16'``; selects the preprocessing pipeline.

    Returns:
        The preprocessed frame with a leading batch dimension added.

    Raises:
        ValueError: If ``model_name`` is not one of the supported names.
    """
    if model_name == 'alexnet':
        pipeline = preprocess_alexnet  # AlexNet preprocessing steps
    elif model_name == 'vgg16':
        pipeline = preprocess_vgg16  # VGG16 preprocessing steps
    else:
        # Previously an unknown name skipped preprocessing and failed later
        # with an unrelated AttributeError on the raw ndarray.
        raise ValueError(
            f"Unknown model_name {model_name!r}; expected 'alexnet' or 'vgg16'"
        )

    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)  # Convert frame from BGR to RGB
    return pipeline(frame).unsqueeze(0)

# Modify predict_video function to get predictions from both models
def predict_video(video_path):
    """Classify every frame of a video with both the AlexNet and VGG16 models.

    Args:
        video_path: Path of the video file to read.

    Returns:
        A tuple ``(alexnet_predictions, vgg16_predictions)``; each is a list
        with one predicted class index per decoded frame.
    """
    cap = cv2.VideoCapture(video_path)
    alexnet_predictions = []
    vgg16_predictions = []

    # try/finally guarantees the capture handle is released even if
    # preprocessing or a model raises part-way through the video.
    try:
        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            processed_frame_alexnet = preprocess_frame(frame, 'alexnet')
            processed_frame_vgg16 = preprocess_frame(frame, 'vgg16')

            with torch.no_grad():  # Ensure no gradients are calculated
                alexnet_output = alexnet_model(processed_frame_alexnet)
                vgg16_output = vgg16_model(processed_frame_vgg16)

            # Each model returns one score per class, so `.item()` on the raw
            # output raises for more than one element. Store the index of the
            # highest-scoring class instead.
            alexnet_predictions.append(alexnet_output.argmax(dim=1).item())
            vgg16_predictions.append(vgg16_output.argmax(dim=1).item())
    finally:
        cap.release()

    return alexnet_predictions, vgg16_predictions

# Modify get_video_label function to handle predictions from both models
def get_video_label(alexnet_predictions, vgg16_predictions):
    """Combine per-frame predictions from both models into one video label.

    Each frame's two predictions are averaged, and the video is labelled
    ``'real'`` when the sum of those averages is strictly greater than half
    the number of frames; otherwise it is labelled ``'fake'``.

    This definition replaces the single-argument ``get_video_label`` defined
    earlier in the notebook once this cell is run.

    Args:
        alexnet_predictions: Per-frame predictions from the AlexNet model.
        vgg16_predictions: Per-frame predictions from the VGG16 model. Frames
            are paired positionally with ``alexnet_predictions``.

    Returns:
        ``'real'`` or ``'fake'``.

    Raises:
        ValueError: If there are no paired predictions to vote on.
    """
    # Average predictions from both models, frame by frame
    combined_predictions = [
        (alex_pred + vgg_pred) / 2
        for alex_pred, vgg_pred in zip(alexnet_predictions, vgg16_predictions)
    ]

    if not combined_predictions:
        raise ValueError(
            "no predictions to combine: no frames were classified, cannot label the video"
        )

    threshold = len(combined_predictions) / 2
    return 'real' if sum(combined_predictions) > threshold else 'fake'

# The rest of your code remains unchanged

# Example usage: classify each frame with both models, combine the two
# prediction lists into one label, then write the labelled copy with
# tag_video (defined in the earlier single-model cell).
video_path = 'path_to_your_video.mp4'
alexnet_preds, vgg16_preds = predict_video(video_path)  # one list of per-frame predictions per model
video_label = get_video_label(alexnet_preds, vgg16_preds)  # 'real' or 'fake'
tag_video(video_path, video_label)

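# NOTE: this cell is an incomplete draft — placeholders such as the VGG16 classifier layers must be filled in before it can be used.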
