# Model

In [1]:
from torch.utils.data import Dataset
import pandas as pd
import torchaudio
import torch
import torchvision

In [2]:
import torch.nn as nn
import numpy as np
from torch.nn import init
import torch.nn.functional as F

import os
from glob import glob
from torchinfo import summary
import matplotlib.pyplot as plt

%matplotlib inline

In [3]:
import librosa
import librosa.display
import IPython.display as ipd

In [4]:
input_size = 661500 

In [5]:
xb = torch.randn(4, input_size)
print(xb.shape)
xb = xb.abs()
xb

torch.Size([4, 661500])


tensor([[1.3986, 1.3725, 0.2144,  ..., 0.5458, 0.2104, 0.9773],
        [0.8697, 0.5363, 0.0855,  ..., 0.7451, 2.2866, 2.1207],
        [1.2065, 0.7355, 0.5751,  ..., 0.6077, 0.1281, 0.1869],
        [0.1603, 0.4982, 0.3391,  ..., 0.7292, 0.0472, 0.9970]])

In [6]:
class CNN1DV1(nn.Module):
    def __init__(self, input_size):
        super(CNN1DV1, self).__init__()
        self.input_size = input_size
        self.c0 = nn.Conv1d(1, 64, kernel_size=3, stride=1, padding=1)
        self.c1 = nn.MaxPool1d(kernel_size=self.input_size, stride=1)
        self.c2 = nn.Flatten()
        self.c3 = nn.Linear(64, 128)
        self.c4 = nn.Linear(128, 4)
        self.c5 = nn.Softmax(dim=1)        

    def forward(self, x):
        x1 = self.c0(x)
        x2 = self.c1(x1)
        x3 = self.c2(x2)
        x4 = self.c3(x3)
        x5 = self.c4(x4)
        x6 = self.c5(x5)
        return x6


In [7]:
# Model Testing
x = torch.randn(1, 1, input_size).abs()  # Batch=1, Channels=1, Length=1099228
model_2 = CNN1DV1(input_size)
output = model_2(x)

print(output.shape)  # Expected Output: (1, 4)

torch.Size([1, 4])


In [8]:
output

tensor([[0.4554, 0.1494, 0.0859, 0.3094]], grad_fn=<SoftmaxBackward0>)

In [9]:
summary(model_2, input_size=(1, 1, input_size), device='cpu')

Layer (type:depth-idx)                   Output Shape              Param #
CNN1DV1                                  [1, 4]                    --
├─Conv1d: 1-1                            [1, 64, 661500]           256
├─MaxPool1d: 1-2                         [1, 64, 1]                --
├─Flatten: 1-3                           [1, 64]                   --
├─Linear: 1-4                            [1, 128]                  8,320
├─Linear: 1-5                            [1, 4]                    516
├─Softmax: 1-6                           [1, 4]                    --
Total params: 9,092
Trainable params: 9,092
Non-trainable params: 0
Total mult-adds (Units.MEGABYTES): 169.35
Input size (MB): 2.65
Forward/backward pass size (MB): 338.69
Params size (MB): 0.04
Estimated Total Size (MB): 341.37

In [10]:
class CNN1DV2(nn.Module):
    def __init__(self, input_size):
        super(CNN1DV2, self).__init__()
        self.input_size = input_size
        self.c0_1 = nn.Conv1d(1, 64, kernel_size=3, stride=1, padding=1)
        self.c0_2 = nn.Conv1d(64, 256, kernel_size=3, stride=1, padding=1)
        self.c0_3 = nn.Conv1d(256, 256, kernel_size=3, stride=1, padding=1)
        self.c0_4 = nn.Conv1d(256, 64, kernel_size=3, stride=1, padding=1)
        self.c1 = nn.MaxPool1d(kernel_size=self.input_size, stride=1)
        self.c2 = nn.Flatten()
        self.c3_1 = nn.Linear(64, 128)
        self.c3_2 = nn.Linear(128, 128)
        self.c3_3 = nn.Linear(128, 128)
        self.c4 = nn.Linear(128, 4)
        self.c5 = nn.Softmax(dim=1)        

    def forward(self, x):
        x1_1 = self.c0_1(x)
        x1_2 = self.c0_2(x1_1)
        x1_3 = self.c0_3(x1_2)
        x1_4 = self.c0_4(x1_3)
        x2 = self.c1(x1_4)
        x3 = self.c2(x2)
        x4_1 = self.c3_1(x3)
        x4_2 = self.c3_2(x4_1)
        x4_3 = self.c3_3(x4_2)
        x5 = self.c4(x4_3)
        x6 = self.c5(x5)
        return x6


In [11]:
# Model Testing
input_size = 661500
  # Example input size
x = torch.randn(1, 1, input_size).abs()  # Batch=1, Channels=1, Length=1099228
model_2 = CNN1DV2(input_size)
output = model_2(x)

print(output.shape)  # Expected Output: (1, 4)

torch.Size([1, 4])


In [12]:
output

tensor([[0.2519, 0.2314, 0.2377, 0.2791]], grad_fn=<SoftmaxBackward0>)

In [13]:
summary(model_2, input_size=(1, 1, input_size), device='cpu')

Layer (type:depth-idx)                   Output Shape              Param #
CNN1DV2                                  [1, 4]                    --
├─Conv1d: 1-1                            [1, 64, 661500]           256
├─Conv1d: 1-2                            [1, 256, 661500]          49,408
├─Conv1d: 1-3                            [1, 256, 661500]          196,864
├─Conv1d: 1-4                            [1, 64, 661500]           49,216
├─MaxPool1d: 1-5                         [1, 64, 1]                --
├─Flatten: 1-6                           [1, 64]                   --
├─Linear: 1-7                            [1, 128]                  8,320
├─Linear: 1-8                            [1, 128]                  16,512
├─Linear: 1-9                            [1, 128]                  16,512
├─Linear: 1-10                           [1, 4]                    516
├─Softmax: 1-11                          [1, 4]                    --
Total params: 337,604
Trainable params: 337,604
Non-trainab

In [14]:
# model testing

c0_1 = nn.Conv1d(1, 64, kernel_size=3, stride=1, padding=1)
c0_2 = nn.Conv1d(64, 256, kernel_size=3, stride=1, padding=1)
c0_3 = nn.Conv1d(256, 256, kernel_size=3, stride=1, padding=1)
c0_4 = nn.Conv1d(256, 64, kernel_size=3, stride=1, padding=1)
c1 = nn.MaxPool1d(kernel_size=input_size, stride=1)
c2 = nn.Flatten()
c3 = nn.Linear(64, 128)
c4 = nn.Linear(128, 4)
c5 = nn.Softmax(dim=1)


x = torch.randn(1,1,input_size)
print(x.shape)
x = x.abs()

x1_1 = c0_1(x)
x1_2 = c0_2(x1_1)
x1_3 = c0_3(x1_2)
x1_4 = c0_4(x1_3)
x2 = c1(x1_4)
x3 = c2(x2)
x4 = c3(x3)
x5 = c4(x4)
x6 = c5(x5)

outputs = [x1_1,x1_2,x1_3,x1_3,x1_4, x2, x3, x4, x5, x6]
for out in outputs:
    print(out.shape)
print(out)




torch.Size([1, 1, 661500])
torch.Size([1, 64, 661500])
torch.Size([1, 256, 661500])
torch.Size([1, 256, 661500])
torch.Size([1, 256, 661500])
torch.Size([1, 64, 661500])
torch.Size([1, 64, 1])
torch.Size([1, 64])
torch.Size([1, 128])
torch.Size([1, 4])
torch.Size([1, 4])
tensor([[0.2330, 0.3722, 0.2273, 0.1675]], grad_fn=<SoftmaxBackward0>)


In [15]:
x6

tensor([[0.2330, 0.3722, 0.2273, 0.1675]], grad_fn=<SoftmaxBackward0>)

In [14]:
class CNN1DV3(nn.Module):
    def __init__(self, input_size):
        super(CNN1DV3, self).__init__()
        self.input_size = input_size
        self.c1 = nn.Sequential(
           nn.Conv1d(1, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm1d(64),
            nn.ReLU(inplace=True)
        )
        self.c2 = nn.Sequential(
           nn.Conv1d(64, 256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm1d(256),
            nn.ReLU(inplace=True)
        )
        self.c3 = nn.Sequential(
           nn.Conv1d(256,256, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm1d(256),
            nn.ReLU(inplace=True)
        )
        self.c4 = nn.Sequential(
           nn.Conv1d(256, 64, kernel_size=3, stride=1, padding=1),
            nn.BatchNorm1d(64),
            nn.ReLU(inplace=True)
        )
        self.c5 = nn.Sequential(
            nn.MaxPool1d(kernel_size=self.input_size, stride=1),
            nn.Flatten(),
            nn.Linear(64, 128),
            nn.Linear(128, 128),
            nn.Linear(128, 128),
            nn.Linear(128, 4),
            nn.Softmax(dim=1)
        )     

    def forward(self, x):
        x1 = self.c1(x)
        x2 = self.c2(x1)
        x3 = self.c3(x2)
        x4 = self.c4(x3)
        x5 = self.c5(x4)
        return x5


In [15]:
# Model Testing
input_size = 661500
  # Example input size
x = torch.randn(4, 1, input_size).abs()  # Batch=1, Channels=1, Length=1099228
model_2 = CNN1DV3(input_size)
output = model_2(x)

print(output.shape)  # Expected Output: (1, 4)

torch.Size([4, 4])


In [17]:
output

tensor([[0.3362, 0.1064, 0.2470, 0.3103],
        [0.3454, 0.0966, 0.2300, 0.3280],
        [0.3422, 0.0978, 0.2478, 0.3122],
        [0.3478, 0.1026, 0.2363, 0.3133]], grad_fn=<SoftmaxBackward0>)

In [18]:
summary(model_2, input_size=(1, 1, input_size), device='cpu')

Layer (type:depth-idx)                   Output Shape              Param #
CNN1DV3                                  [1, 4]                    --
├─Sequential: 1-1                        [1, 64, 661500]           --
│    └─Conv1d: 2-1                       [1, 64, 661500]           256
│    └─BatchNorm1d: 2-2                  [1, 64, 661500]           128
│    └─ReLU: 2-3                         [1, 64, 661500]           --
├─Sequential: 1-2                        [1, 256, 661500]          --
│    └─Conv1d: 2-4                       [1, 256, 661500]          49,408
│    └─BatchNorm1d: 2-5                  [1, 256, 661500]          512
│    └─ReLU: 2-6                         [1, 256, 661500]          --
├─Sequential: 1-3                        [1, 256, 661500]          --
│    └─Conv1d: 2-7                       [1, 256, 661500]          196,864
│    └─BatchNorm1d: 2-8                  [1, 256, 661500]          512
│    └─ReLU: 2-9                         [1, 256, 661500]          --
├─

In [22]:
class ENFCNNModel(nn.Module):
    """CNN model for audio tampering detection based on ENF features"""
    def __init__(self):
        super(ENFCNNModel, self).__init__()
        
        self.conv_layers = nn.Sequential(
            nn.Conv1d(1, 16, kernel_size=3, padding=1),
            nn.ReLU(),
            # nn.MaxPool1d(2), 
            
            nn.Conv1d(16, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            # nn.MaxPool1d(2),
            
            nn.Conv1d(32, 64, kernel_size=3, padding=1),
            nn.ReLU(),
            # nn.MaxPool1d(2)
        )
        
        self.fc_layers = nn.Sequential(
            nn.Flatten(),
            nn.Linear(64 * 4, 128),  # Assuming we have at least 32 frames
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 1),
            nn.Sigmoid()
        )
        
    def forward(self, x):
        x = self.conv_layers(x)
        x = self.fc_layers(x)
        return x

In [23]:
model = ENFCNNModel()

summary(model = model, input_size=(16,1,4))

Layer (type:depth-idx)                   Output Shape              Param #
ENFCNNModel                              [16, 1]                   --
├─Sequential: 1-1                        [16, 64, 4]               --
│    └─Conv1d: 2-1                       [16, 16, 4]               64
│    └─ReLU: 2-2                         [16, 16, 4]               --
│    └─Conv1d: 2-3                       [16, 32, 4]               1,568
│    └─ReLU: 2-4                         [16, 32, 4]               --
│    └─Conv1d: 2-5                       [16, 64, 4]               6,208
│    └─ReLU: 2-6                         [16, 64, 4]               --
├─Sequential: 1-2                        [16, 1]                   --
│    └─Flatten: 2-7                      [16, 256]                 --
│    └─Linear: 2-8                       [16, 128]                 32,896
│    └─ReLU: 2-9                         [16, 128]                 --
│    └─Dropout: 2-10                     [16, 128]                 --
│    