In [18]:
import import_ipynb
from Functions import *

In [19]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

class AttentionLayer(nn.Module):
    def __init__(self, in_channels):
        super(AttentionLayer, self).__init__()
        self.conv = nn.Conv2d(in_channels, 1, kernel_size=1)
    
    def forward(self, x):
        attention = self.conv(x)  # Shape: (batch_size, 1, 300, 300)
        attention = torch.sigmoid(attention)
        return x * attention  # Element-wise multiplication

class ANNModel(nn.Module):
    def __init__(self):
        super(ANNModel, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=17, out_channels=16, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, padding=1)
        self.attention = AttentionLayer(in_channels=32)
        self.conv3 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(in_channels=64, out_channels=1, kernel_size=3, padding=1)
    
    def forward(self, x):
        x = torch.relu(self.conv1(x))  # Shape: (batch_size, 16, 300, 300)
        x = torch.relu(self.conv2(x))  # Shape: (batch_size, 32, 300, 300)
        x = self.attention(x)          # Shape: (batch_size, 32, 300, 300)
        x = torch.relu(self.conv3(x))  # Shape: (batch_size, 64, 300, 300)
        x = torch.sigmoid(self.conv4(x))  # Shape: (batch_size, 1, 300, 300)
        return x

def train_model(model, input_arrays, target_arrays, epochs=10, batch_size=32, learning_rate=0.001):
    # Convert input and target arrays to tensors and stack them
    inputs = torch.stack([torch.tensor(arr, dtype=torch.float32) for arr in input_arrays])
    targets = torch.stack([torch.tensor(arr, dtype=torch.float32) for arr in target_arrays])
    
    # Create a DataLoader for batching
    dataset = TensorDataset(inputs, targets)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
    
    # Define loss function and optimizer
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    
    # Training loop
    for epoch in range(epochs):
        model.train()  # Set the model to training mode
        running_loss = 0.0
        for i, (batch_inputs, batch_targets) in enumerate(dataloader):
            optimizer.zero_grad()  # Zero the parameter gradients
            
            # Forward pass
            outputs = model(batch_inputs)
            loss = criterion(outputs, batch_targets)
            
            # Backward pass and optimize
            loss.backward()
            optimizer.step()
            
            # Accumulate loss
            running_loss += loss.item()
        
        # Print loss for the epoch
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {running_loss/len(dataloader):.4f}")
    
    print("Training complete.")

def predict(model, input_array):
    model.eval()  # Set the model to evaluation mode
    with torch.no_grad():  # Disable gradient calculation for inference
        input_tensor = torch.tensor(input_array, dtype=torch.float32).unsqueeze(0)  # Add batch dimension
        output = model(input_tensor)
    return output.squeeze(0).numpy()  # Remove batch dimension and convert to numpy array


In [21]:
F = read_images("saved_frames")
F[3] = F[3][:,:,:3]
F[6] = F[6][:,:,:3]
F[9] = F[9][:,:,:3]
F[14] = F[14][:,:,:3]
F = F[:15]
D = np.asarray([abs(F[i]-F[i+1]) for i in range(len(F)-1)],dtype=np.uint8)
D0 = np.asarray(D[:-1],dtype=np.uint8)
D2 = np.asarray(D[1:],dtype=np.uint8)
down = 0
up = 255
B = np.asarray([abs(cv2.Canny(cv2.cvtColor(F[i],cv2.COLOR_BGR2GRAY),down,up)-cv2.Canny(cv2.cvtColor(F[i+1],cv2.COLOR_BGR2GRAY),down,up)) for i in range(len(F)-1)],dtype=np.uint8)
B0 = np.asarray(B[:-1],dtype=np.uint8)
B2 = np.asarray(B[1:],dtype=np.uint8)
F0 = np.asarray(F[:-2],dtype=np.uint8)
F2 = np.asarray(F[2:],dtype=np.uint8) 
F = np.asarray(F[1:-1],dtype=np.uint8)

In [22]:
Input = [np.asarray((F0[i][:,:,0],F0[i][:,:,1],F0[i][:,:,2],
                    F[i][:,:,0],F[i][:,:,1],F[i][:,:,2],
                    F2[i][:,:,0],F2[i][:,:,1],F2[i][:,:,2],
                    D0[i][:,:,0],D0[i][:,:,1],D0[i][:,:,2],
                    D2[i][:,:,0],D2[i][:,:,1],D2[i][:,:,2],
                    B0[i],
                    B2[i])) for i in range(len(F))]

In [23]:
print(Input[0].shape)
print(len(Input))

(17, 320, 640)
13


In [24]:
display_frame(Input[1][14])

In [25]:
input_arrays = [torch.tensor(i) for i in Input[:-1]]

In [26]:
target_arrays = [torch.zeros(320,640) for _ in range(len(Input)-1)] 

In [27]:
Masks = read_images("Masks")
Masks = [cv2.cvtColor(m[:,:,:3],cv2.COLOR_RGB2GRAY) for m in Masks]
target_arrays[3] = Masks[0]
target_arrays[6] = Masks[1]
target_arrays[9] = Masks[2]

In [28]:
model = ANNModel()



Epoch [1/20], Loss: 588.0404
Epoch [2/20], Loss: 1143.2579
Epoch [3/20], Loss: 512.0490
Epoch [4/20], Loss: 574.0169
Epoch [5/20], Loss: 512.0231
Epoch [6/20], Loss: 512.0068
Epoch [7/20], Loss: 511.9993
Epoch [8/20], Loss: 1217.5575
Epoch [9/20], Loss: 1143.0240
Epoch [10/20], Loss: 511.9527
Epoch [11/20], Loss: 511.9406
Epoch [12/20], Loss: 1217.3653
Epoch [13/20], Loss: 511.9080
Epoch [14/20], Loss: 511.8850
Epoch [15/20], Loss: 648.2791
Epoch [16/20], Loss: 511.8465
Epoch [17/20], Loss: 1142.6051
Epoch [18/20], Loss: 586.2489
Epoch [19/20], Loss: 586.2169
Epoch [20/20], Loss: 511.7410
Training complete.


In [37]:
train_model(model, input_arrays, target_arrays, epochs=20, batch_size=14)



Epoch [1/20], Loss: 638.2362
Epoch [2/20], Loss: 638.2362
Epoch [3/20], Loss: 638.2361
Epoch [4/20], Loss: 638.2361
Epoch [5/20], Loss: 638.2361
Epoch [6/20], Loss: 638.2361
Epoch [7/20], Loss: 638.2362
Epoch [8/20], Loss: 638.2362
Epoch [9/20], Loss: 638.2361
Epoch [10/20], Loss: 638.2362
Epoch [11/20], Loss: 638.2361
Epoch [12/20], Loss: 638.2361
Epoch [13/20], Loss: 638.2361
Epoch [14/20], Loss: 638.2361
Epoch [15/20], Loss: 638.2361
Epoch [16/20], Loss: 638.2361
Epoch [17/20], Loss: 638.2361
Epoch [18/20], Loss: 638.2362
Epoch [19/20], Loss: 638.2362
Epoch [20/20], Loss: 638.2362
Training complete.


In [38]:
test_input_array = torch.tensor(Input[-1])  # Simulate a single input array
predicted_output = predict(model, test_input_array)
print(predicted_output.shape)  # Expected output shape: (300, 300)



(1, 320, 640)


In [39]:
display_frame(Input[-1][1])

In [40]:
display_frame(predicted_output.reshape(320,640).astype(np.uint8))