In [7]:
!pip install pandas



In [8]:
# import zipfile
# with zipfile.ZipFile('processed.zip', 'r') as zip_ref:
#     zip_ref.extractall()

# # Output:
# # This will extract all files from 'file.zip'


In [15]:
import os
import pandas as pd
import numpy as np
from PIL import Image
import torch
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader

def load_image(image_path, target_size=(285, 285)):
    """Load an image from file and resize it to the target size."""
    img = Image.open(image_path).convert('RGB')
    img = img.resize((285, 285))
    max= np.array(img)
    img = np.array(img) / max.max()
    transform = transforms.Compose([
        transforms.ToTensor()
    ])
    img = transform(img)
    return img

class CustomDataset(Dataset):
    def __init__(self, main_folder, target_size=(285, 285)):
        self.main_folder = main_folder
        self.target_size = target_size
        self.data = []
        # c=0

        for subdir, _, files in os.walk(main_folder):
            print(subdir,_)
            if 'image_data.csv' in files:
                csv_path = os.path.join(subdir, 'image_data.csv')
                df = pd.read_csv(csv_path)
                target_image_path = os.path.join(subdir, 'target.jpg')
                for _, row in df.iterrows():
                    # print(c)
                    # c+=1
                    X2_image_path = os.path.join(subdir, row['Image_Name'])
                    self.data.append((target_image_path, X2_image_path, row['x'], row['y'],row['z'],row['roll'], row['pitch'], row['yaw']     ))

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        target_image_path, X2_image_path, y1, y2, y3, y4, y5, y6 = self.data[idx]
        target_image = load_image(target_image_path, self.target_size)
        X2_image = load_image(X2_image_path, self.target_size)
        return target_image, X2_image, torch.tensor([y1, y2, y3, y4, y5, y6], dtype=torch.float32)

# Example usage
main_folder = './processed'  # Replace with your main folder path
dataset = CustomDataset(main_folder)
# Split dataset into training and validation sets
# train_size = int(0.8 * len(dataset))
# val_size = len(dataset) - train_size
# train_dataset, val_dataset = torch.utils.data.random_split(dataset, [train_size, val_size])

# train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
# val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)

dataloader = DataLoader(dataset, batch_size=32, shuffle=True)


./processed ['D8_1_2_3', 'D7_1_2', 'D8_1_2', 'D1', 'D8', 'D9_1_2', 'D9_1', 'D3_1', 'D4_1_2_3', 'D4_1', 'D4_1_2', 'D6_1_2', 'D3_1_2_3', 'D2_1_2', 'D4', 'D5_1_2', 'D9', 'D5_1_2_3', 'D3_2', 'D3', 'D2', 'D2_1', 'D8_1', 'D6_1', 'D5_1', 'D1_1', 'D7_1', 'D5', 'D1_1_2_3', 'D2_1_2_3', 'D3_1_2', 'D7', 'D9_1_2_3', 'D6', 'D7_1_2_3', 'D1_1_2', 'D6_1_2_3']
./processed/D8_1_2_3 []
./processed/D7_1_2 []
./processed/D8_1_2 []
./processed/D1 []
./processed/D8 []
./processed/D9_1_2 []
./processed/D9_1 []
./processed/D3_1 []
./processed/D4_1_2_3 []
./processed/D4_1 []
./processed/D4_1_2 []
./processed/D6_1_2 []
./processed/D3_1_2_3 []
./processed/D2_1_2 []
./processed/D4 []
./processed/D5_1_2 []
./processed/D9 []
./processed/D5_1_2_3 []
./processed/D3_2 []
./processed/D3 []
./processed/D2 []
./processed/D2_1 []
./processed/D8_1 []
./processed/D6_1 []
./processed/D5_1 []
./processed/D1_1 []
./processed/D7_1 []
./processed/D5 []
./processed/D1_1_2_3 []
./processed/D2_1_2_3 []
./processed/D3_1_2 []
./process

In [10]:
import torch
import torch.nn as nn
from torchvision import models

class CustomResNet(nn.Module):
    def __init__(self, num_outputs=6):
        super(CustomResNet, self).__init__()
        self.resnet = models.resnet50(pretrained=False)
        num_ftrs = self.resnet.fc.in_features

        self.resnet.fc = nn.Identity()  # Remove the last fully connected layer

        # # Additional layers for combined features
        self.fc1 = nn.Linear(num_ftrs * 2, 256)
        self.fc2 = nn.Linear(256, 128)
        self.fc3 = nn.Linear(128, 64)
        self.fc4 = nn.Linear(64, num_outputs)
        self.tanh = nn.Tanh()

    def forward(self, x1, x2):
        # Extract features from both images
        f1 = self.resnet(x1)
        f2 = self.resnet(x2)

        # Concatenate features
        combined = torch.cat((f1, f2), dim=1)



        # Fully connected layers
        x = nn.ReLU()(self.fc1(combined))
        x = nn.ReLU()(self.fc2(x))
        x = nn.ReLU()(self.fc3(x))
        x = self.tanh(self.fc4(x))

        return x

# Initialize the model
model = CustomResNet()

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)


CustomResNet(
  (resnet): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
      

In [11]:
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)


In [1]:
num_epochs = 30

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0

    for target_images, X2_images, labels in dataloader:
        target_images, X2_images, labels = target_images.to(device).float(), X2_images.to(device).float(), labels.to(device)

        optimizer.zero_grad()
        outputs = model(target_images, X2_images)
        loss = criterion(outputs, labels)
        loss = loss
        loss.backward()
        optimizer.step()

        running_loss += loss.item() * target_images.size(0)

    epoch_loss = running_loss / len(dataloader.dataset)
    print(f"Epoch {epoch+1}/{num_epochs}, Loss: {epoch_loss:.4f}")

print("Training completed!")


NameError: name 'model' is not defined

In [14]:
# #  docker run -d -p 8080:80 --name yusuf-conatiner pytorch/pytorch:latest


# # import matplotlib.pyplot as plt

# num_epochs = 10
# train_losses = []
# val_losses = []
# train_accuracies = []
# val_accuracies = []

# # def calculate_accuracy(outputs, labels, threshold=0.1):
# #     accuracies = []
# #     for i in range(outputs.shape[1]):
# #         pred = outputs[:, i]
# #         label = labels[:, i]
# #         acc = (torch.abs(pred - label) < threshold).float().mean().item()
# #         accuracies.append(acc)
# #     return np.mean(accuracies)

# for epoch in range(num_epochs):
#     model.train()
#     running_loss = 0.0
#     running_acc = 0.0
    

#     # for target_images, train_images, y1, y2 in train_loader:
#     for target_images, X2_images, labels in  train_loader:
#         target_images, X2_images, labels = target_images.to(device).float(), X2_images.to(device).float(), labels.to(device)
#     `
#         optimizer.zero_grad()
#         outputs = model(target_images, X2_images)
#         loss = criterion(outputs, labels)

#         loss.backward()
#         optimizer.step()

#         running_loss += loss.item() * target_images.size(0)
# #         running_acc += calculate_accuracy(outputs, labels.squeeze()) * target_images.size(0)

#     epoch_loss = running_loss / len(train_loader.dataset)
# #     epoch_acc = running_acc / len(train_loader.dataset)
#     train_losses.append(epoch_loss)
# #     train_accuracies.append(epoch_acc)

#     model.eval()
#     val_loss = 0.0
#     val_acc = 0.0
#     with torch.no_grad():
#         # for target_images, train_images, y1, y2 in val_loader:
#         for target_images, X2_images, labels in  val_loader:
#             target_images, X2_images, labels = target_images.to(device).float(), X2_images.to(device).float(), labels.to(device)

#             optimizer.zero_grad()
#             utputs = model(target_images, X2_images)
#             loss = criterion(outputs, labels)

#             val_loss += loss.item() * target_images.size(0)
# #             val_acc += calculate_accuracy(outputs, labels.squeeze()) * target_images.size(0)

#     epoch_val_loss = val_loss / len(val_loader.dataset)
# #     epoch_val_acc = val_acc / len(val_loader.dataset)
#     val_losses.append(epoch_val_loss)
# #     val_accuracies.append(epoch_val_acc)

#     print(f"Epoch {epoch+1}/{num_epochs}, Train Loss: {epoch_loss:.4f}, Train Acc: {epoch_acc:.4f}, Val Loss: {epoch_val_loss:.4f}, Val Acc: {epoch_val_acc:.4f}")

# print("Training completed!")


RuntimeError: The size of tensor a (11) must match the size of tensor b (16) at non-singleton dimension 0

In [18]:
# Save the model
torch.save(model.state_dict(), 'resnet_multi_input.pth')

# Load the model
model.load_state_dict(torch.load('resnet_multi_input.pth'))
model.to(device)
model.eval()


CustomResNet(
  (resnet): ResNet(
    (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu): ReLU(inplace=True)
    (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (layer1): Sequential(
      (0): Bottleneck(
        (conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu): ReLU(inplace=True)
        (downsample): Sequential(
      