# Creating the Dataset

In [2]:
import os
import torch
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import torchvision.transforms as transforms

# Function to parse filenames and extract labels
def parse_filename(filename):
    parts = filename.split('_')
    # Parse initial position
    if parts[1] == 'tura':
        initial_position = 0
    elif parts[1] == 'yazi':
        initial_position = 1
    elif parts[1] == 'vert':
        initial_position = 2
    else:
        raise ValueError("Unknown initial position in filename")

    # Parse distance
    distance = float(parts[2].replace('cm', ''))

    # Parse binary label, check last part before ".png"
    binary_label_part = parts[-1].split('.')[0]
    binary_label = 0 if binary_label_part == 'tura' else 1

    return initial_position, distance, binary_label

# Custom dataset class
class CoinDataset(Dataset):
    def __init__(self, image_folder, transform=None):
        self.image_folder = image_folder
        self.transform = transform
        # Sort the filenames numerically
        self.image_filenames = sorted(os.listdir(image_folder), key=lambda x: int(x.split('_')[0]))
        # Extract labels
        self.initial_positions, self.distance_labels, self.binary_labels = self.extract_labels()

    def extract_labels(self):
        initial_positions = []
        distance_labels = []
        binary_labels = []

        for filename in self.image_filenames:
            initial_position, distance, binary_label = parse_filename(filename)
            initial_positions.append(initial_position)
            distance_labels.append(distance)
            binary_labels.append(binary_label)

        return initial_positions, distance_labels, binary_labels

    def __len__(self):
        return len(self.image_filenames)

    def __getitem__(self, idx):
        img_name = os.path.join(self.image_folder, self.image_filenames[idx])
        image = Image.open(img_name).convert('RGB')
        if self.transform:
            image = self.transform(image)

        binary_label = self.binary_labels[idx]
        distance_label = self.distance_labels[idx]
        initial_position = self.initial_positions[idx]

        return image, binary_label, distance_label, initial_position

# Define the image transformation
transform = transforms.Compose([
    # transforms.Resize((128, 128)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5])
])

# Example usage
folder_path = '/home/eren/Desktop/para/dataset/datason'
dataset = CoinDataset(image_folder=folder_path, transform=transform)

dataloader = DataLoader(dataset, batch_size=1, shuffle=True)

# Model Architecture

In [24]:
import torch
import torch.nn as nn

class Model_cointoss(nn.Module):
    def __init__(self):
        super(Model_cointoss, self).__init__()
        # Define the CNN layers
        self.conv_layers = nn.Sequential(
            nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.ReLU(),
            nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.ReLU()
        )
        # Flatten layer
        self.flatten = nn.Flatten()
        # Dense layers
        self.dense_layers = nn.Sequential(
            nn.Linear(2560001,128),  # +1 for the integer label, adjust the input size
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
        )
        # Output layers
        self.classification_output = nn.Linear(64, 1)  # Single neuron for binary classification
        self.regression_output = nn.Linear(64, 1)  # Single neuron for regression output

    def forward(self, x, label):
        # Process the image through the convolutional layers
        x = self.conv_layers(x)
        x = self.flatten(x)
        
        # Combine the image features with the label
        combined = torch.cat((x, label.unsqueeze(1)), dim=1)
        
        # Pass through dense layers
        combined = self.dense_layers(combined)
        
        # Get the classification (using sigmoid for binary classification) and regression outputs
        class_output = torch.sigmoid(self.classification_output(combined))
        reg_output = self.regression_output(combined)
        
        return class_output, reg_output

# Instantiate the model
model = Model_cointoss()

# Training with Backpropagation on GPU using CUDA

In [25]:
import torch
import torch.optim as optim
import torch.nn as nn

# Check if CUDA is available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Loss functions
classification_loss_fn = nn.BCELoss()
regression_loss_fn = nn.MSELoss()

# Transfer the model to GPU
model.to(device)

# Optimizer
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training Loop
num_epochs = 10
for epoch in range(num_epochs):
    for images, binary_labels, distance_labels, initial_positions in dataloader:
        # Transfer data to GPU
        images = images.to(device)
        binary_labels = binary_labels.to(device).view(-1, 1).type(torch.float)
        distance_labels = distance_labels.to(device).view(-1, 1).type(torch.float)
        initial_positions = initial_positions.to(device)

        # Forward pass
        predicted_binary_labels, predicted_distance_labels = model(images, initial_positions)

        # Calculate loss
        classification_loss = classification_loss_fn(predicted_binary_labels, binary_labels)
        regression_loss = regression_loss_fn(predicted_distance_labels, distance_labels)
        total_loss = classification_loss + regression_loss

        # Backward pass and optimize
        optimizer.zero_grad()
        total_loss.backward()
        optimizer.step()
        
    # Print epoch's loss after each epoch (optional)
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss.item()}')

Epoch [1/10], Loss: 120.93118286132812
Epoch [2/10], Loss: 1390.7713623046875
Epoch [3/10], Loss: 23.925704956054688
Epoch [4/10], Loss: 518.0977172851562
Epoch [5/10], Loss: 288.93133544921875
Epoch [6/10], Loss: 437.76385498046875
Epoch [7/10], Loss: 54.88039779663086
Epoch [8/10], Loss: 2.9349398612976074
Epoch [9/10], Loss: 62.506996154785156
Epoch [10/10], Loss: 87.97881317138672


### Saving model for later use

In [30]:
# Save the model state dict to a file
torch.save(model.state_dict(), 'model_ann.pth')