In [134]:
import torch
import torch.nn as nn
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import ast
import os

In [135]:
# List all input files
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        if not "chart" in filename:
            print(os.path.join(dirname, filename))

/kaggle/input/croppedimages2/updated_data.csv
/kaggle/input/think-cell-datathon/sample_submission.csv
/kaggle/input/think-cell-datathon/val_and_test.csv
/kaggle/input/think-cell-datathon/data_loading_example.ipynb
/kaggle/input/think-cell-datathon/train.csv


In [136]:
def load_pie_data(filepath):
    # Load data from CSV file into a DataFrame
    data = pd.read_csv(filepath)

    # Convert string representation of lists back to actual lists
    list_data_features = ["boxes", "start_angles", "end_angles", "angles", "percentages"]
    for column in list_data_features:
        data[column] = data[column].apply(ast.literal_eval)

    return data

train_df = load_pie_data("/kaggle/input/croppedimages2/updated_data.csv")


In [137]:
from torchvision.models import resnet18

class CenterCNN(nn.Module):
    def __init__(self):
        super(CenterCNN, self).__init__()
        self.backbone = resnet18(weights=None)
        self.backbone.fc = nn.Linear(512,2,bias=True)
        self.backbone.train()
    
    def forward(self, x):
        return self.backbone(x)

In [138]:
from PIL import Image
import torchvision.transforms as transforms
import os

def transformImages(idx, train_df):
    
    chart_dict = train_df.loc[idx]
    image_path = f"/kaggle/input/croppedimages2/cropped_images/cropped_images/{chart_dict['filename']}"
    if os.path.exists(image_path):
        image = Image.open(image_path).convert("RGB")

        transform = transforms.Compose([
            transforms.Resize((256, 256)),  # Resize the image to 224x224
            transforms.ToTensor(),  # Convert the image to a PyTorch tensor
        ])

        scale_x = 256 / image.height
        scale_y = 256 / image.width

        kp = train_df.loc[idx]["boxes"][0][-2:]
        kp[0] *= scale_x
        kp[1] *= scale_y

        image_tensor = transform(image)
        kp = torch.tensor(kp)
        return image_tensor, kp, scale_x, scale_y
    
    return None

In [139]:
from torch.utils.data import Dataset, DataLoader, random_split

class ImageDataset(Dataset):
    def __init__(self, data, transform=None):
        self.data = data
        self.transform = transform
        
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
        image_tensor, kp, scale_x, scale_y = transformImages(idx, self.data)
        return {"image": image_tensor, "keypoint": kp, "scale_x": scale_x, "scale_y": scale_y}
        

def train(train_dataset,val_dataset):
    epochs = 30
    
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    
    dataloader = DataLoader(train_dataset, batch_size=8, shuffle=True)
    
    model = CenterCNN().to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr = 1e-3)
    
    for epoch in range(epochs):
        print("epoch", epoch)
        avg_loss = 0
        for idx, batch in enumerate(dataloader):
            model.train()
            optimizer.zero_grad()
            labels = batch["keypoint"].to(device)
            preds = model(batch["image"].to(device))
            criterion = nn.MSELoss()
            loss = criterion(preds, labels)
            loss.backward()
            optimizer.step()
            
            avg_loss += loss
        
        if val_dataset:
            validate(val_dataset, model, device)
            
        avg_loss /= len(dataloader)
        print("avg epoch loss:", avg_loss)
        
    return model

In [140]:
def unresize(kp, scale_x, scale_y):
    return torch.tensor([kp[0] / scale_x, kp[1] / scale_y],dtype=torch.float32)

def validate(val_dataset, model, device):

    dataloader = DataLoader(val_dataset, batch_size=1, shuffle=False)
    model.eval()
    avg_warped_loss = 0
    avg_true_error = 0
    with torch.no_grad():
        for idx, batch in enumerate(dataloader):
            labels = batch["keypoint"].to(device)
            preds = model(batch["image"].to(device))
            criterion = nn.MSELoss()
            loss = criterion(preds, labels)
            avg_warped_loss += loss
            pred_og = unresize(preds[0], batch["scale_x"][0], batch["scale_y"][0])
            label_og = unresize(labels[0], batch["scale_x"][0], batch["scale_y"][0])
            avg_true_error += torch.sqrt(torch.sum((pred_og - label_og)**2))
        
    avg_warped_loss /= len(dataloader)
    avg_true_error /= len(dataloader)
    print("avg warped loss", avg_warped_loss)
    print("avg true error", avg_true_error)

In [142]:
dataset = ImageDataset(train_df)
# train_size = int(0.8*len(dataset))
# valid_size = len(dataset) - train_size
# train_dataset, valid_dataset = random_split(dataset, [train_size, valid_size])
model = train(dataset, None)

path = "/kaggle/working/model_new.ckpt"

torch.save(model.state_dict(), path)

epoch 0
avg epoch loss: tensor(854.8578, device='cuda:0', grad_fn=<DivBackward0>)
epoch 1
avg epoch loss: tensor(166.7605, device='cuda:0', grad_fn=<DivBackward0>)
epoch 2
avg epoch loss: tensor(167.3474, device='cuda:0', grad_fn=<DivBackward0>)
epoch 3
avg epoch loss: tensor(166.3873, device='cuda:0', grad_fn=<DivBackward0>)
epoch 4
avg epoch loss: tensor(165.7303, device='cuda:0', grad_fn=<DivBackward0>)
epoch 5
avg epoch loss: tensor(164.6215, device='cuda:0', grad_fn=<DivBackward0>)
epoch 6
avg epoch loss: tensor(163.0002, device='cuda:0', grad_fn=<DivBackward0>)
epoch 7
avg epoch loss: tensor(161.7993, device='cuda:0', grad_fn=<DivBackward0>)
epoch 8
avg epoch loss: tensor(161.9917, device='cuda:0', grad_fn=<DivBackward0>)
epoch 9
avg epoch loss: tensor(161.3918, device='cuda:0', grad_fn=<DivBackward0>)
epoch 10
avg epoch loss: tensor(159.5151, device='cuda:0', grad_fn=<DivBackward0>)
epoch 11
avg epoch loss: tensor(157.6417, device='cuda:0', grad_fn=<DivBackward0>)
epoch 12
avg e

In [116]:
import csv

class TestDataset:
    def __init__(self, start, end):
        self.start = start
        self.end = end
    def __len__(self):
        return self.end-self.start
    def __getitem__(self,idx):
        idx = self.start + idx
        path = f"/kaggle/input/croppedimages2/cropped_images (2)/cropped_images/chart_{idx}.png"
        image = Image.open(path).convert("RGB")

        transform = transforms.Compose([
            transforms.Resize((256, 256)),  # Resize the image to 224x224
            transforms.ToTensor(),  # Convert the image to a PyTorch tensor
        ])

        scale_x = 256 / image.width
        scale_y = 256 / image.height
        
        image_tensor = transform(image)
        return {"image": image_tensor, "scale_x": scale_x, "scale_y": scale_y}
        
def test(model, mode):
    if mode == "val":
        start = 10000
        end = 10005
    elif mode == "test":
        start = 20000
        end = 30000
        
    model.eval()
    model.cpu()
    
    output = []
    dataset = TestDataset(start, end)
    dataloader = DataLoader(dataset, batch_size=1,shuffle=False)
    
    for idx, batch in enumerate(dataloader, start=10000):
        image_tensor = batch["image"][0]
        inputs = torch.unsqueeze(image_tensor,0)
        pred = model(inputs)
        
        pred = pred[0]
        scale_x = batch["scale_x"][0]
        scale_y = batch["scale_y"][0]
        
        pred_unwarp = [idx, (pred[0]/scale_x).item(), (pred[1]/scale_y).item()]
        
        output.append(pred_unwarp)
        
    csv_file = '{}_outputs_new.csv'.format(mode)

    with open(csv_file, 'w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(['id', 'x', 'y'])  # Write header
        writer.writerows(output)  # Write test outputs

test(model,"val")

In [None]:
idx = 10001

path = f"/kaggle/input/croppedimages2/cropped_images (2)/cropped_images/chart_{idx}.png"
image = Image.open(path).convert("RGB")
transform = transforms.Compose([
            transforms.ToTensor(),  # Convert the image to a PyTorch tensor
        ])
image = transform(image)

fig, axes = plt.subplots(1, 2, figsize=(10, 5))

axes[0].imshow(image[0])
axes[0].set_title('Image')
axes[0].axis('off')

csv_file = '/kaggle/working/val_outputs_new.csv'

df = pd.read_csv(csv_file)

ind = 4
kp = [df["x"][ind],df["y"][ind]]
print(kp)

axes[0].scatter(224, 221, color='red', marker='o', s=10)
plt.show()