In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
import pandas as pd
from PIL import Image
import os
import csv

# 定义自己的数据集类 Define the dataloader
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from torchvision import transforms
import pandas as pd
from PIL import Image
import os
import csv

# 定义自己的数据集类 Define the custom Dataset class
class MyDataset(Dataset):
    """Image dataset whose samples are listed in a CSV label table.

    Column 0 of the table holds the image file stem (".jpg" is appended when
    the image is opened) and column 1 holds the label returned with it.
    """

    def __init__(self, img_dir, label_file, transform=None):
        """Remember the image directory and transform, and load the label table.

        Args:
            img_dir: Directory that contains the ``<stem>.jpg`` images.
            label_file: Path (or file-like object) handed to ``pd.read_csv``.
            transform: Optional callable applied to each opened image.
        """
        self.img_dir, self.transform = img_dir, transform
        self.label_df = pd.read_csv(label_file)

    def __len__(self):
        """Return the number of rows in the label table."""
        return self.label_df.shape[0]

    def __getitem__(self, idx):
        """Return ``(image, label)`` for row ``idx`` of the label table."""
        table = self.label_df
        stem = table.iloc[idx, 0]
        picture = Image.open(os.path.join(self.img_dir, f"{stem}.jpg"))
        target = table.iloc[idx, 1]
        if not self.transform:
            return picture, target
        return self.transform(picture), target

# 定义卷积神经网络 Design the CNN
class ConvNet(nn.Module):
    """Three-stage convolutional classifier that outputs log-probabilities.

    Each stage is Conv2d(3x3, padding 1) -> BatchNorm2d -> ReLU -> MaxPool2d(2),
    with 32, 64 and 128 output channels respectively. The flattened feature map
    feeds a single ``nn.LazyLinear`` layer, so the input image size does not
    have to be known at construction time; it is inferred on the first forward
    pass.

    Args:
        num_classes: Number of output classes (default 3, the original value).
        in_channels: Number of channels of the input images (default 1, i.e.
            grayscale, the original value).
    """

    def __init__(self, num_classes=3, in_channels=1):
        super().__init__()
        # Build the stages in the same order as before so the indices inside
        # ``conv_layers`` (and therefore the state_dict keys) stay unchanged.
        stages = []
        prev_channels = in_channels
        for width in (32, 64, 128):
            stages += [
                nn.Conv2d(prev_channels, width, kernel_size=3, stride=1, padding=1),
                nn.BatchNorm2d(width),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=2, stride=2),
            ]
            prev_channels = width
        self.conv_layers = nn.Sequential(*stages)
        self.fc_layers = nn.Sequential(
            nn.Flatten(),
            nn.LazyLinear(num_classes),
        )

    def forward(self, x) -> torch.Tensor:
        """Return per-class log-probabilities of shape ``(batch, num_classes)``.

        The output is already log-softmax normalized. Applying log-softmax
        again (as ``nn.CrossEntropyLoss`` does internally) leaves it unchanged,
        so the output can be fed to either ``nn.NLLLoss`` or
        ``nn.CrossEntropyLoss``.
        """
        features = self.conv_layers(x)
        logits = self.fc_layers(features)
        return nn.functional.log_softmax(logits, dim=1)

In [None]:
def predict_labels(net, loader, device):
    """Return the predicted class index of every sample in ``loader``, in order.

    Args:
        net: Model mapping an image batch to per-class scores.
        loader: Iterable yielding ``(images, _)`` batches; batches may differ
            in size (e.g. a shorter final batch).
        device: Device the image batches are moved to before the forward pass.

    Returns:
        A flat list of ints, one per sample, in iteration order.
    """
    net.eval()
    predictions = []
    with torch.no_grad():
        for images, _ in loader:
            outputs = net(images.to(device))
            predictions.extend(outputs.argmax(dim=1).cpu().tolist())
    return predictions


def build_submission(file_ids, predictions, class_names):
    """Pair file ids with predicted classes and translate classes to names.

    Args:
        file_ids: Sequence of file identifiers, one per prediction.
        predictions: Sequence of predicted class indices, same length/order.
        class_names: Mapping from class index to the name written to the CSV.

    Returns:
        A DataFrame with columns ``file_id`` (as strings) and ``label``.

    Raises:
        ValueError: If the two sequences have different lengths.
    """
    ids = [str(file_id) for file_id in file_ids]
    predicted = list(predictions)
    if len(ids) != len(predicted):
        raise ValueError(
            f"got {len(ids)} file ids but {len(predicted)} predictions"
        )
    frame = pd.DataFrame({'file_id': ids, 'label': predicted})
    frame['label'] = frame['label'].map(class_names)
    return frame


if __name__ == '__main__':
    # Training-set location; preset by the platform, no need to modify.
    train_path = "/bohr/train-jcym/v1/"

    # "DATA_PATH" is the environment variable pointing at the encrypted test
    # set. It is only available while the platform scores a submission;
    # contestants cannot download the test set directly.
    data_path_env = os.environ.get('DATA_PATH')
    if data_path_env:
        DATA_PATH = data_path_env + "/"
    else:
        # No test set available (e.g. a local baseline run): still train, but
        # skip the prediction step instead of crashing on an undefined name.
        DATA_PATH = None
        print("Baseline运行时，因为无法读取测试集，所以会有此条报错，属于正常现象")
        print("When baseline is running, this error message will appear because the test set cannot be read, which is a normal phenomenon.")

    # Data preprocessing: single-channel images resized to a fixed size.
    image_size = (80, 60)
    transform = transforms.Compose([
        transforms.Grayscale(num_output_channels=1),
        transforms.Resize(image_size),
        transforms.ToTensor(),
    ])

    # Read the data through the custom Dataset.
    train_dataset = MyDataset(img_dir=train_path + 'image_train', label_file=train_path + 'label_train.csv', transform=transform)
    train_loader = DataLoader(dataset=train_dataset, batch_size=25, shuffle=True)

    # Build the test set before training so a broken test path fails early.
    test_dataset = None
    test_loader = None
    if DATA_PATH is not None:
        test_dataset = MyDataset(img_dir=DATA_PATH + 'image_test', label_file=DATA_PATH + 'label_test_nolabel.csv', transform=transform)
        test_loader = DataLoader(dataset=test_dataset, batch_size=25, shuffle=False)

    # -------------------- Training and testing --------------------
    net = ConvNet()
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    net.to(device)
    print(device)

    # ConvNet ends in a LazyLinear layer whose weights do not exist until the
    # first forward pass. Run one dummy batch (eval mode, so BatchNorm running
    # statistics are untouched) before handing the parameters to the optimizer.
    net.eval()
    with torch.no_grad():
        net(torch.zeros(1, 1, *image_size, device=device))

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(net.parameters(), lr=0.01)

    # Training
    num_epochs = 20
    for epoch in range(num_epochs):
        print("epoch:",epoch)
        net.train()
        running_loss = 0.0
        correct = 0
        total = 0
        for images, labels in train_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = net(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        epoch_accuracy = correct / total
        print(f"Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}, Accuracy: {epoch_accuracy}")

    if test_loader is not None:
        # The test loader is not shuffled, so predictions come back in the same
        # order as the rows of the label table. Pairing them with the table's
        # first column directly avoids recomputing indices per batch, which
        # went wrong whenever the final batch was smaller than the others.
        predictions = predict_labels(net, test_loader, device)
        file_ids = test_dataset.label_df.iloc[:, 0]

        # Replace 0, 1, 2 with cell names. It is recommended to download the
        # images and determine the correct order first; the mapping below is
        # only a reference and may not be accurate.
        submission_df = build_submission(
            file_ids,
            predictions,
            {0: 'Epidermis Cell', 1: 'Dermal Tissue Cell', 2: 'Meristematic Tissue Cell'},
        )
        submission_df.to_csv('submission.csv', index=False)