In [1]:
import os
import pandas as pd
import numpy as np
from datetime import datetime as datatime
from pathlib import Path
import torch
import torch.nn as nn
from PIL import Image
import torch.optim as optim
from torchvision import transforms
from torchvision import models

from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score,f1_score,precision_score,recall_score,confusion_matrix,classification_report
from sklearn.model_selection import StratifiedShuffleSplit
from sklearn.model_selection import train_test_split

from tqdm import tqdm

In [2]:
df = pd.read_csv('./COMP90086_2024_Project_train/train.csv')
df.head()

Unnamed: 0,id,shapeset,type,total_height,instability_type,cam_angle,stable_height
0,54,2,1,3,1,1,2
1,173,1,1,4,1,2,1
2,245,1,1,4,1,2,1
3,465,2,1,5,0,1,5
4,611,2,1,3,1,1,1


In [3]:
df[df['id']==333181]

Unnamed: 0,id,shapeset,type,total_height,instability_type,cam_angle,stable_height
2509,333181,1,2,3,0,1,3


In [4]:
class ReadData(Dataset):

  def __init__(self, df, img_dir,label_column, transform = None):
    """
    Initialize the dataset
    Args:
      df: pandas dataframe, the data of the dataset
      img_dir: string, the directory of the images
      transform: torchvision.transforms, the transform to apply to the images
    """
    self.df = df
    self.img_dir = img_dir
    self.transform = transform
    self.label_column = label_column
    img_dir_path = Path(self.img_dir)

    if not img_dir_path.exists():
        raise ValueError(f"The directory {img_dir_path} does not exist!")
    if not img_dir_path.is_dir() or not os.access(img_dir_path, os.R_OK):
        raise ValueError(f"The directory {img_dir_path} is not accessible or readable!")
      
  """
  Get the length of data set
  """
  def __len__(self):
    return len(self.df)

  """
  Get the image and label of the data set
  Args:
    index: int, the index of the image
  Returns:  
    image: PIL image, the image of the data set
    label: int, the label of the data set
  """
  def __getitem__(self, index):
    img_name = os.path.join(self.img_dir, str(self.df.iloc[index, 0])) 
    image = Image.open(img_name + ".jpg")

    if self.label_column == 'instability_type':
      label = self.df.iloc[index,4] 
  
    elif self.label_column == 'stable_height': 
      label = self.df.iloc[index,-1] 
    
    elif self.label_column == 'total_height':
      label = self.df.iloc[index, 3]
    else:
      try:
        print(self.label_column)
      except:
        print("Not found label column")
        
    if self.transform:
      image = self.transform(image)

    return image, label 

In [5]:
class FineTunedGoogLeNet(nn.Module):
    def __init__(self):
        super(FineTunedGoogLeNet, self).__init__()
        # load the pre-trained model: gogglenet
        self.googlenet = models.googlenet(weights = models.GoogLeNet_Weights.IMAGENET1K_V1)

        num_ftrs = self.googlenet.fc.in_features
        self.googlenet.fc = nn.Identity()

        self.fc = nn.Sequential(
            nn.Linear(num_ftrs, 256),
            nn.ReLU(),
            nn.Dropout(0.25),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Dropout(0.25),
            nn.Linear(128, 6)

        )
    def forward(self, x):
      x = self.googlenet(x)
      x = self.fc(x)
      return x

In [6]:

class SimpleFineTunedGoogLeNet(nn.Module):
    def __init__(self):
        super(SimpleFineTunedGoogLeNet, self).__init__()

        self.googlenet = models.googlenet(weights = models.GoogLeNet_Weights.IMAGENET1K_V1)

        num_ftrs = self.googlenet.fc.in_features
        self.googlenet.fc = nn.Identity()

        self.fc = nn.Sequential(
            nn.Linear(num_ftrs, 128),
            nn.ReLU(),
            nn.Dropout(0.50),
            nn.Linear(128, 3)
        )
    def forward(self, x):
      x = self.googlenet(x)
      x = self.fc(x)
      return x

In [7]:
csv_path = './COMP90086_2024_Project_train/train.csv'
img_dir = './COMP90086_2024_Project_train/train'

# Validation Prediction

# Load Model Template

In [8]:
class ModelTemplate:
    def __init__(self, model_path, model, label_column, 
                csv_path, img_dir, 
                stratify_column='stable_height', 
                test_size=0.2,
                batch_size=32,
                random_state = 42):

        self.model_path = model_path
        self.model = model
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.model.to(self.device)
        self.label_column = label_column
        
        self.img_dir = img_dir
        self.data_frame = pd.read_csv(csv_path)
        self.stratify_column = stratify_column
        self.test_size = test_size
        self.random_state = random_state
        self.batch_size = batch_size

        # preprocess images 
        self.transform = transforms.Compose([
            # 这个变换将图像从PIL图像或NumPy数组转换为PyTorch张量。转换过程中，图像的像素值会被归一化到[0, 1]区间，
            # 并且通道顺序会被调整为(C, H, W)，即通道数在前，高度和宽度在后。
            transforms.ToTensor(),
            # 这个变换对图像进行标准化。标准化操作是将图像的每个通道减去该通道的均值，然后除以该通道的标准差。
            # 这里的均值和标准差是预训练的模型
            # 在ImageNet数据集上计算得到的，用于确保图像的分布与模型训练时的分布一致，从而提高模型的泛化能力。
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])


        # split data into train and validation 
        # Create data loader
        self.train_data, self.val_data = self.split_train_valid()
        self.train_loader = self.generate_dataloader(self.transform, self.train_data)
        self.valid_loader = self.generate_dataloader(self.transform, self.val_data,  shuffle=False)
        
        self.model = self.load_model()


    def split_train_valid(self):
        train_data, val_data = train_test_split(
                                    self.data_frame,
                                    test_size=self.test_size,
                                    random_state=self.random_state,
                                    stratify=self.data_frame[self.stratify_column]
                                )

        print(f"Train dataset size: {len(train_data)}",
                f"Validation dataset size: {len(val_data)}")
        return train_data, val_data


    def load_model(self):
      #  self.model.load_state_dict(torch.load(self.model_path, map_location=self.device,weights_only=True))
        self.model.load_state_dict(torch.load(self.model_path, map_location=self.device,weights_only=True))

        self.model.eval()
        return self.model


    def generate_dataloader(self, transform, data_frame, shuffle=True):
      #  print("label column type", self.label_column)
        dataset = ReadData(data_frame, self.img_dir,self.label_column, transform=transform) 
        return DataLoader(dataset, batch_size=self.batch_size, shuffle=shuffle)



    def generate_classification_report(self, outputs, labels):
        matrix = confusion_matrix(labels, outputs)
        print(classification_report(labels, outputs, zero_division=0))
        print(matrix)


    def validate(self):
        self.model.eval()
        num_cor_pred = 0
        num_samples = 0
        gt_labels = []
        pred_labels = []

        with torch.no_grad():
            for inputs, labels in self.valid_loader:
                inputs, labels = inputs.to(self.device), labels.to(self.device).long()
                outputs = self.model(inputs)
                pred_class = torch.argmax(outputs, 1)
                if self.label_column == 'stable_height' or self.label_column == 'total_height':
                    pred_class = pred_class + 1
          #      labels = labels - 1

                gt_labels.extend(labels.cpu().numpy())
                pred_labels.extend(pred_class.cpu().numpy())
                # calculate number of correct predictions
                num_cor_pred += (pred_class == labels).sum().item()
                num_samples += labels.size(0)

        # calculate the accuracy rate
        val_accuracy = num_cor_pred / num_samples
        self.generate_classification_report(np.array(pred_labels), np.array(gt_labels))
        self.pred_labels = pred_labels
        return val_accuracy

## Model 1: directly predict the stable height

In [9]:
valider_stabHei = ModelTemplate(
    model_path = './runs/20241012-StableHeight-BestModel-net8-acc0666/best_model.pth',
    model=FineTunedGoogLeNet(),
    label_column = 'stable_height',
    csv_path = csv_path, img_dir= img_dir, 
    test_size=0.2,
    batch_size=32
    )
valider_stabHei.validate()

Train dataset size: 6144 Validation dataset size: 1536
              precision    recall  f1-score   support

           1       0.74      0.72      0.73       384
           2       0.73      0.72      0.72       384
           3       0.64      0.70      0.67       307
           4       0.59      0.66      0.62       230
           5       0.62      0.55      0.59       154
           6       0.54      0.34      0.42        77

    accuracy                           0.67      1536
   macro avg       0.64      0.62      0.62      1536
weighted avg       0.67      0.67      0.67      1536

[[278  37  29  27  10   3]
 [ 38 278  30  24   9   5]
 [ 25  40 215  18   4   5]
 [ 18  11  33 151  13   4]
 [  8   7  18  31  85   5]
 [ 10  10  13   3  15  26]]


0.6725260416666666

In [14]:
valid_df = valider_stabHei.val_data
all_pred = valider_stabHei.pred_labels
valid_df['pred_label'] = all_pred
valid_df['pred_type'] = valid_df['pred_label'] == valid_df['stable_height']
valid_df.pred_label.value_counts()

pred_label
2    383
1    377
3    338
4    254
5    136
6     48
Name: count, dtype: int64

In [15]:
valid_df.groupby('instability_type').pred_type.sum()/valid_df.groupby('instability_type').pred_type.count()

instability_type
0    0.560976
1    0.594595
2    0.933333
Name: pred_type, dtype: float64

In [38]:
valid_df.head()

Unnamed: 0,id,shapeset,type,total_height,instability_type,cam_angle,stable_height,pred_label,pred_type
7334,956915,2,2,4,2,1,1,1,True
3955,516709,1,2,5,1,1,3,2,False
619,77447,1,2,5,1,1,3,3,True
1594,212770,2,2,4,2,1,1,1,True
5645,745098,2,2,6,1,1,4,2,False


In [104]:
valid_df.groupby(['instability_type', 'type']).pred_type.sum()/valid_df.groupby(['instability_type', 'type']).pred_type.count()

instability_type  type
0                 1       0.784431
                  2       0.376238
1                 1       0.733333
                  2       0.454780
2                 1       0.932039
                  2       0.934783
Name: pred_type, dtype: float64

In [106]:
valid_df.groupby(['instability_type', 'type','cam_angle']).pred_type.count()

instability_type  type  cam_angle
0                 1     1            116
                        2             51
                  2     1            150
                        2             52
1                 1     1            291
                        2             99
                  2     1            295
                        2             92
2                 1     1            161
                        2             45
                  2     1            141
                        2             43
Name: pred_type, dtype: int64

In [108]:
valid_df.groupby(['instability_type', 'type','cam_angle']).pred_type.sum()/valid_df.groupby(['instability_type', 'type','cam_angle']).pred_type.count()

instability_type  type  cam_angle
0                 1     1            0.853448
                        2            0.627451
                  2     1            0.380000
                        2            0.365385
1                 1     1            0.745704
                        2            0.696970
                  2     1            0.477966
                        2            0.380435
2                 1     1            0.987578
                        2            0.733333
                  2     1            0.964539
                        2            0.837209
Name: pred_type, dtype: float64

In [111]:
valid_df.groupby(['instability_type','shapeset']).pred_type.sum()/valid_df.groupby(['instability_type','shapeset']).pred_type.count()

instability_type  shapeset
0                 1           0.530435
                  2           0.574803
1                 1           0.687351
                  2           0.486034
2                 2           0.933333
Name: pred_type, dtype: float64

In [115]:
valid_df.groupby(['shapeset','type']).pred_type.sum()/valid_df.groupby(['shapeset','type']).pred_type.count()

shapeset  type
1         1       0.860465
          2       0.460145
2         1       0.766337
          2       0.597586
Name: pred_type, dtype: float64

In [118]:
valid_df[(valid_df['type']==2)&(valid_df['pred_type']==False)&(valid_df['instability_type']==0)&(valid_df['shapeset']==2)]

Unnamed: 0,id,shapeset,type,total_height,instability_type,cam_angle,stable_height,pred_label,pred_type
1745,231667,2,2,6,0,1,6,5,False
4744,626871,2,2,5,0,1,5,3,False
2714,357993,2,2,3,0,1,3,1,False
6493,852975,2,2,5,0,1,5,1,False
5391,711689,2,2,2,0,2,2,1,False
...,...,...,...,...,...,...,...,...,...
2878,380572,2,2,2,0,1,2,1,False
1547,206872,2,2,2,0,2,2,1,False
2096,279373,2,2,5,0,1,5,4,False
6836,893651,2,2,3,0,1,3,2,False


In [42]:
[valid_df['total_height'] - valid_df['pred_label']].value_counts()

AttributeError: 'list' object has no attribute 'value_counts'

In [40]:
pred_0 = valid_df[(valid_df['instability_type']==0)&(valid_df['type']==2)] 
pred_0.loc[:,'diff_pred_tt'] = pred_0['total_height'] - pred_0['pred_label']
pred_0['diff_pred_tt'].value_counts()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  pred_0.loc[:,'diff_pred_tt'] = pred_0['total_height'] - pred_0['pred_label']


diff_pred_tt
0    76
1    59
2    27
3    20
4    13
5     7
Name: count, dtype: int64

## Model 2: Predict the instability type

In [10]:
valider_instabType = ModelTemplate(
    model_path = './runs/20241013-Class-InstabilityType-ClassWeight114/best_model.pth',
    model= SimpleFineTunedGoogLeNet(),
    label_column = 'instability_type',
    csv_path = csv_path, img_dir= img_dir, 
    test_size=0.2,
    batch_size=32
    )
valider_instabType.validate()

Train dataset size: 6144 Validation dataset size: 1536
              precision    recall  f1-score   support

           0       0.54      0.22      0.31       369
           1       0.70      0.88      0.78       777
           2       0.91      0.95      0.93       390

    accuracy                           0.74      1536
   macro avg       0.72      0.68      0.68      1536
weighted avg       0.72      0.74      0.71      1536

[[ 82 273  14]
 [ 68 687  22]
 [  2  19 369]]


0.7408854166666666

### Fewer layers in fine tuned googlenet

In [11]:
valider_instabType = ModelTemplate(
    model_path = './runs/20241013-Class-InstabilityType-net8/best_model.pth',
    model= FineTunedGoogLeNet(),
    label_column = 'instability_type',
    csv_path = csv_path, img_dir= img_dir, 
    test_size=0.2,
    batch_size=32
    )
valider_instabType.validate()

Train dataset size: 6144 Validation dataset size: 1536
              precision    recall  f1-score   support

           0       0.61      0.43      0.50       369
           1       0.75      0.88      0.81       777
           2       0.98      0.92      0.95       390

    accuracy                           0.78      1536
   macro avg       0.78      0.74      0.75      1536
weighted avg       0.77      0.78      0.77      1536

[[157 207   5]
 [ 92 681   4]
 [  8  22 360]]


0.7799479166666666

In [16]:
instable_df = valider_instabType.val_data
predictions = valider_instabType.pred_labels

instable_df['pred_label'] = predictions
instable_df['pred_type'] = instable_df['pred_label'] == instable_df['instability_type']
instable_df.head()

Unnamed: 0,id,shapeset,type,total_height,instability_type,cam_angle,stable_height,pred_label,pred_type
7334,956915,2,2,4,2,1,1,2,True
3955,516709,1,2,5,1,1,3,1,True
619,77447,1,2,5,1,1,3,1,True
1594,212770,2,2,4,2,1,1,2,True
5645,745098,2,2,6,1,1,4,1,True


In [26]:
instable_df.groupby(['instability_type', 'type']).pred_type.sum()/instable_df.groupby(['instability_type', 'type']).pred_type.count()

instability_type  type
0                 1       0.646707
                  2       0.242574
1                 1       0.846154
                  2       0.906977
2                 1       0.932039
                  2       0.913043
Name: pred_type, dtype: float64

In [46]:
instable_df[(instable_df['type']==2) &
            (instable_df['instability_type']==0)]

Unnamed: 0,id,shapeset,type,total_height,instability_type,cam_angle,stable_height,pred_label,pred_type
1745,231667,2,2,6,0,1,6,1,False
800,102092,1,2,5,0,1,5,1,False
5949,783872,1,2,4,0,1,4,0,True
4744,626871,2,2,5,0,1,5,1,False
37,5137,1,2,3,0,1,3,1,False
...,...,...,...,...,...,...,...,...,...
7216,940729,2,2,4,0,1,4,0,True
6836,893651,2,2,3,0,1,3,1,False
4012,524778,1,2,6,0,1,6,1,False
5966,785888,2,2,5,0,1,5,1,False


In [45]:
instable_df[(instable_df['type']==2) &
            ((instable_df['instability_type']==0)|
            (instable_df['instability_type']==1))]

Unnamed: 0,id,shapeset,type,total_height,instability_type,cam_angle,stable_height,pred_label,pred_type
3955,516709,1,2,5,1,1,3,1,True
619,77447,1,2,5,1,1,3,1,True
5645,745098,2,2,6,1,1,4,1,True
2807,370164,1,2,6,1,1,5,1,True
1745,231667,2,2,6,0,1,6,1,False
...,...,...,...,...,...,...,...,...,...
4026,526081,1,2,6,1,1,5,1,True
5057,667657,2,2,3,1,1,1,1,True
174,23099,2,2,5,0,2,5,0,True
5389,711533,1,2,3,1,1,2,1,True


In [29]:
hard_0 = instable_df[(instable_df['instability_type']==0)&(instable_df['type']==2)&(instable_df['pred_label']==1)]
hard_0

Unnamed: 0,id,shapeset,type,total_height,instability_type,cam_angle,stable_height,pred_label,pred_type
1745,231667,2,2,6,0,1,6,1,False
800,102092,1,2,5,0,1,5,1,False
4744,626871,2,2,5,0,1,5,1,False
37,5137,1,2,3,0,1,3,1,False
3032,398925,1,2,6,0,1,6,1,False
...,...,...,...,...,...,...,...,...,...
1547,206872,2,2,2,0,2,2,1,False
2096,279373,2,2,5,0,1,5,1,False
6836,893651,2,2,3,0,1,3,1,False
4012,524778,1,2,6,0,1,6,1,False


In [32]:
hard_0.type.value_counts()

type
2    149
Name: count, dtype: int64

In [34]:
instable_df.groupby(['instability_type','type']).pred_type.sum()/instable_df.groupby(['instability_type','type']).pred_type.count()

instability_type  type
0                 1       0.646707
                  2       0.242574
1                 1       0.846154
                  2       0.906977
2                 1       0.932039
                  2       0.913043
Name: pred_type, dtype: float64

# Model 3

In [12]:
valider_tt_height = ModelTemplate(
    model_path = './runs/20241014-TotalHeight-Classification-net8/best_model.pth',
    model=FineTunedGoogLeNet(),
    label_column = 'total_height',
    csv_path = csv_path, img_dir= img_dir, 
    test_size=0.2,
    batch_size=32
    )
valider_tt_height.validate()

Train dataset size: 6144 Validation dataset size: 1536
              precision    recall  f1-score   support

           2       0.95      1.00      0.97       149
           3       0.93      0.96      0.95       222
           4       0.92      0.95      0.94       304
           5       0.94      0.91      0.92       395
           6       0.98      0.94      0.96       466

    accuracy                           0.95      1536
   macro avg       0.94      0.95      0.95      1536
weighted avg       0.95      0.95      0.95      1536

[[149   0   0   0   0]
 [  8 213   1   0   0]
 [  0  14 290   0   0]
 [  0   1  24 360  10]
 [  0   0   1  25 440]]


0.9453125

In [13]:
tt_df = valider_tt_height.val_data
pred_labels = valider_tt_height.pred_labels
tt_df['pred_label'] = pred_labels
tt_df['pred_type'] = tt_df['pred_label'] == tt_df['total_height']
tt_df.head()

Unnamed: 0,id,shapeset,type,total_height,instability_type,cam_angle,stable_height,pred_label,pred_type
7334,956915,2,2,4,2,1,1,4,True
3955,516709,1,2,5,1,1,3,5,True
619,77447,1,2,5,1,1,3,5,True
1594,212770,2,2,4,2,1,1,4,True
5645,745098,2,2,6,1,1,4,6,True


In [18]:
tt_df.groupby('instability_type').pred_type.sum()/tt_df.groupby('instability_type').pred_type.count()

instability_type
0    0.951220
1    0.945946
2    0.938462
Name: pred_type, dtype: float64

In [18]:
stable_type_gt = tt_df[tt_df['instability_type']==0].total_height.tolist()
stable_type_pred = tt_df[tt_df['instability_type']==0].pred_label.tolist()
confusion_matrix(stable_type_gt, stable_type_pred)

array([[71,  0,  0,  0,  0],
       [ 4, 79,  1,  0,  0],
       [ 0,  5, 65,  0,  0],
       [ 0,  0,  3, 61,  3],
       [ 0,  0,  0,  2, 75]])

# Combine three models

In [19]:
instab_predictions = valider_instabType.pred_labels
tt_predictions = valider_tt_height.pred_labels
stable_hei_predictions = valider_stabHei.pred_labels

In [22]:
gt_stable_hei = valider_stabHei.val_data.stable_height.tolist()

In [21]:
combined_predictions = []
for idx in range(len(instab_predictions)):
    if instab_predictions[idx] == 0:
        combined_predictions.append(tt_predictions[idx])
    else:
        combined_predictions.append(stable_hei_predictions[idx])

In [26]:
confusion_matrix(gt_stable_hei, stable_hei_predictions)

array([[278,  37,  29,  27,  10,   3],
       [ 38, 278,  30,  24,   9,   5],
       [ 25,  40, 215,  18,   4,   5],
       [ 18,  11,  33, 151,  13,   4],
       [  8,   7,  18,  31,  85,   5],
       [ 10,  10,  13,   3,  15,  26]])

In [24]:
confusion_matrix(gt_stable_hei, combined_predictions)

array([[258,  45,  34,  31,  12,   4],
       [ 32, 279,  31,  27,   9,   6],
       [ 20,  34, 219,  20,   8,   6],
       [ 16,  11,  28, 152,  17,   6],
       [  8,   7,  18,  27,  87,   7],
       [  9,   9,  13,   3,  15,  28]])

In [27]:
accuracy_score(gt_stable_hei, stable_hei_predictions)

0.6725260416666666

# Test prediction

In [None]:
class TestDataset(Dataset):
  '''
  get the image and the file path
  '''
  def __init__(self, csv_file, img_dir, transform = None):
    self.data_frame = pd.read_csv(csv_file)
    self.img_dir = img_dir
    self.transform = transform

  '''
  Return the size of the dataset
  '''
  def __len__(self):
    return len(self.data_frame)

  '''
  get the image and related column
  '''
  def __getitem__(self, idx):
    img_name = os.path.join(self.img_dir, str(self.data_frame.iloc[idx, 0]))
    image = Image.open(img_name + ".jpg")
    if self.transform:
      image = self.transform(image)
    return image, self.data_frame.iloc[idx, 0]



class BlockStackPredictor:
    def __init__(self, model, model_path, test_csv, img_dir, batch_size=32):
        self.model = model
        self.model_path = model_path
        self.test_csv = test_csv
        self.img_dir = img_dir
        self.batch_size = batch_size

        # 设备配置
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        self.model.to(self.device)

        # 加载模型权重
        #self.model = self.load_model(self.model, self.model_path)
        self.model = self.load_model()

        # 图像转换操作
        self.transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
        ])

        # 创建 DataLoader
        self.test_loader = self.create_dataloader()

    #改
    def load_model(self):
        """加载训练好的模型权重。

        参数:
        model (nn.Module): 要加载权重的模型。
        model_path (str): 模型权重文件的路径。

        返回:
        model (nn.Module): 加载了权重的模型。
        """
        self.model.load_state_dict(torch.load(self.model_path, map_location=self.device))
        self.model.eval()
        return model

    def create_dataloader(self):
        test_dataset = TestDataset(self.test_csv, self.img_dir, transform=self.transform)
        test_loader = DataLoader(test_dataset, batch_size=self.batch_size, shuffle=False)
        return test_loader



    def predict(self):
        all_predictions = []
        all_image_ids = []

        with torch.no_grad():
            for images, image_ids in tqdm(self.test_loader, desc="testing"):
                images = images.to(self.device)
                outputs = self.model(images)
                #outputs = torch.round(outputs).clamp(1,6) #在1-6之间输出
                predictions = torch.argmax(outputs, dim=1).cpu().numpy()
                #predictions = outputs.squeeze().cpu().numpy()
                predictions = predictions.astype(int) + 1
                #predictions = predictions.astype(int)

                all_predictions.extend(predictions)
                all_image_ids.extend(image_ids.numpy())


        prediction_df = pd.DataFrame({
            'id': all_image_ids,
            'stable_height': all_predictions
             })

        return prediction_df

    def save_predictions(self, output_csv):
      """将预测结果保存为CSV文件。"""
      prediction_df = self.predict()
      prediction_df.to_csv(output_csv, index=False)
      print(f"预测结果已保存到 {output_csv}")

In [None]:
if __name__ == "__main__":
    # Initialize the model object from a defined class BlockStackNet7
    model = TunnedBlockStackNet11()

    # Path to the trained model's weights
    model_path = 'Physical_Reasoning/runs/20241012-StableHeight-BestModel-net8-acc0666/best_model.pth' # BLOCK10,reg-CrossEntropy(128,6)-start-512-epoch-30 validation-loss:0.652 test:
    # Directory where test images are stored
    img_dir = '/content/drive/MyDrive/CV final project/test data/test'

    # Create a predictor instance with the specified model, model path, test data csv file, and image directory
    predictor = BlockStackPredictor(
        model=model,
        model_path=model_path,
        test_csv=test_csv,
        img_dir=img_dir,
        batch_size=32  # Specifying the batch size for processing
    )

    # Use the predictor to generate predictions
    prediction_df = predictor.predict()

    # Save the predictions to a CSV file
    predictor.save_predictions('/content/drive/MyDrive/CV final project/BLOCK11-reg-CrossEntropy-30epoches-start256-128-6-034836.csv')