In [1]:

import torch
from torch import nn
import torch.nn.functional as F
from sklearn.metrics import f1_score, accuracy_score
from tqdm import tqdm
import cached_dataloader
from PIL import Image

from transformers import ViTForImageClassification, ViTFeatureExtractor, ViTImageProcessor, ViTModel
from sklearn.metrics import f1_score, accuracy_score
from tqdm import tqdm

In [2]:
# global variables
BATCH_SIZE = 64
TRAIN_SPLIT = 0.9
MLP_HIDDEN_SIZES = [1024,512,256]
DROPOUT_PROB = [0, 0, 0]
LR = 0.1
MOMENTUM = 0.9
model_vit = ViTModel.from_pretrained('google/vit-base-patch16-224-in21k')
img_processor = ViTImageProcessor.from_pretrained('google/vit-base-patch16-224-in21k')

configuration={}
configuration['Models']={}
configuration['Models']['mlp_num_classes']=2
configuration['Models']['mlp_hidden_sizes']= [1024,512,256]
configuration['Models']['mlp_dropout_prob']=[0.5,0.5,0.4]
configuration['Models']['encoder_finetuning']=True
configuration['Models']['image_processor']=img_processor
configuration['Dataset']={}
configuration['Dataset']['batch_size']=BATCH_SIZE


device = torch.device("cuda" if torch.cuda.is_available() else "mps")
print(device)
train_dataset, val_dataset = cached_dataloader.getData(BATCH_SIZE, TRAIN_SPLIT)



for x in train_dataset:
    print(type(x))
    break

for x in val_dataset:
    print(type(x))
    break


mps
<class 'tuple'>
<class 'tuple'>


In [3]:

class MLP(nn.Module):
    def __init__(self, in_channels, num_classes, hidden_sizes=[128, 64], dropout_probability=[0.5,0.7]):
        super(MLP, self).__init__()
        assert len(hidden_sizes) >= 1 , "specify at least one hidden layer"
        
        self.layers = self.create_layers(in_channels, num_classes, hidden_sizes, dropout_probability)


    def create_layers(self, in_channels, num_classes, hidden_sizes, dropout_probability):
        layers = []
        layer_sizes = [in_channels] + hidden_sizes + [num_classes]
        for i in range(len(layer_sizes)-1):
            layers.append(nn.Linear(layer_sizes[i], layer_sizes[i+1]))
            if i < len(layer_sizes)-2:
                layers.append(nn.BatchNorm1d(layer_sizes[i+1]))
                layers.append(nn.ReLU())
                layers.append(nn.Dropout(dropout_probability[i]))
            else:
                layers.append(nn.Softmax(dim=1))
        return nn.Sequential(*layers)

    def forward(self, x):
        out = x.view(x.shape[0], -1)
        out = self.layers(out)
        return out




In [4]:

class CombinedModel(nn.Module):
    def __init__(self, modality1, configuration,device):
        super().__init__()
        self.device = device
        self.modality1 = modality1.to(self.device)
        self.config = configuration

        self.head = MLP(in_channels=self._calculate_in_features(),
                            num_classes=self.config['Models']['mlp_num_classes'],
                            hidden_sizes=self.config['Models']['mlp_hidden_sizes'], 
                            dropout_probability= self.config['Models']['mlp_dropout_prob']).to(self.device)

        if(configuration['Models']['encoder_finetuning'] == False):
            for param in self.modality1.parameters():
                param.requires_grad = False

  

        for param in self.head.parameters():
            param.requires_grad = True

    def forward(self, input1):
        image_output = self.modality1(input1)['last_hidden_state'].to(self.device)
        image_output = torch.nn.Flatten()(image_output).to(self.device)
        head_output = self.head(image_output).to(self.device)
        return head_output

    def _calculate_in_features(self):
        # Create an example input and pass it through the network to get the output size
        image_list=[]
        img_batch = torch.randint(0, 255, size=(self.config['Dataset']['batch_size'], 3, 224, 224)).float()
        image_list.extend([image for image in img_batch])
        img_processor = self.config['Models']['image_processor']
        input1 = img_processor(image_list, return_tensors='pt').to(self.device) 
        image_output = self.modality1(**input1)['last_hidden_state'].to(self.device)
        image_output = torch.nn.Flatten()(image_output).to(self.device)
        return image_output.shape[1]





In [5]:

model=CombinedModel(model_vit,configuration,device)



# Define the loss function and optimizer
loss_function = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-5)



In [6]:

# Define the training loop
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    train_loss = 0.0
    train_accuracy = 0.0
    train_f1 = 0.0
    image_list = []
    for i, (images, labels) in tqdm(enumerate(train_dataset), total = len(train_dataset), desc=f"[Epoch {epoch}]",ascii=' >='):
        image_list.extend([image for image in images])
        labels = labels.to(device)
        processed_imgs = img_processor(image_list, return_tensors='pt', data_format='channels_first').to(device)
        outputs = model(processed_imgs['pixel_values'])
        loss = loss_function(outputs, labels)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        image_list = []
        train_loss += loss.item()
        _, preds = torch.max(outputs, dim=1)
        train_accuracy += accuracy_score(labels.cpu(), preds.cpu())
        train_f1 += f1_score(labels.cpu(), preds.cpu(), average='macro')
        
    train_loss /= len(train_dataset)
    train_accuracy /= len(train_dataset)
    train_f1 /= len(train_dataset)
    
    model.eval()
    val_loss = 0.0
    val_accuracy = 0.0
    val_f1 = 0.0
    with torch.no_grad():
        for j, (images, labels) in tqdm(enumerate(val_dataset), total = len(val_dataset), desc=f"[Epoch {epoch}]",ascii=' >='):
            image_list.extend([image for image in images])
            labels = labels.to(device)
            processed_imgs = img_processor(image_list, return_tensors='pt', data_format='channels_first').to(device)
            outputs = model(processed_imgs['pixel_values'])
            loss = loss_function(outputs, labels)
            image_list = []
            val_loss += loss.item()
            _, preds = torch.max(outputs, dim=1)
            val_accuracy += accuracy_score(labels.cpu(), preds.cpu())
            val_f1 += f1_score(labels.cpu(), preds.cpu(), average='macro')
    
    val_loss /= len(val_dataset)
    val_accuracy /= len(val_dataset)
    val_f1 /= len(val_dataset)
    
    print(f"Epoch {epoch+1} | Train loss: {train_loss:.4f} | Train accuracy: {train_accuracy:.4f} | Train F1-score: {train_f1:.4f} | Validation loss: {val_loss:.4f} | Validation accuracy: {val_accuracy:.4f} | Validation F1-score: {val_f1:.4f}")
# Create some sample input data
input_data = torch.randn(BATCH_SIZE,3,224,224).to(device)





Epoch 1 | Train loss: 0.4016 | Train accuracy: 0.9394 | Train F1-score: 0.9383 | Validation loss: 0.8573 | Validation accuracy: 0.4559 | Validation F1-score: 0.3119




Epoch 2 | Train loss: 0.3381 | Train accuracy: 0.9845 | Train F1-score: 0.9841 | Validation loss: 0.5370 | Validation accuracy: 0.7476 | Validation F1-score: 0.7371




Epoch 3 | Train loss: 0.3278 | Train accuracy: 0.9902 | Train F1-score: 0.9899 | Validation loss: 0.4170 | Validation accuracy: 0.9103 | Validation F1-score: 0.9088




Epoch 4 | Train loss: 0.3228 | Train accuracy: 0.9933 | Train F1-score: 0.9931 | Validation loss: 0.7692 | Validation accuracy: 0.5441 | Validation F1-score: 0.3513




Epoch 5 | Train loss: 0.3199 | Train accuracy: 0.9951 | Train F1-score: 0.9949 | Validation loss: 0.7692 | Validation accuracy: 0.5441 | Validation F1-score: 0.3513




KeyboardInterrupt: 

In [7]:

# Specify the file path where the model is saved
file_path = 'modelvision.pth'

# Load the state dictionary from the saved file
torch.save(model.state_dict(), file_path)

In [6]:
file_path = 'modelvision.pth'
model_train = CombinedModel(model_vit,configuration,device)

model_train.load_state_dict(torch.load(file_path))

<All keys matched successfully>

In [7]:
import pandas as pd 

In [8]:
test_data = pd.read_csv("/Users/vikashmediboina/Downloads/early-detection-of-3d-printing-issues/test.csv")
test_data["img_path"]= ["/Users/vikashmediboina/Downloads/early-detection-of-3d-printing-issues/images/" + each for each in test_data["img_path"]]

In [9]:
device = torch.device("cuda" if torch.cuda.is_available() else "mps")
print(device)

mps


In [17]:
from sklearn.metrics import f1_score, accuracy_score

# Define the ViT model and feature extractor
img_processor = ViTImageProcessor.from_pretrained('google/vit-base-patch16-224-in21k')

# Define the device to train the model on
device = torch.device("cuda" if torch.cuda.is_available() else "mps")
# trained_model = model_train.to(device)
model_train.to(device).eval()
predicted_labels=[] 
submission = {}
# Define the testing loop
with torch.no_grad():
    for image_path in test_data["img_path"]:
        image = Image.open(image_path).convert("RGB")
        processed_img = img_processor(image, return_tensors='pt', data_format='channels_first').to(device)
        outputs = model_train(processed_img['pixel_values'])
        # print(outputs)
        # preds_probs = outputs.prediction.softmax(dim=-1)
        preds = torch.argmax(torch.softmax(outputs, dim=1),axis=1)

        # print(preds)
        predicted_labels.append(preds.item())
        # print(preds.item())
        submission[image_path] = preds.item()


In [26]:
# test_data["img_path"]
test_data['new_column'] = test_data['img_path'].str.slice(77)
submission

{'/Users/vikashmediboina/Downloads/early-detection-of-3d-printing-issues/images/101/1678578332/1678578538.704966.jpg': 1,
 '/Users/vikashmediboina/Downloads/early-detection-of-3d-printing-issues/images/101/1678578332/1678578539.108019.jpg': 1,
 '/Users/vikashmediboina/Downloads/early-detection-of-3d-printing-issues/images/101/1678578332/1678578539.512872.jpg': 1,
 '/Users/vikashmediboina/Downloads/early-detection-of-3d-printing-issues/images/101/1678578332/1678578539.916711.jpg': 1,
 '/Users/vikashmediboina/Downloads/early-detection-of-3d-printing-issues/images/101/1678578332/1678578540.329089.jpg': 1,
 '/Users/vikashmediboina/Downloads/early-detection-of-3d-printing-issues/images/101/1678578332/1678578540.731776.jpg': 1,
 '/Users/vikashmediboina/Downloads/early-detection-of-3d-printing-issues/images/101/1678578332/1678578541.134508.jpg': 1,
 '/Users/vikashmediboina/Downloads/early-detection-of-3d-printing-issues/images/101/1678578332/1678578541.539697.jpg': 1,
 '/Users/vikashmediboina

In [34]:

df = pd.DataFrame({"img_path": submission.keys(), "has_under_extrusion": submission.values()})

In [35]:
df['img_path'] = df['img_path'].str.slice(78)
df

Unnamed: 0,img_path,has_under_extrusion
0,101/1678578332/1678578538.704966.jpg,1
1,101/1678578332/1678578539.108019.jpg,1
2,101/1678578332/1678578539.512872.jpg,1
3,101/1678578332/1678578539.916711.jpg,1
4,101/1678578332/1678578540.329089.jpg,1
...,...,...
25274,022/1672795514/1672796229.177132.jpg,1
25275,022/1672795514/1672796230.177629.jpg,1
25276,022/1672795514/1672796231.187994.jpg,1
25277,022/1672795514/1672796232.19111.jpg,1


In [36]:
df.to_csv("vision_output.csv",index=False)

In [None]:
# Save your PyTorch model in TorchScript format
traced_model = torch.jit.trace(model, input_data)
traced_model.save("ViTmodel.pt")