In [1]:
import torchvision
from datasets import load_dataset
import numpy as np
from transformers import AutoImageProcessor, FlavaImageModel, FlavaModel, FlavaFeatureExtractor, AutoModelForCausalLM
import torch
from sklearn.model_selection import train_test_split
from PIL import Image

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# About 60k images of 3D-printed objects and look-alikes, with each of the two
# classes making up roughly half of the data. It is a medium-sized image
# classification dataset that FLAVA has not seen or been trained on before.
DATASET_NAME = "cmudrc/3d-printed-or-not"
dataset = load_dataset(DATASET_NAME)

Found cached dataset parquet (/home/IAIS/jraghu/.cache/huggingface/datasets/cmudrc___parquet/cmudrc--3d-printed-or-not-c7389bae8477e941/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
100%|██████████| 1/1 [00:00<00:00, 160.94it/s]


In [6]:
# Pull the image and label columns out of the 'train' split, leaving out the
# last 100 rows (those rows are reserved for the final hold-out evaluation).
train_split = dataset['train']
features_array = train_split['image'][:-100]
labels_array = train_split['label'][:-100]

In [10]:
# Keep the final 100 rows of the 'train' split untouched for testing. These are
# exactly the rows excluded by the [:-100] slices used to build the training data.
# NOTE(review): the rows are taken in stored order, not sampled at random; if the
# dataset is ordered by class or source this hold-out may be unrepresentative —
# confirm.
heldout_split = dataset['train']
test_set_features = heldout_split['image'][-100:]
test_set_labels = heldout_split['label'][-100:]


In [13]:
# Load the pretrained FLAVA image encoder together with its image processor.
FLAVA_CHECKPOINT = "facebook/flava-full"
image_processor = AutoImageProcessor.from_pretrained(FLAVA_CHECKPOINT)
model = FlavaImageModel.from_pretrained(FLAVA_CHECKPOINT)
model = model.cuda()  # move the weights onto the GPU
model = model.eval()  # switch to evaluation mode; the encoder is only used for inference


Could not find image processor class in the image processor config or the model config. Loading based on pattern matching with the model's feature extractor configuration.


In [14]:
# Convert every training image to 3-channel RGB so that all images passed on to
# feature extraction share the same mode. Labels are not needed for this step,
# so only the images are iterated.
rgb_images_train = [sample.convert('RGB') for sample in features_array]

# Display the number of converted images as the cell output.
len(rgb_images_train)

51420

In [15]:
# Extract image features in mini-batches so that several images go through the
# model in a single forward pass.

def batching(batch_size, rgb_images):
    """
    Compute one feature vector per image, running the model on mini-batches.

    Uses the notebook-level ``image_processor`` and ``model`` objects. The inputs
    are moved to whatever device ``model`` currently lives on.

    Args:
        batch_size: Number of images sent through the model per forward pass.
            Must be a positive integer.
        rgb_images: Sequence of RGB images (must support ``len()`` and slicing).

    Returns:
        A list with one NumPy array per input image, in input order. Each array
        has shape (1, hidden_size) and holds the position-0 token of the model's
        ``last_hidden_state``. A trailing partial batch is processed as well, so
        the list always has ``len(rgb_images)`` entries.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    features_in_Tensor = []
    device = model.device

    with torch.no_grad():
        # Stepping by batch_size (instead of using len // batch_size) makes sure
        # the last, possibly shorter, batch is not dropped.
        for start_idx in range(0, len(rgb_images), batch_size):
            batch_images = rgb_images[start_idx:start_idx + batch_size]

            # One processor call and one forward pass for the whole batch.
            inputs = image_processor(batch_images, return_tensors="pt").to(device)
            outputs = model(**inputs)

            # Copy the position-0 embeddings to host memory right away so no
            # device tensor outlives the iteration.
            batch_features = outputs.last_hidden_state[:, 0, :].cpu().numpy()

            # Keep the historical per-image (1, hidden_size) layout so callers
            # can still apply np.squeeze(..., axis=1).
            features_in_Tensor.extend(batch_features[:, None, :])

            del inputs, outputs

    # Release cached blocks only after the device tensors have been dropped.
    if torch.cuda.is_available():
        torch.cuda.empty_cache()

    return features_in_Tensor

# Run feature extraction over the RGB training images; the result is the list
# returned by batching().
features_3d_print_train = batching(batch_size=10, rgb_images=rgb_images_train)

In [85]:
# (Disabled) Function to load and preprocess the images, converting each JPEG image into a flattened, normalized NumPy array. Everything in this cell is commented out.
# def preprocess_images(image_array):
#     num_images = len(image_array)

#     # Create an empty array to store the flattened and normalized images
#     processed_images = np.zeros((num_images, height * width * channels))

#     for img in range(num_images):
        

#         # Normalize the pixel values to a range between 0 and 1 (if the original range is [0, 255])
#         image = np.array(img) / 255.0

#         # Flatten the 3D image into a 1D feature vector
#         processed_images[img] = image.flatten()

#     return processed_images

# # Define the dimensions of the images
# height = 256
# width = 256
# channels = 3  # Assuming RGB images, change to 1 for grayscale

# # calling the preprocessing fn
# processed_images_train = preprocess_images(train_features)
# processed_images_test = preprocess_images(test_features)

In [18]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# batching() yields one (1, hidden_size) array per image; dropping the singleton
# axis gives a 2-D (n_images, hidden_size) feature matrix.
features_in_Numpy = np.squeeze(features_3d_print_train, axis=1)

# Hold out 20% of the extracted features for validation. The fixed random_state
# keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    features_in_Numpy, labels_array, test_size=0.2, random_state=42
)

# Linear probe on the frozen features: logistic regression with the L-BFGS solver.
logistic_model = LogisticRegression(solver='lbfgs', max_iter=1000)
logistic_model.fit(X_train, y_train)

# Score on the 20% split that was NOT used for fitting. This is a validation
# accuracy, not a training accuracy, so the printed label says so.
y_pred = logistic_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Validation set Accuracy:", accuracy)

Training set Accuracy: 0.9447685725398678


In [19]:

# Convert the held-out images to 3-channel RGB, mirroring the preparation of the
# training images. Labels are not needed for the conversion.
rgb_images_test = [sample.convert('RGB') for sample in test_set_features]

# Extract features for the held-out images and drop the singleton axis so the
# classifier receives a 2-D (n_images, hidden_size) matrix.
features_test = batching(batch_size=10, rgb_images=rgb_images_test)
features_test = np.squeeze(features_test, axis=1)

# Predict on the unseen images with the already-fitted logistic regression classifier.
y_pred_unseen = logistic_model.predict(features_test)

# Accuracy on the held-out test set.
accuracy = accuracy_score(test_set_labels, y_pred_unseen)
print("Testing set Accuracy:", accuracy)

tetsing set Accuracy: 0.8
