In [16]:
from PIL import Image
import torch
from transformers import AutoProcessor, AutoTokenizer, CLIPModel
import os
from sklearn.metrics import confusion_matrix, classification_report

# Load the pre-trained CLIP model
device = "cuda" if torch.cuda.is_available() else "cpu"
model = CLIPModel.from_pretrained("openai/clip-vit-large-patch14").to(device)

# Get the text tokenizer and processor for images
tokenizer = AutoTokenizer.from_pretrained("openai/clip-vit-large-patch14")
processor = AutoProcessor.from_pretrained("openai/clip-vit-large-patch14")

# Define the text prompts for different colors
color_prompts = ["a red car", "a blue car", "a green car", "a yellow car", "a black car","a white car","gray car"]
inputs = tokenizer(color_prompts, padding=True, return_tensors="pt").to(device)

# Get the text features (color descriptions)
text_features = model.get_text_features(**inputs)
text_features = text_features / text_features.norm(dim=-1, keepdim=True)  # Normalize text features

# Folder where your car images are stored
image_folder = "datacolor/color/blue"  # Change this to the folder containing your images

# Ground truth (the correct color for the images in your dataset)
ground_truth_color = "blue"

# Initialize performance metrics
total_images = 0
correct_predictions = 0

# Arrays to store the predicted and actual labels
y_true = []
y_pred = []

# Iterate over each image in the folder
for image_file in os.listdir(image_folder):
    if image_file.endswith(('png', 'jpg', 'jpeg')):
        total_images += 1

        # Load and preprocess the image
        image_path = os.path.join(image_folder, image_file)
        image = Image.open(image_path)

        # Preprocess the image to match CLIP's expected format
        inputs = processor(images=image, return_tensors="pt").to(device)

        # Get the image features
        image_features = model.get_image_features(**inputs)
        image_features = image_features / image_features.norm(dim=-1, keepdim=True)  # Normalize image features

        # Compute cosine similarity between image features and text features (without softmax)
        similarities = (image_features @ text_features.T).squeeze(0)  # Remove batch dimension

        # Get the color with the highest similarity score
        predicted_color_idx = similarities.argmax().item()
        predicted_color = color_prompts[predicted_color_idx].split()[1]  # extract the color from the prompt

        # Store the ground truth and predicted labels for evaluation
        y_true.append(ground_truth_color)
        y_pred.append(predicted_color)

        # Check if the prediction is correct (since ground truth is "green")
        if predicted_color == ground_truth_color:
            correct_predictions += 1

# Calculate accuracy
accuracy = correct_predictions / total_images * 100
print(f"Accuracy: {accuracy:.2f}%")

# Get unique labels from ground truth and predictions
unique_labels = sorted(list(set(y_true + y_pred)))  # Dynamically detect classes present in data

# Confusion Matrix and Classification Report
print("\nConfusion Matrix:")
conf_matrix = confusion_matrix(y_true, y_pred, labels=unique_labels)
print(conf_matrix)

print("\nClassification Report:")
class_report = classification_report(y_true, y_pred, target_names=unique_labels, labels=unique_labels)
print(class_report)
print("Unique labels in ground truth:", set(y_true))
print("Unique labels in predictions:", set(y_pred))




Accuracy: 76.32%

Confusion Matrix:
[[ 0  0  0  0  0]
 [ 4 29  3  1  1]
 [ 0  0  0  0  0]
 [ 0  0  0  0  0]
 [ 0  0  0  0  0]]

Classification Report:
              precision    recall  f1-score   support

       black       0.00      0.00      0.00         0
        blue       1.00      0.76      0.87        38
         car       0.00      0.00      0.00         0
       green       0.00      0.00      0.00         0
       white       0.00      0.00      0.00         0

    accuracy                           0.76        38
   macro avg       0.20      0.15      0.17        38
weighted avg       1.00      0.76      0.87        38

Unique labels in ground truth: {'blue'}
Unique labels in predictions: {'black', 'car', 'blue', 'white', 'green'}


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
from PIL import Image
import torch
from transformers import AutoProcessor, AutoTokenizer, CLIPModel
import os
from sklearn.metrics import confusion_matrix, classification_report

# Load the pre-trained CLIP model as the base
device = "cuda" if torch.cuda.is_available() else "cpu"
model = CLIPModel.from_pretrained("openai/clip-vit-large-patch14").to(device)

# Load your fine-tuned weights (.pth file)
fine_tuned_weights_path = "path_to_your_model/model_finetuned.pth"
model.load_state_dict(torch.load(fine_tuned_weights_path, map_location=device))

# Get the text tokenizer and processor for images
tokenizer = AutoTokenizer.from_pretrained("openai/clip-vit-large-patch14")
processor = AutoProcessor.from_pretrained("openai/clip-vit-large-patch14")

# Define the text prompts for different colors
color_prompts = ["a red car", "a blue car", "a green car", "a yellow car", "a black car", "a white car", "gray car"]
inputs = tokenizer(color_prompts, padding=True, return_tensors="pt").to(device)

# Get the text features (color descriptions)
text_features = model.get_text_features(**inputs)
text_features = text_features / text_features.norm(dim=-1, keepdim=True)  # Normalize text features

# Folder where your car images are stored
image_folder = "datacolor/color/blue"  # Change this to the folder containing your images

# Ground truth (the correct color for the images in your dataset)
ground_truth_color = "blue"

# Initialize performance metrics
total_images = 0
correct_predictions = 0

# Arrays to store the predicted and actual labels
y_true = []
y_pred = []

# Iterate over each image in the folder
for image_file in os.listdir(image_folder):
    if image_file.endswith(('png', 'jpg', 'jpeg')):
        total_images += 1

        # Load and preprocess the image
        image_path = os.path.join(image_folder, image_file)
        image = Image.open(image_path)

        # Preprocess the image to match CLIP's expected format
        inputs = processor(images=image, return_tensors="pt").to(device)

        # Get the image features
        image_features = model.get_image_features(**inputs)
        image_features = image_features / image_features.norm(dim=-1, keepdim=True)  # Normalize image features

        # Compute cosine similarity between image features and text features (without softmax)
        similarities = (image_features @ text_features.T).squeeze(0)  # Remove batch dimension

        # Get the color with the highest similarity score
        predicted_color_idx = similarities.argmax().item()
        predicted_color = color_prompts[predicted_color_idx].split()[1]  # extract the color from the prompt

        # Store the ground truth and predicted labels for evaluation
        y_true.append(ground_truth_color)
        y_pred.append(predicted_color)

        # Check if the prediction is correct (since ground truth is "blue")
        if predicted_color == ground_truth_color:
            correct_predictions += 1

# Calculate accuracy
accuracy = correct_predictions / total_images * 100
print(f"Accuracy: {accuracy:.2f}%")

# Get unique labels from ground truth and predictions
unique_labels = sorted(list(set(y_true + y_pred)))  # Dynamically detect classes present in data

# Confusion Matrix and Classification Report
print("\nConfusion Matrix:")
conf_matrix = confusion_matrix(y_true, y_pred, labels=unique_labels)
print(conf_matrix)

print("\nClassification Report:")
class_report = classification_report(y_true, y_pred, target_names=unique_labels, labels=unique_labels)
print(class_report)

print("Unique labels in ground truth:", set(y_true))
print("Unique labels in predictions:", set(y_pred))


In [1]:
import torch
from transformers import CLIPProcessor, CLIPModel
from PIL import Image
from torchvision import transforms
import matplotlib.pyplot as plt

# Define class names
class_names = ['black', 'blue', 'cyan', 'gray', 'green', 'red', 'white', 'yellow']

class CLIPWithClassificationHead(CLIPModel):
    def __init__(self, config):
        super().__init__(config)
        self.classification_head = torch.nn.Linear(512, 8)  # Adjust the output size for your task

    def forward(self, pixel_values, **kwargs):
        image_features = self.get_image_features(pixel_values)
        logits = self.classification_head(image_features)
        return logits

# Load the fine-tuned model
device = "cuda" if torch.cuda.is_available() else "cpu"
model = CLIPWithClassificationHead.from_pretrained("openai/clip-vit-base-patch32")
model.load_state_dict(torch.load("fine_tuned-MODEL.pth", map_location=device))
model.to(device)
model.eval()

# Define the image preprocessing
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # Resize to the input size used during training
    transforms.ToTensor(),
    transforms.Normalize((0.48145466, 0.4578275, 0.40821073),
                         (0.26862954, 0.26130258, 0.27577711))  # Normalization used during training
])

# Load and preprocess the new image
image_path = "Blue/11133_1184.jpg"
image = Image.open(image_path).convert("RGB")
preprocessed_image = transform(image).unsqueeze(0).to(device)  # Add batch dimension and move to GPU if available

# Make predictions
with torch.no_grad():
    logits = model(preprocessed_image)
    probabilities = torch.softmax(logits, dim=1)
    predicted_class_idx = torch.argmax(probabilities, dim=1).item()
    predicted_class = class_names[predicted_class_idx]

    # Round probabilities to 3 decimal places and pair them with class names
    labeled_probabilities = {class_names[i]: round(prob, 3) for i, prob in enumerate(probabilities[0].tolist())}

# Plot the image with predictions
plt.figure(figsize=(6, 6))
plt.imshow(image)
plt.axis('off')

# Create a title showing the predicted class and labeled probabilities
probability_text = "\n".join([f"{label}: {prob}" for label, prob in labeled_probabilities.items()])
plt.title(f"Predicted Class: {predicted_class}\nProbabilities:\n{probability_text}")

plt.show()


  from .autonotebook import tqdm as notebook_tqdm


KeyboardInterrupt: 

In [4]:
from PIL import Image
import torch
from transformers import AutoProcessor, AutoTokenizer, CLIPModel
import os
from sklearn.metrics import confusion_matrix, classification_report

# Load the pre-trained CLIP model
device = "cuda" if torch.cuda.is_available() else "cpu"
model = CLIPModel.from_pretrained("openai/clip-vit-large-patch14").to(device)

# Get the text tokenizer and processor for images
tokenizer = AutoTokenizer.from_pretrained("openai/clip-vit-large-patch14")
processor = AutoProcessor.from_pretrained("openai/clip-vit-large-patch14")

# Define the text prompts for different colors
color_prompts = ["a red car", "a blue car", "a green car", "a yellow car", "a black car"]
inputs = tokenizer(color_prompts, padding=True, return_tensors="pt").to(device)

# Get the text features (color descriptions)
text_features = model.get_text_features(**inputs)
text_features = text_features / text_features.norm(dim=-1, keepdim=True)  # Normalize text features

# Folder where your car images are stored
image_folder = "black"  # Change this to the folder containing your images

# Ground truth (the correct color for the images in your dataset)
ground_truth_color = "black"

# Initialize performance metrics
total_images = 0
correct_predictions = 0

# Arrays to store the predicted and actual labels
y_true = []
y_pred = []

# Iterate over each image in the folder
for image_file in os.listdir(image_folder):
    if image_file.endswith(('png', 'jpg', 'jpeg')):
        total_images += 1

        # Load and preprocess the image
        image_path = os.path.join(image_folder, image_file)
        image = Image.open(image_path)

        # Preprocess the image to match CLIP's expected format
        inputs = processor(images=image, return_tensors="pt").to(device)

        # Get the image features
        image_features = model.get_image_features(**inputs)
        image_features = image_features / image_features.norm(dim=-1, keepdim=True)  # Normalize image features

        # Compute cosine similarity between image features and text features (without softmax)
        similarities = (image_features @ text_features.T).squeeze(0)  # Remove batch dimension

        # Get the color with the highest similarity score
        predicted_color_idx = similarities.argmax().item()
        predicted_color = color_prompts[predicted_color_idx].split()[1]  # extract the color from the prompt

        # Store the ground truth and predicted labels for evaluation
        y_true.append(ground_truth_color)
        y_pred.append(predicted_color)

        # Check if the prediction is correct (since ground truth is "green")
        if predicted_color == ground_truth_color:
            correct_predictions += 1

# Calculate accuracy
accuracy = correct_predictions / total_images * 100
print(f"Accuracy: {accuracy:.2f}%")

# Get unique labels from ground truth and predictions
unique_labels = sorted(list(set(y_true + y_pred)))  # Dynamically detect classes present in data

# Confusion Matrix and Classification Report
print("\nConfusion Matrix:")
conf_matrix = confusion_matrix(y_true, y_pred, labels=unique_labels)
print(conf_matrix)

print("\nClassification Report:")
class_report = classification_report(y_true, y_pred, target_names=unique_labels, labels=unique_labels)
print(class_report)
print("Unique labels in ground truth:", set(y_true))
print("Unique labels in predictions:", set(y_pred))


Accuracy: 100.00%

Confusion Matrix:
[[45]]

Classification Report:
              precision    recall  f1-score   support

       black       1.00      1.00      1.00        45

    accuracy                           1.00        45
   macro avg       1.00      1.00      1.00        45
weighted avg       1.00      1.00      1.00        45

Unique labels in ground truth: {'black'}
Unique labels in predictions: {'black'}




using clip model to detect the color of the cars !!!
