In [None]:
# COLAB CV CONFIGURATION: HUGE MULTI-MODEL INITIALIZER

# Step 1: Install and Import Dependencies
!pip install -q timm torchvision transformers datasets opencv-python
import os
import cv2
import torch
import torchvision
import timm
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from torchvision import transforms
from tensorflow.keras.applications import *
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Flatten, GlobalAveragePooling2D
from transformers import AutoFeatureExtractor, AutoModelForImageClassification
from PIL import Image

print("All dependencies imported!")

# Step 2: Load and Preprocess a Sample Image
def load_sample_image(path='sample.jpg'):
    # Download a sample image
    if not os.path.exists(path):
        !wget -q https://upload.wikimedia.org/wikipedia/commons/thumb/2/26/YellowLabradorLooking_new.jpg/640px-YellowLabradorLooking_new.jpg -O sample.jpg
    img = Image.open(path).convert('RGB')
    return img

# Fetch the sample image and preview it on the current axes, with the axis
# ticks and frame hidden.
img = load_sample_image()
axes = plt.gca()
axes.imshow(img)
axes.set_title("Sample Input Image")
axes.axis('off')
plt.show()

# Resize to 224x224 and convert to a float tensor. `transform` itself is left
# un-normalized because it is reused further down for the detection model.
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor()
])

# ImageNet-pretrained classifiers expect inputs standardized with the ImageNet
# per-channel mean/std; feeding raw [0, 1] pixels skews their predictions.
imagenet_normalize = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)

# Batch of one normalized image, shared by the classification models below.
input_tensor = imagenet_normalize(transform(img)).unsqueeze(0)

# Step 3: TensorFlow / Keras Models
# Every backbone is instantiated with ImageNet weights and without its
# classification head; the pair order below fixes the dict's insertion order.
keras_models = {
    label: build(weights='imagenet', include_top=False)
    for label, build in (
        ("VGG16", VGG16),
        ("VGG19", VGG19),
        ("ResNet50", ResNet50),
        ("ResNet101", ResNet101),
        ("InceptionV3", InceptionV3),
        ("Xception", Xception),
        ("MobileNet", MobileNet),
        ("DenseNet201", DenseNet201),
        ("NASNetMobile", NASNetMobile),
        ("EfficientNetB7", EfficientNetB7),
    )
}

def keras_model_summary():
    """Print a Keras summary for every backbone in ``keras_models``.

    Each backbone is wrapped in a Sequential model followed by global average
    pooling, a 1024-unit ReLU layer and a 10-unit softmax layer. The model is
    built for 224x224 three-channel input and summarized, and a line of 80
    ``=`` characters is printed after each summary.
    """
    input_shape = (None, 224, 224, 3)
    divider = "=" * 80
    for label, backbone in keras_models.items():
        # A fresh head is created per backbone; only the backbone is shared.
        head = [
            GlobalAveragePooling2D(),
            Dense(1024, activation='relu'),
            Dense(10, activation='softmax'),
        ]
        classifier = tf.keras.Sequential([backbone, *head])
        print(f"Summary of {label}:")
        classifier.build(input_shape)
        classifier.summary()
        print(divider)


keras_model_summary()

# Step 4: PyTorch Vision Models
# NOTE(review): `pretrained=True` is the older torchvision way of requesting
# weights; newer releases document a `weights=` argument. Kept as-is so the
# same checkpoints are loaded -- confirm against the installed version.
torch_models = {
    "resnet18": torchvision.models.resnet18(pretrained=True),
    "resnet50": torchvision.models.resnet50(pretrained=True),
    "alexnet": torchvision.models.alexnet(pretrained=True),
    "squeezenet": torchvision.models.squeezenet1_0(pretrained=True),
    "vgg16": torchvision.models.vgg16(pretrained=True),
    "densenet": torchvision.models.densenet161(pretrained=True),
    "inception": torchvision.models.inception_v3(pretrained=True, aux_logits=False),
    "googlenet": torchvision.models.googlenet(pretrained=True),
    "shufflenet": torchvision.models.shufflenet_v2_x1_0(pretrained=True),
    "mobilenet": torchvision.models.mobilenet_v2(pretrained=True),
    "resnext": torchvision.models.resnext50_32x4d(pretrained=True),
    "wide_resnet": torchvision.models.wide_resnet50_2(pretrained=True),
    "mnasnet": torchvision.models.mnasnet1_0(pretrained=True),
}

# Only output shapes are reported, so run the forward passes without autograd
# to avoid keeping a gradient graph for every model.
with torch.no_grad():
    for name, model in torch_models.items():
        model.eval()
        out = model(input_tensor)
        print(f"{name} output shape: {out.shape}")

# Step 5: TIMM Models
# Model names passed to timm.create_model, loaded one at a time below.
timm_models = [
    'vit_base_patch16_224',
    'swin_base_patch4_window7_224',
    'efficientnet_b3a',
    'resnext101_32x8d',
    'regnety_160',
    'tf_efficientnet_b7_ns',
    'convnext_base',
    'beit_base_patch16_224'
]

print("TIMM models output shapes:")
# Only output shapes are reported, so run the forward passes without autograd
# to avoid keeping a gradient graph for every model.
with torch.no_grad():
    for model_name in timm_models:
        model = timm.create_model(model_name, pretrained=True)
        model.eval()
        out = model(input_tensor)
        print(f"{model_name}: {out.shape}")

# Step 6: Transformers Vision Models
# Hugging Face Hub model ids; each is loaded and run once on the sample image.
hf_models = [
    "google/vit-base-patch16-224",
    "microsoft/resnet-50",
    "facebook/deit-base-distilled-patch16-224",
    "microsoft/swin-tiny-patch4-window7-224"
]

for model_name in hf_models:
    print(f"Loading HuggingFace model: {model_name}")
    # NOTE(review): newer transformers releases appear to recommend
    # AutoImageProcessor for image models -- confirm against the installed
    # version before switching.
    extractor = AutoFeatureExtractor.from_pretrained(model_name)
    model = AutoModelForImageClassification.from_pretrained(model_name)

    # The extractor receives the PIL image directly (not `input_tensor`) and
    # returns PyTorch tensors that are unpacked as keyword arguments below.
    img_array = extractor(images=img, return_tensors="pt")
    # Inference only: no gradients are needed to read the logits shape.
    with torch.no_grad():
        output = model(**img_array)
    print(f"{model_name} logits shape: {output.logits.shape}")

# Step 7: Sample CV Tasks
print("\nPerforming Sample Classification with ResNet18:")
resnet18 = torchvision.models.resnet18(pretrained=True)
resnet18.eval()
output = resnet18(input_tensor)
prob = torch.nn.functional.softmax(output[0], dim=0)
top5 = torch.topk(prob, 5)

# Download labels
!wget -q https://raw.githubusercontent.com/pytorch/hub/master/imagenet_classes.txt
with open("imagenet_classes.txt", "r") as f:
    categories = [s.strip() for s in f.readlines()]

for i in range(5):
    print(f"{categories[top5.indices[i]]}: {top5.values[i].item()*100:.2f}%")

# Step 8: Object Detection with Pretrained Faster R-CNN
print("\nRunning object detection with Faster R-CNN")
od_model = torchvision.models.detection.fasterrcnn_resnet50_fpn(pretrained=True)
od_model.eval()
image_tensor = transform(img).unsqueeze(0)
# Inference only, so skip autograd bookkeeping for the forward pass.
with torch.no_grad():
    predictions = od_model(image_tensor)

# Report every detection for the single input image scoring above 0.5.
detections = predictions[0]
for i, (box, score) in enumerate(zip(detections['boxes'], detections['scores'])):
    score = score.item()
    if score > 0.5:
        box = box.cpu().numpy()
        print(f"Object {i}: Box={box}, Score={score:.2f}")

# Step 9: Semantic Segmentation with DeepLabV3
print("\nRunning semantic segmentation with DeepLabV3")
seg_model = torchvision.models.segmentation.deeplabv3_resnet101(pretrained=True)
seg_model.eval()
# The pretrained segmentation weights expect ImageNet-standardized input, so
# normalize a copy of the [0, 1] image batch (image_tensor is left untouched).
seg_input = transforms.Normalize(
    mean=[0.485, 0.456, 0.406],
    std=[0.229, 0.224, 0.225],
)(image_tensor)
# Inference only, so skip autograd bookkeeping for the forward pass.
with torch.no_grad():
    output = seg_model(seg_input)['out']
# Drop the batch dimension and take the highest-scoring class per pixel.
seg = output.squeeze().argmax(0).detach().cpu().numpy()

plt.imshow(seg)
plt.title("Segmentation Output")
plt.axis("off")
plt.show()

print("\nDone. You have configured a huge number of CV models!")
