In [None]:
import numpy as np
import pandas as pd
import os
import cv2
import torch
import warnings
import tensorflow as tf
from tensorflow.keras import models, layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout
from tensorflow.keras.callbacks import ModelCheckpoint
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.preprocessing import OneHotEncoder, Normalizer
from sklearn.compose import ColumnTransformer
from torch.utils.data import DataLoader, Dataset
from torch.optim import AdamW
import torch.nn as nn
from transformers import ViTModel, ViTImageProcessor
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.impute import KNNImputer
warnings.filterwarnings("ignore")

In [None]:
# Load the training annotations from the Kaggle input directory.
df=pd.read_csv('/kaggle/input/visual-taxonomy/train.csv')
# Display five randomly chosen rows; no seed is set, so the rows differ between runs.
df.sample(5)

In [None]:
# Display the contents of category_attributes.parquet; the result is shown but not stored.
pd.read_parquet('/kaggle/input/visual-taxonomy/category_attributes.parquet')

In [None]:
# Separate the Men Tshirts category.
# .copy() makes the subset an independent frame, so later column edits do not
# operate on a filtered view of `df` (the source of SettingWithCopyWarning).
df_TShirt = df[df['Category']=='Men Tshirts'].copy()
# Display five random rows of the subset as a sanity check.
df_TShirt.sample(5)

In [None]:
# Drop the undefined attribute columns.
# The drop is not in-place (the frame is a filtered subset of `df`), and
# errors='ignore' lets the cell be re-run without raising KeyError once the
# columns are already gone.
df_TShirt = df_TShirt.drop(columns=['attr_6','attr_7','attr_8','attr_9','attr_10'], errors='ignore')
# Keep the ids; they are used to locate the image files.
ids=df_TShirt['id']
df_TShirt.head()

In [None]:
# Load the image for every id in df_TShirt, preserving row order so that
# images[i] corresponds to the i-th row of the frame.
# NOTE(review): cv2.imread yields BGR channel order while the ViT processor
# presumably expects RGB — confirm, and if converting, convert the train and
# test images together so both feature sets stay consistent.
images=[]
for image_id in ids:  # renamed from `id` to avoid shadowing the builtin
    path = os.path.join('/kaggle/input/visual-taxonomy/train_images', f'{int(image_id):06}.jpg')
    image=cv2.imread(path)
    # cv2.imread returns None instead of raising when a file is missing or
    # unreadable; fail here with the offending path rather than later inside
    # the image processor.
    if image is None:
        raise FileNotFoundError(f'Could not read training image: {path}')
    images.append(image)
len(images)

In [None]:
# One-hot encode the five attribute columns (the first level of each attribute
# is dropped and acts as the implicit baseline), then remove the Category
# column and cast every remaining column to int.
# NOTE(review): with get_dummies' default dummy_na=False, a missing attribute
# value becomes an all-zero row, i.e. indistinguishable from the dropped
# baseline level — confirm this is intended, since no NaN survives for any
# later imputation step.
attr_columns = [f'attr_{i}' for i in range(1, 6)]
df_TShirt_encoded = pd.get_dummies(df_TShirt, columns=attr_columns, drop_first=True)
df_TShirt_encoded = df_TShirt_encoded.drop(columns='Category').astype(int)

In [None]:
import torch
import torch.nn as nn
import numpy as np
from transformers import ViTModel, ViTImageProcessor

# Load the pre-trained ViT model and its image processor; both are created
# from the same checkpoint name.
model = ViTModel.from_pretrained('google/vit-base-patch16-224-in21k')
processor = ViTImageProcessor.from_pretrained('google/vit-base-patch16-224-in21k')

# Wrapper module that exposes only the first-token embedding of a ViT model
class ViTModelNoHead(nn.Module):
    """Return the first-token (CLS) hidden state of a wrapped ViT model.

    Args:
        vit_model: module called as ``vit_model(**x)`` whose output exposes a
            ``last_hidden_state`` tensor indexable as ``[:, 0, :]``.
    """

    def __init__(self, vit_model):
        super().__init__()
        self.vit = vit_model

    def forward(self, x):
        """Run the wrapped model on the keyword mapping ``x``.

        Returns:
            ``last_hidden_state[:, 0, :]`` of the wrapped model's output.
        """
        hidden_states = self.vit(**x).last_hidden_state
        return hidden_states[:, 0, :]

# Initialize the model without a classification head
model_no_head = ViTModelNoHead(model)

# Transfer model to GPU when one is available, otherwise stay on CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model_no_head.to(device)

# Wrap the model in DataParallel if multiple GPUs are available
if torch.cuda.device_count() > 1:
    model_no_head = nn.DataParallel(model_no_head)

# Inference only: eval mode and no gradient tracking
model_no_head.eval()
feature_list = []
batch_size = 8  # Adjust based on your memory capacity
with torch.no_grad():
    # Process images in batches
    for i in range(0, len(images), batch_size):
        batch_images = images[i:i + batch_size]

        # Preprocess images
        inputs = processor(images=batch_images, return_tensors="pt", padding=True)

        # Move the inputs to the appropriate device
        inputs = {k: v.to(device) for k, v in inputs.items()}

        # Forward pass to get features
        cls_features = model_no_head(inputs)

        # Collect one feature vector per image. The batch axis must be kept:
        # squeeze() would collapse a final batch holding a single image to a
        # 1-D vector, and extend() would then append its individual scalars
        # instead of one row, corrupting feature_array.
        feature_list.extend(cls_features.cpu().numpy())

# Convert to a numpy array with one row per image
feature_array = np.array(feature_list)

In [None]:
# Inspect the shape of the extracted feature array.
feature_array.shape

In [None]:
# Remove the non-attribute columns so only the one-hot targets remain.
# The drop is not in-place, and errors='ignore' makes the cell idempotent:
# re-running it no longer raises KeyError once the columns are gone.
df_TShirt_encoded = df_TShirt_encoded.drop(columns=['id','len'], errors='ignore')
df_TShirt_encoded.shape

In [None]:
# Make the attribute matrix: one row per sample, one column per encoded attribute.
# to_numpy() converts the whole frame at once instead of iterating row by row.
attribute_array = df_TShirt_encoded.to_numpy()
# Kept as a list of row vectors for anything that still refers to it.
attribute_list = attribute_array.tolist()
attribute_array.shape

In [None]:
# Imputation using KNN Imputer: image features and attribute columns are placed
# side by side, passed through the imputer, and wrapped back into a DataFrame.
feature_frame = pd.DataFrame(feature_array)
attribute_frame = pd.DataFrame(attribute_array)
knn = KNNImputer()
imputed_values = knn.fit_transform(pd.concat([feature_frame, attribute_frame], axis=1))
df_TShirt_new = pd.DataFrame(imputed_values)
# Display five random rows of the combined frame.
df_TShirt_new.sample(5)

In [None]:
# Split the combined frame column-wise into inputs and targets (this is not a
# train/test split): the first 768 columns are the inputs X, the rest are the
# targets y. X is then passed through a Normalizer.
n_feature_columns = 768
norm = Normalizer()
y = df_TShirt_new.iloc[:, n_feature_columns:]
X = norm.fit_transform(df_TShirt_new.iloc[:, 0:n_feature_columns])

In [None]:
# Load the test rows and images for the Men Tshirts category
test=pd.read_csv('/kaggle/input/visual-taxonomy/test.csv')
# .copy() gives an independent frame, so later column assignments on `test`
# do not act on a filtered view of the full csv.
test=test[test['Category']=='Men Tshirts'].copy()
test_ids=test['id']

test_images=[]
for id_ in test_ids:
    path = os.path.join('/kaggle/input/visual-taxonomy/test_images', f'{int(id_):06}.jpg')
    image=cv2.imread(path)
    # cv2.imread returns None instead of raising when a file is missing or
    # unreadable; fail here with the offending path rather than later inside
    # the image processor.
    if image is None:
        raise FileNotFoundError(f'Could not read test image: {path}')
    test_images.append(image)
len(test_images)

In [None]:
# Extract the ViT CLS features for the test Men Tshirts images
X_test=[]
with torch.no_grad():
    for image in test_images:
        inputs = processor(images=image, return_tensors="pt").to(device)
        outputs = model(**inputs)
        # Extract the CLS token (representative of the image)
        cls_features = outputs.last_hidden_state[:, 0, :]
        # One image per call, so drop only the leading batch axis.
        X_test.append(cls_features.squeeze(0).cpu().numpy())

X_test = np.array(X_test)
# Apply the normalizer created for the training features instead of calling
# fit_transform again on the test data.
X_test=norm.transform(X_test)
# Shown as the cell output (as a mid-cell statement it displayed nothing).
X_test.shape

In [None]:
# Classification network: three Dense/Dropout/BatchNormalization stages of
# decreasing width followed by 8 sigmoid outputs, compiled with Adam and
# binary cross-entropy.
Clf = Sequential([
    Dense(512, input_dim=X.shape[1], activation='relu'),
    Dropout(0.3),
    BatchNormalization(),
    Dense(256, activation='relu'),
    Dropout(0.3),
    BatchNormalization(),
    Dense(128, activation='relu'),
    Dropout(0.3),
    BatchNormalization(),
    Dense(8, activation='sigmoid'),
])

Clf.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
Clf.summary()

In [None]:
# Checkpoint to 'best_model.keras' only when the validation loss reaches a new minimum.
callbacks = ModelCheckpoint(
    'best_model.keras',
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    verbose=1,
)
# Train with 20% of the data held out for validation.
history = Clf.fit(
    X,
    y,
    batch_size=32,
    epochs=100,
    validation_split=0.2,
    callbacks=[callbacks],
)

In [None]:
# Reload the model saved at 'best_model.keras' (the path the checkpoint callback writes to).
Clf_best=tf.keras.models.load_model('best_model.keras')

In [None]:
# Predict on the test features and label the 8 outputs with the encoded
# attribute column names, in this fixed order.
pred_columns = [
    'attr_1_default',
    'attr_1_multicolor',
    'attr_1_white',
    'attr_2_round',
    'attr_3_solid',
    'attr_4_solid',
    'attr_4_typography',
    'attr_5_short sleeves',
]
y_pred = pd.DataFrame(Clf_best.predict(X_test), columns=pred_columns)

In [None]:
# Preview the first rows of the predictions.
y_pred.head()

In [None]:
# Binarise the predictions: values at or above the threshold become 1, the rest 0.
threshold = 0.5
y_pred = y_pred.ge(threshold).astype(int)
y_pred.head()

In [None]:
def reverse_attr_1(row):
    """Decode the attr_1 indicator columns of ``row`` back to a label.

    The indicators are checked in the order default, multicolor, white and the
    first one equal to 1 wins; when none is set the result is 'black'.
    """
    for column, label in (
        ('attr_1_default', 'default'),
        ('attr_1_multicolor', 'multicolor'),
        ('attr_1_white', 'white'),
    ):
        if row[column] == 1.0:
            return label
    return 'black'

def reverse_attr_2(row):
    """Decode the attr_2 indicator of ``row``: 'round' when set, otherwise 'polo'."""
    return 'round' if row['attr_2_round'] == 1.0 else 'polo'

def reverse_attr_3(row):
    """Decode the attr_3 indicator of ``row`` back to its category label.

    Returns 'solid' when ``attr_3_solid`` equals 1, otherwise 'printed'.
    The label is lower-case so it matches the value embedded in the encoded
    column name ``attr_3_solid`` (and the casing used by every other decoded
    label) rather than introducing a differently-cased 'Solid' category.
    """
    if row['attr_3_solid'] == 1.0:
        return 'solid'
    return 'printed'

def reverse_attr_4(row):
    """Decode the attr_4 indicator columns of ``row`` back to a label.

    'solid' is checked before 'typography'; when neither indicator is set the
    result is 'default'.
    """
    for column, label in (
        ('attr_4_solid', 'solid'),
        ('attr_4_typography', 'typography'),
    ):
        if row[column] == 1.0:
            return label
    return 'default'

def reverse_attr_5(row):
    """Decode the attr_5 indicator of ``row``: 'short sleeves' when set, otherwise 'long sleeves'."""
    return 'short sleeves' if row['attr_5_short sleeves'] == 1.0 else 'long sleeves'

# Decode every attribute row by row with its matching reverse_* function,
# adding the decoded columns in the order attr_1 .. attr_5.
decoders = {
    'attr_1': reverse_attr_1,
    'attr_2': reverse_attr_2,
    'attr_3': reverse_attr_3,
    'attr_4': reverse_attr_4,
    'attr_5': reverse_attr_5,
}
for attr_name, decode in decoders.items():
    y_pred[attr_name] = y_pred.apply(decode, axis=1)

# Keep only the decoded columns, discarding the one-hot encoded ones
y_pred = y_pred[list(decoders)]
y_pred.head()

In [None]:
# Set the constant `len` column via assign(), which returns a new frame rather
# than assigning into what may be a filtered slice of the test csv.
test = test.assign(len=5)
# NOTE(review): `len` is set here but is not among the columns selected into
# y_pred_new below — confirm whether the output is meant to include it.
# Reset both indexes before concatenation so the rows line up positionally.
y_pred_new = pd.concat([test[['id', 'Category']].reset_index(drop=True),
                        y_pred.reset_index(drop=True)], axis=1)

# Verify result
y_pred_new.head()

In [None]:
# Fill attr_6 .. attr_10 with the placeholder value 'dummy'.
for attr_number in range(6, 11):
    y_pred_new[f'attr_{attr_number}'] = 'dummy'

In [None]:
# Display ten randomly chosen rows of the final prediction frame.
y_pred_new.sample(10)

In [None]:
# Write the predictions to 'y_pred_new.csv' without the index column.
y_pred_new.to_csv('y_pred_new.csv', index=False)

In [None]:
# Shape of the subset of rows whose decoded attr_1 is 'black' (first element is the row count).
y_pred_new[y_pred_new['attr_1']=='black'].shape