### Modeling COCO - QA
* [Review](https://arxiv.org/abs/1610.01465)

In [1]:
# Move one directory up (the cell output shows the resulting working directory),
# presumably so the `src` package imports and the relative data path used below resolve.
# NOTE(review): the target is relative, so re-running this cell moves up one more level.
%cd ..

/home/datascience/Data Fusion


### Setup Environment:

In [2]:
import os
import pandas as pd

from src.classifiers import preprocess_data, process_labels,split_data

from src.classifiers import VQADataset
from torch.utils.data import DataLoader

from src.classifiers_cpu_metrics import calculate_memory

In [3]:
# Directory containing the embeddings CSV (relative to the current working directory).
PATH = 'Embeddings_vlm/coco-qa/'
# Name of the CSV file to load (presumably CLIP embeddings, judging by the file name).
FILE = 'embeddings_clip.csv'

# Full path to the CSV, built from the two parts above.
FILE_PATH = os.path.join(PATH, FILE)

## Get data

In [4]:
# Read the embeddings table and discard the `image_id` column in one chained
# expression, so the frame is never mutated in place.
# NOTE(review): `image_id` would otherwise match the `'image' in column` filter
# used when selecting feature columns — presumably why it is removed here.
df = pd.read_csv(FILE_PATH).drop(columns=['image_id'])

# Preview the first rows as the cell's output.
df.head()

Unnamed: 0,questions,answers,types,split,image_embedding_0,image_embedding_1,image_embedding_2,image_embedding_3,image_embedding_4,image_embedding_5,...,text_embedding_502,text_embedding_503,text_embedding_504,text_embedding_505,text_embedding_506,text_embedding_507,text_embedding_508,text_embedding_509,text_embedding_510,text_embedding_511
0,what is using umbrellas as a central theme,sculpture,0,train,-0.002133,0.026865,-0.009941,-0.000407,-0.001535,0.043021,...,0.013656,0.006139,0.018542,-0.027902,-0.010581,0.001189,-0.018054,0.006616,0.039209,-0.052176
1,what walks toward the rope perimeter fence,elephant,0,train,-0.037488,-0.025273,0.021552,0.056967,0.010018,-0.042177,...,-0.007028,-0.00252,0.001875,-0.028708,0.016469,-0.01162,-0.010105,0.034845,-0.021513,0.02962
2,what is the color of the horses,brown,2,train,-0.050188,0.034539,0.008536,0.017248,0.027789,-0.020876,...,0.009025,0.000571,-0.022466,0.010579,-0.025592,0.0348,-0.007262,0.01037,-0.009308,0.008854
3,where is the black cat laying down,sink,3,train,-0.003502,0.00714,0.014187,0.052844,0.012376,-0.005956,...,-0.012388,0.034164,0.007408,0.029274,-0.01885,-0.007775,0.033192,0.00118,-0.041182,0.004494
4,what is the color of the character,purple,2,train,0.046207,0.050791,-0.010908,0.025887,-0.029377,-0.023032,...,-0.009319,-0.034246,0.002291,-0.00491,-0.025292,0.012616,-0.017789,-0.012675,-0.004709,0.028756


## Data Preparation

In [5]:
# Partition the full frame into train and test subsets.
train_df, test_df = split_data(df)

# Feature columns are chosen by substring match on the column name
# (e.g. `text_embedding_511`, `image_embedding_0`); the label is a single column.
text_columns = [name for name in df.columns if 'text' in name]
image_columns = [name for name in df.columns if 'image' in name]
label_columns = 'answers'

# Encode the labels (one-hot, per the original note). The training call also
# yields the fitted encoder (`mlb`) and the training label columns, which are
# passed back in for the test split.
# NOTE(review): the training call unpacks three values while the test call binds
# a single name — confirm what `process_labels` returns when `train_columns` is given.
train_labels, mlb, train_columns = process_labels(train_df, col=label_columns)
test_labels = process_labels(test_df, col=label_columns, train_columns=train_columns)

# Both datasets share every constructor argument except the source frame.
shared_dataset_args = (text_columns, image_columns, label_columns, mlb, train_columns)
train_dataset = VQADataset(train_df, *shared_dataset_args)
test_dataset = VQADataset(test_df, *shared_dataset_args)

# Both loaders use the same batch size and worker count; only the training
# loader shuffles.
shared_loader_kwargs = dict(batch_size=64, num_workers=2)
train_loader = DataLoader(train_dataset, shuffle=True, **shared_loader_kwargs)
test_loader = DataLoader(test_dataset, shuffle=False, **shared_loader_kwargs)

Train Shape: (78736, 1028)
Test Shape: (38948, 1028)


### Models

In [6]:
# Input widths: one feature per selected text / image column.
text_input_size = len(text_columns)
image_input_size = len(image_columns)
# Number of classes known to `mlb` (presumably a fitted label binarizer — confirm in process_labels).
output_size = len(mlb.classes_)
# NOTE(review): not used in the visible cells — presumably a single-label flag for later training code; confirm.
multilabel = False

In [7]:
# Report memory figures for the train/test loaders and for the early- and
# late-fusion models (see the printed output of this cell).
calculate_memory(train_loader, test_loader, text_input_size, image_input_size, output_size)

Early fusion:
Average Memory per Batch in Train: 0.35 MB
Total Memory Usage per Epoch Train: 436.71 MB (excluding model parameters)
Test:
Average Memory per Batch in Test: 0.18 MB
Total Memory Usage per Epoch Test: 106.87 MB (excluding model parameters)
Model: 
Model Memory Usage: 0.71 MB

Late fusion:
Average Memory per Batch in Train: 0.35 MB
Total Memory Usage per Epoch Train: 436.71 MB (excluding model parameters)
Test:
Average Memory per Batch in Test: 0.18 MB
Total Memory Usage per Epoch Test: 106.87 MB (excluding model parameters)
Model: 
Model Memory Usage: 0.46 MB
