### Push dataset to the Hugging Face Hub

In [None]:
from huggingface_hub import HfApi
import os 

# Create the Hub client. The access token comes from the HF_TOKEN environment
# variable; os.environ.get returns None when the variable is not set.
api = HfApi(token=os.environ.get("HF_TOKEN"))

# Upload the local dataset folder to the dataset repository on the Hub.
api.upload_folder(
    repo_type="dataset",
    repo_id="dchip95/synthetic-oscillatory-rheology-vlm",
    folder_path="data/rheo_sigmoid",
)


##### Check GPU VRAM usage and clear the CUDA cache if memory is in use

In [13]:
import gc

import torch

# Report GPU memory usage and release what can be released when more than
# 10 MB is still allocated (e.g. left over from an earlier run in this kernel).
if torch.cuda.is_available():
    allocated_mb = torch.cuda.memory_allocated() // 1024 // 1024
    if allocated_mb > 10:
        reserved_mb = torch.cuda.memory_reserved() // 1024 // 1024
        print(f"{allocated_mb} MB currently allocated")
        print(f"{reserved_mb} MB currently reserved")
        # empty_cache() only hands back cached blocks that no tensor occupies,
        # so collect unreachable Python objects first to drop the tensors they
        # still own. Tensors referenced by live notebook variables remain
        # allocated; `del` those variables before running this cell.
        gc.collect()
        torch.cuda.empty_cache()
        torch.cuda.reset_peak_memory_stats()

2418 MB currently allocated
2466 MB currently reserved


#### Import libraries, select the torch device, and load the model and processor

In [None]:
from transformers import AutoTokenizer, AutoModel, BitsAndBytesConfig, AutoModelForCausalLM, AutoProcessor
from PIL import Image
import cv2
import os
import time
%matplotlib inline

# Alternative checkpoint that was considered:
# model_id = "OpenGVLab/InternVL3-2B"
model_id = 'microsoft/Florence-2-base-ft'
device = "cuda" if torch.cuda.is_available() else "cpu"

# Florence-2 workflow:
#   1. load the processor
#   2. pass the raw inputs to the processor
#   3. the processor's output is what goes to the model
#   4. call generate() on the model object

# Load the weights in 4-bit precision through bitsandbytes.
bnb_config = BitsAndBytesConfig(load_in_4bit=True)

# trust_remote_code=True runs the modeling code shipped in the model repository.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    low_cpu_mem_usage=True,
    trust_remote_code=True,
).eval()

# A bitsandbytes-quantized model is already placed on its device by
# from_pretrained, and some transformers/bitsandbytes versions raise
# ValueError when .to() is called on it. Only move the model when it was
# loaded without quantization.
if not getattr(model, "is_quantized", False):
    model.to(device)

processor = AutoProcessor.from_pretrained(model_id, trust_remote_code=True)


#### Following https://huggingface.co/blog/finetune-florence2, freeze the vision encoder to make fine-tuning less expensive

In [23]:
# Freeze the vision encoder so its weights receive no gradients during
# fine-tuning. Autograd reads `requires_grad`; assigning any other attribute
# name (such as `is_trainable`) on a parameter leaves it trainable.
for param in model.vision_tower.parameters():
    param.requires_grad = False

#### Now we can begin the fine-tuning process