In [1]:
!pip install pyluach
!pip install transformers
!sudo apt-get install libhunspell-dev
!pip install pyspellchecker
!pip install torch torchvision
!pip install opencv-python
!pip install pytesseract
!apt-get install tesseract-ocr tesseract-ocr-heb
!pip install hunspell
!pip install huggingface_hub

Collecting pyluach
  Downloading pyluach-2.2.0-py3-none-any.whl.metadata (4.3 kB)
Downloading pyluach-2.2.0-py3-none-any.whl (25 kB)
Installing collected packages: pyluach
Successfully installed pyluach-2.2.0
Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
The following additional packages will be installed:
  dictionaries-common hunspell-en-us libhunspell-1.7-0 libtext-iconv-perl
Suggested packages:
  ispell | aspell | hunspell wordlist hunspell openoffice.org-hunspell
  | openoffice.org-core
The following NEW packages will be installed:
  dictionaries-common hunspell-en-us libhunspell-1.7-0 libhunspell-dev
  libtext-iconv-perl
0 upgraded, 5 newly installed, 0 to remove and 35 not upgraded.
Need to get 896 kB of archives.
After this operation, 3,130 kB of additional disk space will be used.
Get:1 http://archive.ubuntu.com/ubuntu jammy/main amd64 libtext-iconv-perl amd64 1.7-7build3 [14.3 kB]
Get:2 http://archive.ubuntu.com/ubuntu jammy/

In [2]:
import os
import gc
import pandas as pd
import pytesseract
from PIL import Image
import numpy as np
from sklearn.cluster import KMeans
from tqdm import tqdm
import re
from datetime import datetime, timedelta
import hunspell
from spellchecker import SpellChecker
from pyluach.dates import GregorianDate, HebrewDate
import torch
import cv2
from contextlib import contextmanager
import warnings
warnings.filterwarnings("ignore")

In [4]:
from huggingface_hub import login
login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [5]:
# Configuration
BATCH_SIZE = 25
MAX_TEXT_LENGTH = 1024

# Paths
MULTI_AD_INPUT = "/content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/multi_ads"
SINGLE_AD_INPUT = "/content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/single_ads"
OUTPUT_CSV = "/content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/ads_features.csv"
BATCH_OUTPUT_DIR = "/content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results"
PROGRESS_FILE = "/content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/progress.txt"

os.makedirs(BATCH_OUTPUT_DIR, exist_ok=True)

# Hebrew dictionary paths
HEBREW_DIC = "/content/drive/MyDrive/Miki_class/Project/Catalog/he_IL.dic"
HEBREW_AFF = "/content/drive/MyDrive/Miki_class/Project/Catalog/he_IL.aff"

# Issue dates mapping
ISSUE_TO_DATE = {
    '1193': '2024-11-03', '1194': '2024-11-10', '1196': '2024-11-24',
    '1197': '2024-12-01', '1198': '2024-12-08', '1199': '2024-12-15',
    '1200': '2024-12-22', '1201': '2024-12-29', '1202': '2025-01-05',
    '1203': '2025-01-12', '1204': '2025-01-19', '1205': '2025-01-26',
    '1206': '2025-02-02', '1216': '2025-04-27', '1217': '2025-05-04',
    '1218': '2025-05-11', '1219': '2025-05-18', '1220': '2025-05-26',
    '1221': '2025-06-01', '1222': '2025-06-08', '1223': '2025-06-15',
    '1224': '2025-06-22', '1225': '2025-06-29', '1226': '2025-07-06',
    '1227': '2025-07-13', '1228': '2025-07-20', '1229': '2025-07-27',
    '1230': '2025-08-04', '1231': '2025-08-10'
}

In [6]:
def get_memory_usage():
    if torch.cuda.is_available():
        allocated = torch.cuda.memory_allocated() / 1024**3
        reserved = torch.cuda.memory_reserved() / 1024**3
        return f"GPU: {allocated:.2f}GB allocated, {reserved:.2f}GB reserved"
    return "CPU mode"

def aggressive_cleanup():
    gc.collect()
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        torch.cuda.synchronize()

def save_progress(batch_num, total_batches, completed_files):
    try:
        with open(PROGRESS_FILE, 'w') as f:
            f.write(f"Batch {batch_num}/{total_batches}\n")
            f.write(f"Files completed: {len(completed_files)}\n")
            f.write("Completed files:\n")
            for filename in completed_files:
                f.write(f"{filename}\n")
        print(f"Progress saved: Batch {batch_num}/{total_batches}, {len(completed_files)} files completed")
    except Exception as e:
        print(f"Error saving progress: {e}")

def load_progress():
    if not os.path.exists(PROGRESS_FILE):
        return set()

    try:
        completed_files = set()
        with open(PROGRESS_FILE, 'r') as f:
            lines = f.readlines()
            if len(lines) > 3:
                for line in lines[3:]:
                    filename = line.strip()
                    if filename:
                        completed_files.add(filename)
        print(f"Progress loaded: {len(completed_files)} files already completed")
        return completed_files
    except Exception as e:
        print(f"Error loading progress: {e}")
        return set()

In [7]:
@contextmanager
def single_model_context(model_name):
    print(f"Loading {model_name}...")
    model = None

    try:
        if model_name == "hebrew_bert":
            from transformers import AutoTokenizer, AutoModel
            tokenizer = AutoTokenizer.from_pretrained("onlplab/alephbert-base")
            model_obj = AutoModel.from_pretrained("onlplab/alephbert-base")
            model = {'tokenizer': tokenizer, 'model': model_obj}
            print("Hebrew BERT loaded")

        elif model_name == "hebrew_sentiment":
            from transformers import pipeline
            model = pipeline(
                "sentiment-analysis",
                model="avichr/heBERT_sentiment_analysis",
                tokenizer="avichr/heBERT_sentiment_analysis"
            )
            print("Hebrew sentiment loaded")

        elif model_name == "hebrew_ner":
            from transformers import pipeline
            model = pipeline(
                "token-classification",
                model="avichr/heBERT_NER",
                tokenizer="avichr/heBERT_NER"
            )
            print("Hebrew NER loaded")

        elif model_name == "object_detection":
            from transformers import DetrImageProcessor, DetrForObjectDetection
            processor = DetrImageProcessor.from_pretrained("facebook/detr-resnet-50")
            model_obj = DetrForObjectDetection.from_pretrained("facebook/detr-resnet-50")
            model = {'processor': processor, 'model': model_obj}
            print("Object detection loaded")

        elif model_name == "image_captioning":
            from transformers import BlipProcessor, BlipForConditionalGeneration
            processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
            model_obj = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")
            model = {'processor': processor, 'model': model_obj}
            print("Image captioning loaded")

        elif model_name == "vqa":
            from transformers import pipeline
            model = pipeline("visual-question-answering")
            print("VQA loaded")

        yield model

    except Exception as e:
        print(f"Error loading {model_name}: {e}")
        yield None

    finally:
        print(f"Cleaning up {model_name}...")
        if model:
            if isinstance(model, dict):
                for key, value in model.items():
                    if hasattr(value, 'to'):
                        value.to('cpu')
                    del value
            else:
                if hasattr(model, 'to'):
                    model.to('cpu')
                del model
        model = None
        aggressive_cleanup()
        print(f"{model_name} cleaned up")

In [8]:
try:
    h = hunspell.HunSpell(HEBREW_DIC, HEBREW_AFF)
    use_hunspell = True
    print("Hebrew hunspell initialized")
except Exception as e:
    print(f"Hunspell failed: {e}")
    use_hunspell = False

spell_en = SpellChecker()

Hunspell failed: (2, 'No such file or directory')


In [9]:
similar_chars = [('כ', 'ב'), ('ה', 'ח'), ('ו', 'ן'), ('י', 'ו'), ('ר', 'ד'), ('מ', 'ס')]
valid_words = ['בסייעתא', 'השמים', 'בסד']

def try_swap(word):
    if not use_hunspell:
        return word
    for char1, char2 in similar_chars:
        if char1 in word:
            if h.spell(word.replace(char1, char2)):
                return word.replace(char1, char2)
        if char2 in word:
            if h.spell(word.replace(char2, char1)):
                return word.replace(char2, char1)
    return word

def clean_text_heb(text):
    try:
        if text is np.nan or len(text) == 0:
            return ""
    except:
        return ""

    text = re.sub(r'[^א-ת\s]', '', text)
    text = re.sub(r'\s+', ' ', text).strip()
    words = text.split()
    words = [word for word in words if len(word) > 1]

    if not use_hunspell:
        return " ".join(words)

    new_words = []
    for word in words:
        if not h.spell(word) and word not in valid_words:
            word = try_swap(word)
            if not h.spell(word):
                suggestions = h.suggest(word)
                if suggestions:
                    new_words.append(suggestions[0])
            else:
                new_words.append(word)
        else:
            new_words.append(word)

    new_words = [word for word in new_words if len(new_words) > 1]
    return " ".join(new_words)

def clean_text_eng(text):
    try:
        if text is np.nan or len(text) == 0:
            return ""
    except:
        return ""

    text = re.sub(r'[^a-zA-Z\s]', '', text)
    text = re.sub(r'\s+', ' ', text).strip()
    words = text.split()
    words = [word.lower() for word in words if len(word) > 1 or word == 'I']
    words = [word for word in words if word.lower() in spell_en]

    new_words = []
    for word in words:
        if word.lower() not in spell_en and spell_en.candidates(word) is not None:
            suggestions = spell_en.candidates(word)
            new_words.append(next(iter(suggestions)))
        else:
            new_words.append(word)

    return " ".join(new_words)

In [10]:
category_keywords = {
    'religious_services': [
        'רב', 'מורה', 'סופר סת"ם', 'כשרות', 'שחיטה', 'מקווה', 'בית כנסת',
        'תלמוד תורה', 'ישיבה', 'כולל', 'בד"ץ', 'הכשר', 'שמירת שבת',
        'תפילין', 'מזוזות', 'ספר תורה', 'קידוש', 'הבדלה', 'רבנות'
    ],
    'lifecycle_events': [
        'ברית', 'בר מצווה', 'בת מצווה', 'חתונה', 'שידוך', 'איירוסין',
        'שבע ברכות', 'אבל', 'שבעה', 'שלושים', 'יאהרצייט', 'אזכרה',
        'פדיון הבן', 'חלקה', 'קבורה', 'בית עלמין', 'לווייה'
    ],
    'education': [
        'תלמוד תורה', 'ישיבה', 'בית יעקב', 'חינוך', 'מורה', 'מורת',
        'גננת', 'גן ילדים', 'לימודים', 'מלמד', 'רבנית', 'חדר'
    ],
    'children_babies': [
        'תינוק', 'תינוקת', 'עגלה', 'חיתול', 'צעצוע', 'מוצץ', 'לידה'
    ],
    'kosher_food': [
        'כשר', 'חלבי', 'בשרי', 'פרווה', 'מהדרין', 'בד"ץ', 'הכשר'
    ],
    'women_services': [
        'שמלות', 'בגדי נשים', 'פאות', 'כיסוי ראש', 'מיילדת'
    ],
    'men_services': [
        'חליפות', 'טלית', 'תפילין', 'כיפות', 'ארבע כנפות'
    ],
    'household': [
        'רהיטים', 'מטבח', 'ניקיון', 'כביסה', 'מקרר', 'תנור'
    ],
    'health_medical': [
        'רופא', 'רופאה', 'רוקח', 'מרפאה', 'בית חולים', 'ביטוח בריאות'
    ],
    'real_estate': [
        'דירה', 'דירות', 'השכרה', 'מכירה', 'נדלן', 'בית'
    ],
    'employment': [
        'דרושים', 'דרושות', 'משרה', 'עבודה', 'פרנסה'
    ]
}

gender_terms = {
    'female': [
        'נשים', 'אישה', 'בת', 'אמא', 'בנות', 'כלה', 'יולדת',
        'מורת', 'רופאת', 'אחות', 'גננת', 'מיילדת', 'נערה'
    ],
    'male': [
        'גברים', 'איש', 'בן', 'אבא', 'בנים', 'חתן', 'בחור',
        'מורה', 'רופא', 'אח', 'מלמד', 'רב', 'נער'
    ]
}

def detect_gender(text_heb, text_eng):
    if not text_heb and not text_eng:
        return 'neutral'

    text = (text_heb or '') + ' ' + (text_eng or '')
    text = text.lower()

    female_count = sum(1 for term in gender_terms['female'] if term in text)
    male_count = sum(1 for term in gender_terms['male'] if term in text)

    if female_count > male_count and female_count > 0:
        return 'female'
    elif male_count > female_count and male_count > 0:
        return 'male'
    else:
        return 'neutral'

def detect_category(text_heb, text_eng):
    text = (text_heb or '') + ' ' + (text_eng or '')
    text = text.lower()

    category_scores = {}
    for category, keywords in category_keywords.items():
        score = sum(1 for keyword in keywords if keyword in text)
        category_scores[category] = score

    if max(category_scores.values()) > 0:
        return max(category_scores, key=category_scores.get)
    else:
        return 'other'

In [11]:
def get_bert_embeddings(text, model_dict):
    try:
        if not text or pd.isna(text) or not model_dict:
            return None

        text = text[:MAX_TEXT_LENGTH]
        tokenizer = model_dict['tokenizer']
        model = model_dict['model']

        inputs = tokenizer(text, return_tensors="pt", truncation=True, max_length=MAX_TEXT_LENGTH)
        with torch.no_grad():
            outputs = model(**inputs)

        embeddings = outputs.last_hidden_state.mean(dim=1).squeeze()
        return embeddings.cpu().numpy().tolist()
    except Exception as e:
        print(f"BERT embedding error: {e}")
        return None

def get_sentiment_advanced(text, sentiment_pipeline):
    try:
        if not text or pd.isna(text) or not sentiment_pipeline:
            return 'neutral'

        text = text[:MAX_TEXT_LENGTH]
        result = sentiment_pipeline(text)

        if isinstance(result, list) and len(result) > 0:
            return result[0]['label'].lower()
        return 'neutral'
    except Exception as e:
        print(f"Sentiment error: {e}")
        return 'neutral'

def get_ner_entities(text, ner_pipeline):
    try:
        if not text or pd.isna(text) or not ner_pipeline:
            return []

        text = text[:MAX_TEXT_LENGTH]
        entities = ner_pipeline(text)

        processed_entities = []
        current_entity = ""
        current_type = ""

        for entity in entities:
            word = entity['word']
            entity_type = entity['entity']

            if word.startswith('##'):
                current_entity += word[2:]
            else:
                if current_entity and len(current_entity) > 2:
                    processed_entities.append({
                        'entity': current_entity,
                        'type': current_type,
                        'score': entity['score']
                    })

                current_entity = word
                current_type = entity_type

        if current_entity and len(current_entity) > 2:
            processed_entities.append({
                'entity': current_entity,
                'type': current_type,
                'score': entity.get('score', 0)
            })


        return processed_entities
    except Exception as e:
        print(f"NER error: {e}")
        return []

In [12]:
COCO_CLASSES = ['background', 'person', 'bicycle', 'car', 'motorcycle', 'airplane',
                'bus', 'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
                'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog',
                'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra', 'giraffe',
                'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
                'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat',
                'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
                'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
                'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot',
                'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
                'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop',
                'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven',
                'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase',
                'scissors', 'teddy bear', 'hair drier', 'toothbrush']

In [13]:
def extract_dominant_colors(image, num_colors=5):
    try:
        image = image.resize((100, 100))
        image_data = np.array(image)
        image_data = image_data.reshape((-1, 3))
        kmeans = KMeans(n_clusters=num_colors, random_state=42, n_init=10)
        kmeans.fit(image_data)
        colors = kmeans.cluster_centers_.astype(int)
        return [tuple(color) for color in colors]
    except:
        return []

def check_if_greyscale(image):
    try:
        image_data = np.array(image)
        if len(image_data.shape) < 3:
            return True
        return np.all(image_data[:, :, 0] == image_data[:, :, 1]) and np.all(image_data[:, :, 1] == image_data[:, :, 2])
    except:
        return False

In [14]:
def detect_objects(image, model_dict):
    try:
        if not model_dict:
            return []

        processor = model_dict['processor']
        model = model_dict['model']

        inputs = processor(images=image, return_tensors="pt")
        with torch.no_grad():
            outputs = model(**inputs)

        target_sizes = torch.tensor([image.size[::-1]])
        results = processor.post_process_object_detection(outputs, target_sizes=target_sizes, threshold=0.5)[0]

        detected_objects = []
        for score, label, box in zip(results["scores"], results["labels"], results["boxes"]):
            if label.item() < len(COCO_CLASSES):
                label_name = COCO_CLASSES[label.item()]
                detected_objects.append(label_name)

        return list(set(detected_objects))
    except Exception as e:
        print(f"Object detection error: {e}")
        return []

In [15]:
def image_to_text_caption(image, model_dict):
    try:
        if not model_dict:
            return ""

        processor = model_dict['processor']
        model = model_dict['model']

        inputs = processor(image, return_tensors="pt")
        with torch.no_grad():
            out = model.generate(**inputs)
        return processor.decode(out[0], skip_special_tokens=True)
    except Exception as e:
        print(f"Image captioning error: {e}")
        return ""

In [16]:
def is_religious_vqa(image, vqa_pipeline):
    try:
        if not vqa_pipeline:
            return False

        question = "Are there any religious symbols or Jewish content in this image?"
        result = vqa_pipeline(image, question)
        answer = result['answer'] if isinstance(result, dict) else result[0]['answer']
        return 'yes' in str(answer).lower() or 'religious' in str(answer).lower()
    except Exception as e:
        print(f"VQA error: {e}")
        return False

In [17]:
def extract_comprehensive_prices(text_heb, text_eng):
    all_text = (text_heb or '') + ' ' + (text_eng or '')
    if not all_text:
        return []

    patterns = [
        r'₪\s*(\d{1,6}(?:\.\d{1,2})?)',
        r'(\d{1,6}(?:\.\d{1,2})?)\s*₪',
        r'ש"ח\s*(\d{1,6}(?:\.\d{1,2})?)',
        r'(\d{1,6}(?:\.\d{1,2})?)\s*ש"ח',
        r'מחיר\s*(\d{1,6}(?:\.\d{1,2})?)',
        r'רק\s*(\d{1,6}(?:\.\d{1,2})?)',
        r'(\d{1,6}(?:\.\d{1,2})?)',
        r'(\d+)\s*אלף',
    ]

    prices = []
    for pattern in patterns:
        matches = re.findall(pattern, all_text)
        for match in matches:
            try:
                if 'אלף' in pattern:
                    price_val = float(match) * 1000
                else:
                    price_val = float(match)

                if 0.1 <= price_val <= 1000000:
                    prices.append(price_val)
            except:
                continue

    return sorted(list(set(prices)))

def check_holiday_proximity(date_str):
    if not date_str:
        return {
            'is_near_holiday': False,
            'nearby_holidays': [],
            'days_to_holiday': None,
            'is_pre_holiday': False,
            'is_post_holiday': False
        }

    try:
        base_date = datetime.strptime(date_str, '%Y-%m-%d')
        nearby_holidays = []
        min_days_to_holiday = float('inf')

        for days_offset in range(-7, 8):
            check_date = base_date + timedelta(days=days_offset)
            gregorian_date = GregorianDate(check_date.year, check_date.month, check_date.day)

            try:
                holiday = gregorian_date.holiday()
                if holiday:
                    nearby_holidays.append({
                        'holiday': holiday,
                        'date': check_date.strftime('%Y-%m-%d'),
                        'days_offset': days_offset
                    })
                    if abs(days_offset) < abs(min_days_to_holiday):
                        min_days_to_holiday = days_offset
            except:
                continue

        return {
            'is_near_holiday': len(nearby_holidays) > 0,
            'nearby_holidays': str(nearby_holidays),
            'days_to_holiday': min_days_to_holiday if min_days_to_holiday != float('inf') else None,
            'is_pre_holiday': min_days_to_holiday > 0 if min_days_to_holiday != float('inf') else False,
            'is_post_holiday': min_days_to_holiday < 0 if min_days_to_holiday != float('inf') else False
        }
    except:
        return {
            'is_near_holiday': False,
            'nearby_holidays': [],
            'days_to_holiday': None,
            'is_pre_holiday': False,
            'is_post_holiday': False
        }


def check_shabbat_timing(date_str):
    if not date_str:
        return {'is_pre_shabbat': False, 'weekday': None}

    try:
        date_obj = datetime.strptime(date_str, '%Y-%m-%d')
        weekday = date_obj.weekday()
        return {
            'is_pre_shabbat': weekday in [3, 4],
            'weekday': date_obj.strftime('%A')
        }
    except:
        return {'is_pre_shabbat': False, 'weekday': None}

def detect_shabbat_content(text):
    if not text:
        return {'has_shabbat_terms': False, 'shabbat_term_count': 0}

    shabbat_terms = ['שבת', 'שבתות', 'מוצש', 'מוצאי שבת', 'הדלקת נרות', 'קידוש', 'הבדלה']
    text_lower = text.lower()
    found_terms = [term for term in shabbat_terms if term in text_lower]

    return {
        'has_shabbat_terms': len(found_terms) > 0,
        'shabbat_term_count': len(found_terms)
    }

In [18]:
def process_basic_features(image_path, filename, ad_type):
    try:
        image = Image.open(image_path).convert("RGB")

        text_heb = pytesseract.image_to_string(image, lang="heb")
        text_eng = pytesseract.image_to_string(image, lang="eng")

        text_heb_clean = clean_text_heb(text_heb)
        text_eng_clean = clean_text_eng(text_eng)

        width, height = image.size
        colors = extract_dominant_colors(image)
        is_greyscale = check_if_greyscale(image)

        gender = detect_gender(text_heb_clean, text_eng_clean)
        category = detect_category(text_heb_clean, text_eng_clean)
        prices = extract_comprehensive_prices(text_heb_clean, text_eng_clean)

        if ad_type == 'multi' and '_' in filename:
            parts = filename.split('_')
            issue = parts[1] if len(parts) > 1 else 'unknown'
        else:
            issue = 'single'

        issue_date = ISSUE_TO_DATE.get(issue)
        holiday_info = check_holiday_proximity(issue_date)
        shabbat_timing = check_shabbat_timing(issue_date)
        shabbat_content = detect_shabbat_content(text_heb_clean)

        has_discount = any(term in (text_heb_clean + ' ' + text_eng_clean).lower()
                          for term in ['הנחה', 'מבצע', 'סייל', 'זול', 'הזדמנות'])


        return {
            'filename': filename,
            'ad_type': ad_type,
            'issue': issue,
            'issue_date': issue_date,
            'image_path': image_path,
            'width': width,
            'height': height,
            'aspect_ratio': width / height if height > 0 else 0,
            'is_greyscale': is_greyscale,
            'text_heb_raw': text_heb,
            'text_eng_raw': text_eng,
            'text_heb_clean': text_heb_clean,
            'text_eng_clean': text_eng_clean,
            'text_heb_length': len(text_heb_clean.split()) if text_heb_clean else 0,
            'text_eng_length': len(text_eng_clean.split()) if text_eng_clean else 0,
            'main_colors': colors,
            'total_colors': len(set(colors)) if colors else 0,
            'gender_target': gender,
            'product_category': category,
            'prices': prices,
            'min_price': min(prices) if prices else None,
            'max_price': max(prices) if prices else None,
            'avg_price': sum(prices)/len(prices) if prices else None,
            'price_count': len(prices),
            'has_discount': has_discount,
            'is_near_holiday': holiday_info['is_near_holiday'],
            'nearby_holidays': holiday_info['nearby_holidays'],
            'days_to_holiday': holiday_info['days_to_holiday'],
            'is_pre_holiday': holiday_info['is_pre_holiday'],
            'is_post_holiday': holiday_info['is_post_holiday'],
            'weekday': shabbat_timing['weekday'],
            'is_pre_shabbat': shabbat_timing['is_pre_shabbat'],
            'has_shabbat_terms': shabbat_content['has_shabbat_terms'],
            'shabbat_term_count': shabbat_content['shabbat_term_count'],
            'text_embeddings': None,
            'sentiment_advanced': 'neutral',
            'ner_entities': [],
            'person_entities': 0,
            'location_entities': 0,
            'organization_entities': 0,
            'detected_objects': [],
            'object_count': 0,
            'has_people': False,
            'has_food': False,
            'has_furniture': False,
            'has_electronics': False,
            'image_caption': "",
            'is_religious_visual': False
        }

    except Exception as e:
        print(f"Error processing {filename}: {e}")
        return None

In [19]:
def process_batch_with_individual_models(batch_df):
    print(f"Processing batch of {len(batch_df)} images")
    print(f"Memory before processing: {get_memory_usage()}")

    # Phase 1: Hebrew BERT Embeddings
    print("Phase 1: BERT Embeddings")
    with single_model_context("hebrew_bert") as bert_model:
        if bert_model:
            batch_df['text_embeddings'] = batch_df['text_heb_clean'].apply(
                lambda x: get_bert_embeddings(x, bert_model)
            )
        print(f"Memory after BERT: {get_memory_usage()}")

    # Phase 2: Hebrew Sentiment Analysis
    print("Phase 2: Sentiment Analysis")
    with single_model_context("hebrew_sentiment") as sentiment_model:
        if sentiment_model:
            batch_df['sentiment_advanced'] = batch_df['text_heb_clean'].apply(
                lambda x: get_sentiment_advanced(x, sentiment_model)
            )
        print(f"Memory after Sentiment: {get_memory_usage()}")

    # Phase 3: Named Entity Recognition
    print("Phase 3: Named Entity Recognition")
    with single_model_context("hebrew_ner") as ner_model:
        if ner_model:
            batch_df['ner_entities'] = batch_df['text_heb_clean'].apply(
                lambda x: get_ner_entities(x, ner_model)
            )

            batch_df['person_entities'] = batch_df['ner_entities'].apply(
                lambda x: len([e for e in x if 'PER' in e.get('type', '')])
            )
            batch_df['location_entities'] = batch_df['ner_entities'].apply(
                lambda x: len([e for e in x if 'LOC' in e.get('type', '')])
            )
            batch_df['organization_entities'] = batch_df['ner_entities'].apply(
                lambda x: len([e for e in x if 'ORG' in e.get('type', '')])
            )
        print(f"Memory after NER: {get_memory_usage()}")

    # Phase 4: Object Detection
    print("Phase 4: Object Detection")
    with single_model_context("object_detection") as obj_model:
        if obj_model:
            for idx, row in batch_df.iterrows():
                try:
                    image = Image.open(row['image_path']).convert("RGB")
                    detected_objects = detect_objects(image, obj_model)

                    batch_df.at[idx, 'detected_objects'] = detected_objects
                    batch_df.at[idx, 'object_count'] = len(detected_objects)
                    batch_df.at[idx, 'has_people'] = 'person' in detected_objects
                    batch_df.at[idx, 'has_food'] = any(obj in detected_objects for obj in ['apple', 'banana', 'cake', 'pizza', 'sandwich'])
                    batch_df.at[idx, 'has_furniture'] = any(obj in detected_objects for obj in ['chair', 'dining table', 'couch', 'bed'])
                    batch_df.at[idx, 'has_electronics'] = any(obj in detected_objects for obj in ['tv', 'laptop', 'cell phone', 'remote'])
                except Exception as e:
                    print(f"Object detection error for {row['filename']}: {e}")
        print(f"Memory after Object Detection: {get_memory_usage()}")

    # Phase 5: Image Captioning
    print("Phase 5: Image Captioning")
    with single_model_context("image_captioning") as caption_model:
        if caption_model:
            for idx, row in batch_df.iterrows():
                try:
                    image = Image.open(row['image_path']).convert("RGB")
                    caption = image_to_text_caption(image, caption_model)
                    batch_df.at[idx, 'image_caption'] = caption
                except Exception as e:
                    print(f"Image captioning error for {row['filename']}: {e}")
        print(f"Memory after Image Captioning: {get_memory_usage()}")

    # Phase 6: Visual Question Answering
    print("Phase 6: Visual Question Answering")
    with single_model_context("vqa") as vqa_model:
        if vqa_model:
            for idx, row in batch_df.iterrows():
                try:
                    image = Image.open(row['image_path']).convert("RGB")
                    is_religious = is_religious_vqa(image, vqa_model)
                    batch_df.at[idx, 'is_religious_visual'] = is_religious
                except Exception as e:
                    print(f"VQA error for {row['filename']}: {e}")
        print(f"Memory after VQA: {get_memory_usage()}")

    print(f"Batch processing complete. Final memory: {get_memory_usage()}")
    return batch_df

In [20]:
def get_image_files():
    all_files = []

    if os.path.exists(MULTI_AD_INPUT):
        multi_files = [f for f in os.listdir(MULTI_AD_INPUT) if f.endswith(('.jpg', '.jpeg', '.png'))]
        all_files.extend([(os.path.join(MULTI_AD_INPUT, f), f, 'multi') for f in multi_files])
        print(f"Found {len(multi_files)} multi-ad files")
    else:
        print(f"Multi-ad directory not found: {MULTI_AD_INPUT}")

    if os.path.exists(SINGLE_AD_INPUT):
        single_files = [f for f in os.listdir(SINGLE_AD_INPUT) if f.endswith(('.jpg', '.jpeg', '.png'))]
        all_files.extend([(os.path.join(SINGLE_AD_INPUT, f), f, 'single') for f in single_files])
        print(f"Found {len(single_files)} single-ad files")
    else:
        print(f"Single-ad directory not found: {SINGLE_AD_INPUT}")

    return all_files

def save_batch_results(batch_df, batch_num):
    batch_file = os.path.join(BATCH_OUTPUT_DIR, f"batch_{batch_num:03d}.csv")
    try:
        batch_df.to_csv(batch_file, index=False)
        print(f"Batch {batch_num} saved to: {batch_file}")
        return batch_file
    except Exception as e:
        print(f"Error saving batch {batch_num}: {e}")
        return None

def combine_all_batches():
    print("Combining all batch files...")
    batch_files = [f for f in os.listdir(BATCH_OUTPUT_DIR) if f.startswith('batch_') and f.endswith('.csv')]
    batch_files.sort()

    if not batch_files:
        print("No batch files found to combine")
        return None

    all_dfs = []
    for batch_file in batch_files:
        try:
            batch_path = os.path.join(BATCH_OUTPUT_DIR, batch_file)
            df = pd.read_csv(batch_path)
            all_dfs.append(df)
            print(f"Loaded {batch_file}: {len(df)} records")
        except Exception as e:
            print(f"Error loading {batch_file}: {e}")

    if all_dfs:
        combined_df = pd.concat(all_dfs, ignore_index=True)
        combined_df.to_csv(OUTPUT_CSV, index=False)
        print(f"Combined dataset saved to: {OUTPUT_CSV}")
        print(f"Total records: {len(combined_df)}")
        return combined_df

    return None

def run_processing_pipeline():

    all_files = get_image_files()
    if not all_files:
        print("No files found to process")
        return None

    print(f"Total files to process: {len(all_files)}")

    completed_files = load_progress()

    remaining_files = [(path, filename, ad_type) for path, filename, ad_type in all_files
                      if filename not in completed_files]

    if not remaining_files:
        print("All files already processed. Combining existing batches...")
        return combine_all_batches()

    print(f"Remaining files to process: {len(remaining_files)}")

    total_batches = (len(remaining_files) + BATCH_SIZE - 1) // BATCH_SIZE
    print(f"Will process in {total_batches} batches of {BATCH_SIZE} files each")

    for batch_num in range(1, total_batches + 1):
        batch_start = (batch_num - 1) * BATCH_SIZE
        batch_end = min(batch_start + BATCH_SIZE, len(remaining_files))
        batch_files = remaining_files[batch_start:batch_end]

        print(f"\n" + "="*40)
        print(f"PROCESSING BATCH {batch_num}/{total_batches}")
        print(f"Files {batch_start+1}-{batch_end} of {len(remaining_files)}")
        print(f"Memory at start: {get_memory_usage()}")
        print("="*40)

        print("Phase 1: Basic feature extraction")
        batch_data = []
        for image_path, filename, ad_type in tqdm(batch_files, desc="Basic processing"):
            result = process_basic_features(image_path, filename, ad_type)
            if result:
                batch_data.append(result)

        if not batch_data:
            print(f"No valid data in batch {batch_num}, skipping")
            continue

        batch_df = pd.DataFrame(batch_data)
        print(f"Basic features extracted for {len(batch_df)} files")

        batch_df = process_batch_with_individual_models(batch_df)

        batch_file = save_batch_results(batch_df, batch_num)
        if batch_file:
            print(f"Batch {batch_num} successfully saved")

        new_completed_files = completed_files.union(set(batch_df['filename'].tolist()))
        save_progress(batch_num, total_batches, new_completed_files)
        completed_files = new_completed_files


        print(f"Batch {batch_num} complete. Total completed: {len(completed_files)}")

        aggressive_cleanup()
        print(f"Memory after cleanup: {get_memory_usage()}")

    print("\n" + "="*60)
    print("All batches processed. Combining results...")
    final_df = combine_all_batches()

    if final_df is not None:
        print("="*60)
        print("PROCESSING COMPLETE")
        print("="*60)
        return final_df
    else:
        print("Error combining batch results")
        return None

In [None]:
print("Starting the complete processing pipeline")
df_final = run_processing_pipeline()

Starting the complete processing pipeline
Starting optimized batch processing pipeline
Found 11116 multi-ad files
Found 2907 single-ad files
Total files to process: 14023
Progress loaded: 2250 files already completed
Remaining files to process: 11773
Will process in 471 batches of 25 files each

PROCESSING BATCH 1/471
Files 1-25 of 11773
Memory at start: GPU: 0.00GB allocated, 0.00GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [01:11<00:00,  2.85s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.00GB allocated, 0.00GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


tokenizer_config.json:   0%|          | 0.00/288 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/565 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/504M [00:00<?, ?B/s]

Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded


model.safetensors:   0%|          | 0.00/504M [00:00<?, ?B/s]

Memory after BERT: GPU: 0.00GB allocated, 0.00GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


config.json:   0%|          | 0.00/677 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

Device set to use cuda:0


Hebrew sentiment loaded


You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


config.json:   0%|          | 0.00/838 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/436M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/436M [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


preprocessor_config.json:   0%|          | 0.00/290 [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/167M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/102M [00:00<?, ?B/s]

Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...


Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.


object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...


preprocessor_config.json:   0%|          | 0.00/287 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/506 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/990M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


config.json: 0.00B [00:00, ?B/s]

pytorch_model.bin:   0%|          | 0.00/470M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/320 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/470M [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

preprocessor_config.json:   0%|          | 0.00/251 [00:00<?, ?B/s]

Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.
Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 1 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_001.csv
Batch 1 successfully saved
Progress saved: Batch 1/471, 2275 files completed
Batch 1 complete. Total completed: 2275
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 2/471
Files 26-50 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction



Basic processing:   0%|          | 0/25 [00:00<?, ?it/s][A
Basic processing:   4%|▍         | 1/25 [00:04<01:45,  4.40s/it][A
Basic processing:   8%|▊         | 2/25 [00:09<01:44,  4.52s/it][A
Basic processing:  12%|█▏        | 3/25 [00:13<01:40,  4.58s/it][A
Basic processing:  16%|█▌        | 4/25 [00:19<01:45,  5.01s/it][A
Basic processing:  20%|██        | 5/25 [00:20<01:09,  3.49s/it][A
Basic processing:  24%|██▍       | 6/25 [00:21<00:50,  2.66s/it][A
Basic processing:  28%|██▊       | 7/25 [00:21<00:36,  2.05s/it][A
Basic processing:  32%|███▏      | 8/25 [00:22<00:27,  1.63s/it][A
Basic processing:  36%|███▌      | 9/25 [00:24<00:27,  1.69s/it][A
Basic processing:  40%|████      | 10/25 [00:28<00:34,  2.27s/it][A
Basic processing:  44%|████▍     | 11/25 [00:35<00:52,  3.75s/it][A
Basic processing:  48%|████▊     | 12/25 [00:38<00:46,  3.56s/it][A
Basic processing:  52%|█████▏    | 13/25 [00:45<00:55,  4.66s/it][A
Basic processing:  56%|█████▌    | 14/25 [00:47<00:

Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 2 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_002.csv
Batch 2 successfully saved
Progress saved: Batch 2/471, 2300 files completed
Batch 2 complete. Total completed: 2300
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 3/471
Files 51-75 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [01:06<00:00,  2.66s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 3 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_003.csv
Batch 3 successfully saved
Progress saved: Batch 3/471, 2325 files completed
Batch 3 complete. Total completed: 2325
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 4/471
Files 76-100 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [00:40<00:00,  1.62s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 4 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_004.csv
Batch 4 successfully saved
Progress saved: Batch 4/471, 2350 files completed
Batch 4 complete. Total completed: 2350
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 5/471
Files 101-125 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [01:12<00:00,  2.90s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.91GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 5 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_005.csv
Batch 5 successfully saved
Progress saved: Batch 5/471, 2375 files completed
Batch 5 complete. Total completed: 2375
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 6/471
Files 126-150 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [00:41<00:00,  1.67s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 6 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_006.csv
Batch 6 successfully saved
Progress saved: Batch 6/471, 2400 files completed
Batch 6 complete. Total completed: 2400
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 7/471
Files 151-175 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [00:54<00:00,  2.16s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 7 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_007.csv
Batch 7 successfully saved
Progress saved: Batch 7/471, 2425 files completed
Batch 7 complete. Total completed: 2425
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 8/471
Files 176-200 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [00:59<00:00,  2.40s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 8 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_008.csv
Batch 8 successfully saved
Progress saved: Batch 8/471, 2450 files completed
Batch 8 complete. Total completed: 2450
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 9/471
Files 201-225 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [00:58<00:00,  2.34s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 9 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_009.csv
Batch 9 successfully saved
Progress saved: Batch 9/471, 2475 files completed
Batch 9 complete. Total completed: 2475
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 10/471
Files 226-250 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [01:02<00:00,  2.49s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 10 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_010.csv
Batch 10 successfully saved
Progress saved: Batch 10/471, 2500 files completed
Batch 10 complete. Total completed: 2500
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 11/471
Files 251-275 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [00:51<00:00,  2.06s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 11 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_011.csv
Batch 11 successfully saved
Progress saved: Batch 11/471, 2525 files completed
Batch 11 complete. Total completed: 2525
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 12/471
Files 276-300 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [00:58<00:00,  2.35s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 12 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_012.csv
Batch 12 successfully saved
Progress saved: Batch 12/471, 2550 files completed
Batch 12 complete. Total completed: 2550
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 13/471
Files 301-325 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [00:54<00:00,  2.19s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 13 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_013.csv
Batch 13 successfully saved
Progress saved: Batch 13/471, 2575 files completed
Batch 13 complete. Total completed: 2575
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 14/471
Files 326-350 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [01:01<00:00,  2.47s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 14 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_014.csv
Batch 14 successfully saved
Progress saved: Batch 14/471, 2600 files completed
Batch 14 complete. Total completed: 2600
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 15/471
Files 351-375 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [00:51<00:00,  2.06s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 15 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_015.csv
Batch 15 successfully saved
Progress saved: Batch 15/471, 2625 files completed
Batch 15 complete. Total completed: 2625
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 16/471
Files 376-400 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [01:09<00:00,  2.78s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 16 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_016.csv
Batch 16 successfully saved
Progress saved: Batch 16/471, 2650 files completed
Batch 16 complete. Total completed: 2650
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 17/471
Files 401-425 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [00:58<00:00,  2.32s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 17 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_017.csv
Batch 17 successfully saved
Progress saved: Batch 17/471, 2675 files completed
Batch 17 complete. Total completed: 2675
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 18/471
Files 426-450 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [00:57<00:00,  2.30s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 18 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_018.csv
Batch 18 successfully saved
Progress saved: Batch 18/471, 2700 files completed
Batch 18 complete. Total completed: 2700
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 19/471
Files 451-475 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [00:36<00:00,  1.47s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 19 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_019.csv
Batch 19 successfully saved
Progress saved: Batch 19/471, 2725 files completed
Batch 19 complete. Total completed: 2725
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 20/471
Files 476-500 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [00:39<00:00,  1.58s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 20 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_020.csv
Batch 20 successfully saved
Progress saved: Batch 20/471, 2750 files completed
Batch 20 complete. Total completed: 2750
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 21/471
Files 501-525 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [00:55<00:00,  2.23s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 21 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_021.csv
Batch 21 successfully saved
Progress saved: Batch 21/471, 2775 files completed
Batch 21 complete. Total completed: 2775
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 22/471
Files 526-550 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [00:53<00:00,  2.15s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 22 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_022.csv
Batch 22 successfully saved
Progress saved: Batch 22/471, 2800 files completed
Batch 22 complete. Total completed: 2800
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 23/471
Files 551-575 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [01:03<00:00,  2.52s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 23 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_023.csv
Batch 23 successfully saved
Progress saved: Batch 23/471, 2825 files completed
Batch 23 complete. Total completed: 2825
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 24/471
Files 576-600 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [00:53<00:00,  2.13s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.38GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 24 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_024.csv
Batch 24 successfully saved
Progress saved: Batch 24/471, 2850 files completed
Batch 24 complete. Total completed: 2850
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 25/471
Files 601-625 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [00:52<00:00,  2.09s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 25 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_025.csv
Batch 25 successfully saved
Progress saved: Batch 25/471, 2875 files completed
Batch 25 complete. Total completed: 2875
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 26/471
Files 626-650 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [00:59<00:00,  2.39s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 26 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_026.csv
Batch 26 successfully saved
Progress saved: Batch 26/471, 2900 files completed
Batch 26 complete. Total completed: 2900
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 27/471
Files 651-675 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [00:47<00:00,  1.89s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 27 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_027.csv
Batch 27 successfully saved
Progress saved: Batch 27/471, 2925 files completed
Batch 27 complete. Total completed: 2925
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 28/471
Files 676-700 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [00:55<00:00,  2.24s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 28 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_028.csv
Batch 28 successfully saved
Progress saved: Batch 28/471, 2950 files completed
Batch 28 complete. Total completed: 2950
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 29/471
Files 701-725 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [00:49<00:00,  1.97s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 29 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_029.csv
Batch 29 successfully saved
Progress saved: Batch 29/471, 2975 files completed
Batch 29 complete. Total completed: 2975
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 30/471
Files 726-750 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [00:53<00:00,  2.12s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 30 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_030.csv
Batch 30 successfully saved
Progress saved: Batch 30/471, 3000 files completed
Batch 30 complete. Total completed: 3000
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 31/471
Files 751-775 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [01:19<00:00,  3.19s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 31 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_031.csv
Batch 31 successfully saved
Progress saved: Batch 31/471, 3025 files completed
Batch 31 complete. Total completed: 3025
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 32/471
Files 776-800 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [00:55<00:00,  2.22s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 32 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_032.csv
Batch 32 successfully saved
Progress saved: Batch 32/471, 3050 files completed
Batch 32 complete. Total completed: 3050
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 33/471
Files 801-825 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [01:07<00:00,  2.70s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.47GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.91GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 33 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_033.csv
Batch 33 successfully saved
Progress saved: Batch 33/471, 3075 files completed
Batch 33 complete. Total completed: 3075
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 34/471
Files 826-850 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [00:59<00:00,  2.39s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 34 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_034.csv
Batch 34 successfully saved
Progress saved: Batch 34/471, 3100 files completed
Batch 34 complete. Total completed: 3100
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 35/471
Files 851-875 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [00:43<00:00,  1.75s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 35 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_035.csv
Batch 35 successfully saved
Progress saved: Batch 35/471, 3125 files completed
Batch 35 complete. Total completed: 3125
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 36/471
Files 876-900 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [01:14<00:00,  2.97s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.47GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.91GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 36 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_036.csv
Batch 36 successfully saved
Progress saved: Batch 36/471, 3150 files completed
Batch 36 complete. Total completed: 3150
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 37/471
Files 901-925 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [00:57<00:00,  2.31s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 37 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_037.csv
Batch 37 successfully saved
Progress saved: Batch 37/471, 3175 files completed
Batch 37 complete. Total completed: 3175
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 38/471
Files 926-950 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [00:52<00:00,  2.12s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 38 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_038.csv
Batch 38 successfully saved
Progress saved: Batch 38/471, 3200 files completed
Batch 38 complete. Total completed: 3200
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 39/471
Files 951-975 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [01:01<00:00,  2.45s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.91GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 39 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_039.csv
Batch 39 successfully saved
Progress saved: Batch 39/471, 3225 files completed
Batch 39 complete. Total completed: 3225
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 40/471
Files 976-1000 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [00:58<00:00,  2.34s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 40 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_040.csv
Batch 40 successfully saved
Progress saved: Batch 40/471, 3250 files completed
Batch 40 complete. Total completed: 3250
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 41/471
Files 1001-1025 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [01:07<00:00,  2.68s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 41 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_041.csv
Batch 41 successfully saved
Progress saved: Batch 41/471, 3275 files completed
Batch 41 complete. Total completed: 3275
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 42/471
Files 1026-1050 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [00:54<00:00,  2.16s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 42 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_042.csv
Batch 42 successfully saved
Progress saved: Batch 42/471, 3300 files completed
Batch 42 complete. Total completed: 3300
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 43/471
Files 1051-1075 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [00:55<00:00,  2.21s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 43 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_043.csv
Batch 43 successfully saved
Progress saved: Batch 43/471, 3325 files completed
Batch 43 complete. Total completed: 3325
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 44/471
Files 1076-1100 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [00:44<00:00,  1.78s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 44 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_044.csv
Batch 44 successfully saved
Progress saved: Batch 44/471, 3350 files completed
Batch 44 complete. Total completed: 3350
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 45/471
Files 1101-1125 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [00:54<00:00,  2.20s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 45 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_045.csv
Batch 45 successfully saved
Progress saved: Batch 45/471, 3375 files completed
Batch 45 complete. Total completed: 3375
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 46/471
Files 1126-1150 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [00:50<00:00,  2.04s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 46 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_046.csv
Batch 46 successfully saved
Progress saved: Batch 46/471, 3400 files completed
Batch 46 complete. Total completed: 3400
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 47/471
Files 1151-1175 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [00:44<00:00,  1.78s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 47 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_047.csv
Batch 47 successfully saved
Progress saved: Batch 47/471, 3425 files completed
Batch 47 complete. Total completed: 3425
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 48/471
Files 1176-1200 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [00:44<00:00,  1.77s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 48 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_048.csv
Batch 48 successfully saved
Progress saved: Batch 48/471, 3450 files completed
Batch 48 complete. Total completed: 3450
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 49/471
Files 1201-1225 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [01:16<00:00,  3.07s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 49 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_049.csv
Batch 49 successfully saved
Progress saved: Batch 49/471, 3475 files completed
Batch 49 complete. Total completed: 3475
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 50/471
Files 1226-1250 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [01:07<00:00,  2.71s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.91GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 50 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_050.csv
Batch 50 successfully saved
Progress saved: Batch 50/471, 3500 files completed
Batch 50 complete. Total completed: 3500
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 51/471
Files 1251-1275 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [00:50<00:00,  2.00s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 51 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_051.csv
Batch 51 successfully saved
Progress saved: Batch 51/471, 3525 files completed
Batch 51 complete. Total completed: 3525
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 52/471
Files 1276-1300 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [00:42<00:00,  1.70s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 52 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_052.csv
Batch 52 successfully saved
Progress saved: Batch 52/471, 3550 files completed
Batch 52 complete. Total completed: 3550
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 53/471
Files 1301-1325 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [01:15<00:00,  3.03s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.47GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.91GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.38GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 53 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_053.csv
Batch 53 successfully saved
Progress saved: Batch 53/471, 3575 files completed
Batch 53 complete. Total completed: 3575
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 54/471
Files 1326-1350 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [00:49<00:00,  1.97s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 54 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_054.csv
Batch 54 successfully saved
Progress saved: Batch 54/471, 3600 files completed
Batch 54 complete. Total completed: 3600
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 55/471
Files 1351-1375 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [01:01<00:00,  2.45s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 55 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_055.csv
Batch 55 successfully saved
Progress saved: Batch 55/471, 3625 files completed
Batch 55 complete. Total completed: 3625
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 56/471
Files 1376-1400 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [01:08<00:00,  2.74s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 56 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_056.csv
Batch 56 successfully saved
Progress saved: Batch 56/471, 3650 files completed
Batch 56 complete. Total completed: 3650
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 57/471
Files 1401-1425 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [00:57<00:00,  2.31s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 57 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_057.csv
Batch 57 successfully saved
Progress saved: Batch 57/471, 3675 files completed
Batch 57 complete. Total completed: 3675
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 58/471
Files 1426-1450 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [01:00<00:00,  2.41s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 58 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_058.csv
Batch 58 successfully saved
Progress saved: Batch 58/471, 3700 files completed
Batch 58 complete. Total completed: 3700
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 59/471
Files 1451-1475 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [00:54<00:00,  2.17s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 59 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_059.csv
Batch 59 successfully saved
Progress saved: Batch 59/471, 3725 files completed
Batch 59 complete. Total completed: 3725
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 60/471
Files 1476-1500 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [00:54<00:00,  2.18s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 60 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_060.csv
Batch 60 successfully saved
Progress saved: Batch 60/471, 3750 files completed
Batch 60 complete. Total completed: 3750
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 61/471
Files 1501-1525 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [00:51<00:00,  2.06s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 61 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_061.csv
Batch 61 successfully saved
Progress saved: Batch 61/471, 3775 files completed
Batch 61 complete. Total completed: 3775
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 62/471
Files 1526-1550 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [00:54<00:00,  2.16s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 62 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_062.csv
Batch 62 successfully saved
Progress saved: Batch 62/471, 3800 files completed
Batch 62 complete. Total completed: 3800
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 63/471
Files 1551-1575 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [00:52<00:00,  2.12s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 63 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_063.csv
Batch 63 successfully saved
Progress saved: Batch 63/471, 3825 files completed
Batch 63 complete. Total completed: 3825
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 64/471
Files 1576-1600 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [00:53<00:00,  2.12s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 64 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_064.csv
Batch 64 successfully saved
Progress saved: Batch 64/471, 3850 files completed
Batch 64 complete. Total completed: 3850
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 65/471
Files 1601-1625 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [01:07<00:00,  2.68s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 65 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_065.csv
Batch 65 successfully saved
Progress saved: Batch 65/471, 3875 files completed
Batch 65 complete. Total completed: 3875
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 66/471
Files 1626-1650 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [01:06<00:00,  2.67s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 66 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_066.csv
Batch 66 successfully saved
Progress saved: Batch 66/471, 3900 files completed
Batch 66 complete. Total completed: 3900
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 67/471
Files 1651-1675 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [01:03<00:00,  2.53s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 67 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_067.csv
Batch 67 successfully saved
Progress saved: Batch 67/471, 3925 files completed
Batch 67 complete. Total completed: 3925
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 68/471
Files 1676-1700 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [00:58<00:00,  2.33s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.38GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 68 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_068.csv
Batch 68 successfully saved
Progress saved: Batch 68/471, 3950 files completed
Batch 68 complete. Total completed: 3950
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 69/471
Files 1701-1725 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [00:39<00:00,  1.60s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 69 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_069.csv
Batch 69 successfully saved
Progress saved: Batch 69/471, 3975 files completed
Batch 69 complete. Total completed: 3975
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 70/471
Files 1726-1750 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [00:49<00:00,  1.97s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.38GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 70 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_070.csv
Batch 70 successfully saved
Progress saved: Batch 70/471, 4000 files completed
Batch 70 complete. Total completed: 4000
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 71/471
Files 1751-1775 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [00:53<00:00,  2.15s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 71 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_071.csv
Batch 71 successfully saved
Progress saved: Batch 71/471, 4025 files completed
Batch 71 complete. Total completed: 4025
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 72/471
Files 1776-1800 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [00:56<00:00,  2.27s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 72 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_072.csv
Batch 72 successfully saved
Progress saved: Batch 72/471, 4050 files completed
Batch 72 complete. Total completed: 4050
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 73/471
Files 1801-1825 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [00:45<00:00,  1.83s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 73 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_073.csv
Batch 73 successfully saved
Progress saved: Batch 73/471, 4075 files completed
Batch 73 complete. Total completed: 4075
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 74/471
Files 1826-1850 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [00:57<00:00,  2.29s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 74 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_074.csv
Batch 74 successfully saved
Progress saved: Batch 74/471, 4100 files completed
Batch 74 complete. Total completed: 4100
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 75/471
Files 1851-1875 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [01:00<00:00,  2.42s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 75 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_075.csv
Batch 75 successfully saved
Progress saved: Batch 75/471, 4125 files completed
Batch 75 complete. Total completed: 4125
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 76/471
Files 1876-1900 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [00:58<00:00,  2.33s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 76 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_076.csv
Batch 76 successfully saved
Progress saved: Batch 76/471, 4150 files completed
Batch 76 complete. Total completed: 4150
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 77/471
Files 1901-1925 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing: 100%|██████████| 25/25 [01:06<00:00,  2.68s/it]


Basic features extracted for 25 files
Processing batch of 25 images
Memory before processing: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: BERT Embeddings
Loading hebrew_bert...


Some weights of BertModel were not initialized from the model checkpoint at onlplab/alephbert-base and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Hebrew BERT loaded
Memory after BERT: GPU: 0.01GB allocated, 0.02GB reserved
Cleaning up hebrew_bert...
hebrew_bert cleaned up
Phase 2: Sentiment Analysis
Loading hebrew_sentiment...


Device set to use cuda:0


Hebrew sentiment loaded
Memory after Sentiment: GPU: 0.42GB allocated, 0.46GB reserved
Cleaning up hebrew_sentiment...
hebrew_sentiment cleaned up
Phase 3: Named Entity Recognition
Loading hebrew_ner...


Device set to use cuda:0
Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Hebrew NER loaded
Memory after NER: GPU: 0.82GB allocated, 0.91GB reserved
Cleaning up hebrew_ner...
hebrew_ner cleaned up
Phase 4: Object Detection
Loading object_detection...


Some weights of the model checkpoint at facebook/detr-resnet-50 were not used when initializing DetrForObjectDetection: ['model.backbone.conv_encoder.model.layer1.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer2.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer3.0.downsample.1.num_batches_tracked', 'model.backbone.conv_encoder.model.layer4.0.downsample.1.num_batches_tracked']
- This IS expected if you are initializing DetrForObjectDetection from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing DetrForObjectDetection from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Object detection loaded
Memory after Object Detection: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up object_detection...
object_detection cleaned up
Phase 5: Image Captioning
Loading image_captioning...
Image captioning loaded
Memory after Image Captioning: GPU: 0.82GB allocated, 0.90GB reserved
Cleaning up image_captioning...


No model was supplied, defaulted to dandelin/vilt-b32-finetuned-vqa and revision d0a1f6a (https://huggingface.co/dandelin/vilt-b32-finetuned-vqa).
Using a pipeline without specifying a model name and revision in production is not recommended.


image_captioning cleaned up
Phase 6: Visual Question Answering
Loading vqa...


Device set to use cuda:0


VQA loaded
Memory after VQA: GPU: 1.26GB allocated, 1.39GB reserved
Cleaning up vqa...
vqa cleaned up
Batch processing complete. Final memory: GPU: 1.26GB allocated, 1.38GB reserved
Batch 77 saved to: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/batch_results/batch_077.csv
Batch 77 successfully saved
Progress saved: Batch 77/471, 4175 files completed
Batch 77 complete. Total completed: 4175
Memory after cleanup: GPU: 0.01GB allocated, 0.02GB reserved

PROCESSING BATCH 78/471
Files 1926-1950 of 11773
Memory at start: GPU: 0.01GB allocated, 0.02GB reserved
Phase 1: Basic feature extraction


Basic processing:   8%|▊         | 2/25 [00:02<00:27,  1.18s/it]

In [9]:
if not df_final.empty:
    print(f"\nSaving final dataset")
    try:
        df_final.to_csv(OUTPUT_CSV, index=False)

        # Verify save
        file_size = os.path.getsize(OUTPUT_CSV) / 1024**2
        print(f"SUCCESS! Dataset saved:")
        print(f"  File: {OUTPUT_CSV}")
        print(f"  Size: {file_size:.2f} MB")
        print(f"  Records: {len(df_final)}")
        print(f"  Columns: {len(df_final.columns)}")

        # Quick data check
        print(f"\nQuick data check:")
        print(f"  Ad types: {df_final['ad_type'].value_counts().to_dict()}")
        print(f"  Files with Hebrew text: {(df_final['text_heb_length'] > 0).sum()}")
        print(f"  Files with prices: {(df_final['price_count'] > 0).sum()}")

        print(f"\nYour dataset is ready for analysis!")

    except Exception as e:
        print(f"Error saving final dataset: {e}")
else:
    print("No data to save!")


Saving final dataset
SUCCESS! Dataset saved:
  File: /content/drive/MyDrive/Miki_class/Project/Catalog/processed_ads/ads_features.csv
  Size: 42.39 MB
  Records: 2725
  Columns: 48

Quick data check:
  Ad types: {'multi': 1925, 'single': 800}
  Files with Hebrew text: 2442
  Files with prices: 0

Your dataset is ready for analysis!
