### MoonDream Vision Language Model

In [None]:
!git clone https://github.com/vikhyat/moondream

### Installing dependencies for moondream


In [None]:
!pip install -r ./moondream/requirements.txt

### Main working script

In [None]:
import pandas as pd
import subprocess
import os
import requests
import re

# --- Run configuration -------------------------------------------------------
csv_input_path = r"/content/test.csv"  # Path to the test.csv file
image_folder = r"/content/image"  # Path to the folder containing the images
csv_output_path = r"/content/output.csv"  # Output path
exec_path = r"/content/moondream/sample.py"  # Path of the sample.py from the moondream repo
pyrun_name = "python"  # Python executable name

# Images are written into image_folder while the rows are processed; create it
# up front so the first write does not fail on a fresh runtime where the
# folder does not exist yet.
os.makedirs(image_folder, exist_ok=True)

# Input rows to process, and the [index, prediction] pairs collected so far.
data = pd.read_csv(csv_input_path)
output_data = []

def download_and_save_image(image_url, image_name, timeout=30):
    """Download an image and store it as ``<image_folder>/<image_name>.jpg``.

    Args:
        image_url: URL the image is fetched from.
        image_name: File name (without extension) to save the image under;
            it is converted to a string.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot block the whole run.

    Returns:
        True if the image was downloaded and written to disk, False if the
        request failed or the file could not be written.
    """
    # Built with os.path.join so the path matches the one the caller checks.
    target_path = os.path.join(image_folder, f"{image_name}.jpg")

    try:
        response = requests.get(image_url, timeout=timeout)
        response.raise_for_status()  # Raise an exception for bad status codes
    except requests.exceptions.RequestException as e:
        print(f"Error downloading image: {e}")
        return False

    # Handled separately from the request: a missing folder or a full disk
    # should fail this one image, not abort the entire run.
    try:
        with open(target_path, 'wb') as file:
            file.write(response.content)
    except OSError as e:
        print(f"Error saving image {target_path}: {e}")
        return False

    print(f"Image downloaded successfully as {target_path}")
    return True

# Canonical units accepted for each entity type. The first unit of each list
# is the one assumed when the input is a bare number.
_UNIT_MAPPING = {
    "width": ["centimetre", "foot", "millimetre", "metre", "inch", "yard"],
    "depth": ["centimetre", "foot", "millimetre", "metre", "inch", "yard"],
    "height": ["centimetre", "foot", "millimetre", "metre", "inch", "yard"],
    "item_weight": ["milligram", "kilogram", "microgram", "gram", "ounce", "ton", "pound"],
    "maximum_weight_recommendation": ["milligram", "kilogram", "microgram", "gram", "ounce", "ton", "pound"],
    "voltage": ["millivolt", "kilovolt", "volt"],
    "wattage": ["kilowatt", "watt"],
    "item_volume": ["cubic foot", "microlitre", "cup", "fluid ounce",
                    "centilitre", "imperial gallon", "pint", "decilitre",
                    "litre", "millilitre", "quart", "cubic inch", "gallon"],
}

# Alternative spellings / abbreviations recognised for each canonical unit.
_UNIT_ALTERNATIVES = {
    "centimetre": ["cm", "cms", "centimeters", "centimeter"],
    "foot": ["ft", "feet"],
    "millimetre": ["mm", "mms", "millimeters", "millimeter"],
    "metre": ["m", "ms", "meters", "meter"],
    "inch": ["in", "inches", '"'],
    "yard": ["yd", "yds", "yards"],
    "milligram": ["mg", "mgs", "milligrams"],
    "kilogram": ["kg", "kgs", "kilograms"],
    "microgram": ["mcg", "mcgs", "micrograms"],
    "gram": ["g", "gs", "grams"],
    "ounce": ["oz", "ozs", "ounces"],
    "ton": ["tons", "tonnes"],
    "pound": ["lb", "lbs", "pounds"],
    "millivolt": ["mv", "mvs", "millivolts"],
    "kilovolt": ["kv", "kvs", "kilovolts"],
    "volt": ["v", "vs", "volts"],
    "kilowatt": ["kw", "kws", "kilowatts"],
    "watt": ["w", "ws", "watts"],
    "cubic foot": ["ft3", "cu ft", "cubic feet"],
    "microlitre": ["µl", "ul", "microlitres", "microliters", "microliter"],
    "cup": ["cups"],
    "fluid ounce": ["fl oz", "fl. oz.", "fluid ounces"],
    "centilitre": ["cl", "cls", "centilitres", "centiliters", "centiliter"],
    "imperial gallon": ["imp gal", "imperial gallons"],
    "pint": ["pt", "pts", "pints"],
    "decilitre": ["dl", "dls", "decilitres", "deciliters", "deciliter"],
    "litre": ["l", "ls", "liters", "liter"],
    "millilitre": ["ml", "mls", "millilitres", "milliliters", "milliliter"],
    "quart": ["qt", "qts", "quarts"],
    "cubic inch": ["in3", "cu in", "cubic inches"],
    "gallon": ["gal", "gals", "gallons"],
}


def clean_output(unclean_value, value_type):
    """Extract a ``"<number> <canonical unit>"`` string from messy text.

    The input is lower-cased and stripped, then searched for a number
    followed by one of the units allowed for ``value_type`` (or one of that
    unit's known spellings). Units are tried in the order they are listed
    for the entity type, and the first hit wins.

    Args:
        unclean_value: Raw value to clean; converted with ``str()``.
        value_type: Entity type selecting the allowed units, e.g. ``"width"``
            or ``"item_weight"``.

    Returns:
        ``"<number> <unit>"`` using the canonical unit name. A bare number
        gets the first unit listed for ``value_type``. An empty string is
        returned (and a message printed) when nothing can be extracted.

    Raises:
        KeyError: If ``value_type`` is not a known entity type.
    """
    unclean_value = str(unclean_value).lower().strip()
    allowed_units = _UNIT_MAPPING[value_type]

    # A bare number carries no unit, so fall back to the default unit.
    if re.match(r"^\d+\.?\d*$", unclean_value):
        return f"{unclean_value} {allowed_units[0]}"

    for unit in allowed_units:
        for alt_unit in [unit] + _UNIT_ALTERNATIVES.get(unit, []):
            # Escape the spelling: aliases such as 'fl. oz.' contain regex
            # metacharacters that must be matched literally.
            token = re.escape(alt_unit)
            prefix = re.escape(alt_unit[:3])
            # '(?!\w)' rather than '\b' as the end-of-unit check: it behaves
            # the same after a letter, but also works when the unit ends in
            # punctuation ('"', 'fl. oz.') or the prefix ends in a space.
            patterns = (
                # number, optional whitespace, full unit spelling
                rf"(\d+\.?\d*)\s*({token}s?)(?!\w)",
                # number directly followed by the first three characters of
                # the spelling (presumably to tolerate shortened unit names)
                rf"(\d+\.?\d*)({prefix}s?)(?!\w)",
                # number directly followed by the spelling, with anything
                # after it (e.g. '500gms')
                rf"(\d+\.?\d*)({token}s?)",
            )
            for pattern in patterns:
                match = re.search(pattern, unclean_value, re.IGNORECASE)
                if match:
                    return f"{match.group(1)} {unit}"

    print(f"Could not clean value: {unclean_value}")
    return ""

def _save_predictions():
    """Write every prediction collected so far to csv_output_path."""
    output_df = pd.DataFrame(output_data, columns=['index', 'prediction'])
    output_df.to_csv(csv_output_path, index=False)


# DataFrame index of the first row to process; raise it to resume a run.
start_index = 0

for index, row in data.iterrows():
    if index < start_index:
        continue

    actual_index = row['index']
    image_index = row['group_id']
    entity_name = row['entity_name']
    image_path = os.path.join(image_folder, f"{image_index}.jpg")
    print(f"Processing index: {actual_index}")

    # NOTE(review): files are named after group_id, so rows sharing a group_id
    # overwrite each other's image, and when a download fails any file already
    # on disk under that name is used instead - confirm this is intended.
    download_and_save_image(row['image_link'], image_index)

    if not os.path.exists(image_path):
        print(f"Image {image_index} not found, skipping.")
        # Record the row under its own index (not the group id) so the output
        # keeps one consistent key column, and persist it like any other row.
        output_data.append([actual_index, ""])
        _save_predictions()
        continue

    prompt = f"Extract {entity_name} from image. Do not give any other text/ information. If there are multiple values, give only the most important value along with the unit, separated by space. Answer: "
    prompt += '{'

    # Stays empty unless the model run and the clean-up both succeed.
    prediction = ""
    try:
        result = subprocess.run(
            [pyrun_name, exec_path, '--image', image_path, '--prompt', prompt],
            capture_output=True, text=True, timeout=120
        )
        if result.returncode != 0:
            # Surface failures of sample.py instead of silently producing "".
            print(f"sample.py exited with code {result.returncode}: {result.stderr.strip()}")

        # The prompt ends with '{', so only the text after the last '{' in the
        # captured stdout is passed on as the answer.
        raw_answer = result.stdout.strip().split('{')[-1].strip()
        prediction = clean_output(raw_answer, entity_name)
        print(f"Model Output: {prediction}")
    except Exception as e:
        # Per-row boundary: one bad row (timeout, unknown entity type, ...)
        # must not stop the remaining rows from being processed.
        print(f"Error processing image {image_index}: {e}")

    output_data.append([actual_index, prediction])

    # Save output data incrementally, on the success and the error path alike
    _save_predictions()
    print(f"Incremental save to {csv_output_path}")

# Final write so the message below holds even if no row triggered a save.
_save_predictions()
print("Process complete. Final output saved to", csv_output_path)
