In [1]:
import time
import os
from google import genai
import pandas as pd
from google.genai import types
# ---------------------------------------------------------------------------
# Run configuration: instrument metadata, API client, and input/output paths.
# ---------------------------------------------------------------------------
file_path = 'Dataset/2_label/V1_5 divisions_per_second.xlsx'
sheet_name = 'Metadata'

# The sheet is read as a key/value table: rows are keyed by the
# 'Column (Header)' column and the values live in 'Content / Example'.
df = pd.read_excel(file_path, sheet_name=sheet_name).set_index('Column (Header)')

# Instrument description that is interpolated into the prompt.
gauge_type = df.loc['type', 'Content / Example']
unit = df.loc['unit', 'Content / Example']
graduation_interval = df.loc['graduation_interval', 'Content / Example']

# Range limits are converted to float; a non-numeric cell raises here rather
# than producing a malformed prompt later.
reading_start = float(df.loc['manufacturer_range_min', 'Content / Example'])
reading_end = float(df.loc['manufacturer_range_max', 'Content / Example'])

# The API key is taken from the environment; a missing GOOGLE_API_KEY raises
# KeyError before any work is done.
client = genai.Client(api_key=os.environ["GOOGLE_API_KEY"])

# model_name = "gemini-3-flash-preview"
model_name = "gemini-3-pro-preview"

VIDEO_DIR = 'Dataset/2.- Analog Depth Gauge/'
# os.listdir() returns entries in arbitrary order; sort so every run walks the
# videos in the same sequence (reproducible logs, predictable resume point).
video_files = sorted(f for f in os.listdir(VIDEO_DIR) if f.endswith('.mp4'))

# NOTE(review): there is no separator between the dataset name and the model
# name, so results land in e.g. 'Results_new/2.- Analog Depth Gaugegemini-3-pro-preview'.
# Left unchanged on purpose: renaming the directory would hide existing result
# files from the "already exists" check and trigger re-processing.
output_dir = f'Results_new/2.- Analog Depth Gauge{model_name}'
os.makedirs(output_dir, exist_ok=True)
    
# ---------------------------------------------------------------------------
# Per-video pipeline: upload -> wait until the file is ACTIVE -> query the
# model -> store the raw response in one Excel file per video.
# Videos whose result file already exists are skipped, so the cell can be
# re-run to resume an interrupted batch.
# ---------------------------------------------------------------------------

# Upper bound on how long to wait for an uploaded file to leave the processing
# state, and the pause between two status checks.
FILE_ACTIVATION_TIMEOUT_SEC = 600
FILE_POLL_INTERVAL_SEC = 5

# The prompt depends only on the instrument metadata, so it is built once
# instead of once per video.
# CoT prompt
message = f'''
    ROLE: Expert Industrial Metrology Assistant

    PROTOCOL: See-Think-Confirm
    1. SEE: Localize gauge boundaries. Identify [0%] and [100%] markers.
    2. THINK: Synchronize needle position with the in-band digital clock. Calculate velocity (ΔReading/ΔTime). Use 'Thought Signatures' to maintain temporal state between frames.
    3. CONFIRM: Verify that the reading is within [min, max] and follows physical monotonicity relative to previous frames.
    CONSTRAINT: Output JSON only. Format: {{"ts_ms": integer, "reading": float, "confidence": float}}
    TASK: Extract a high-precision time-series of readings from the video.

    INSTRUMENT METADATA:
    - Gauge Type:{gauge_type} | Unit: {unit} 
    - Graduation Interval: {graduation_interval}
    - Calibrated Range: [{reading_start}, {reading_end}]

    SAMPLING PROTOCOL:
    - Absolute Time Reference: Every 'ts_ms' in your output must correspond to the EXACT numerical value shown on the digital chronometer in the frame.
    - Starting Point: The chronometer in this video starts at a non-zero value. Ignore any frames before the timer reaches the first 200ms integer increment (e.g., if the timer starts at 5670ms, your first entry should be at 5800ms).
    - Sampling Interval: Provide one reading every 200ms based on the chronometer's increments (e.g., 5800, 6000, 6200...). 
    - Duration: Continue this sequence until the video ends, strictly following the chronometer's values, regardless of the video file's elapsed time.

    RESPONSE REQUIREMENT:
    Output a single JSON array where each entry contains: {{"ts_ms": integer, "reading": float, "confidence": float}}
    '''

# Alternative "naive" prompt, kept for comparison runs; uncomment to replace
# the CoT prompt above.
# naive prompt
# message = f'''
#     ROLE: Expert Industrial Metrology Assistant
#
#     TASK: Read the analog gauge values from the video at specific time intervals.
#
#     INSTRUMENT METADATA:
#     - Gauge Type:{gauge_type} | Unit: {unit}
#     - Graduation Interval: {graduation_interval}
#     - Calibrated Range: [{reading_start}, {reading_end}]
#
#     SAMPLING REQUIREMENTS:
#     - Use the digital chronometer shown in the video for timing.
#     - Start reading from the first 200ms integer increment.
#     - Provide one reading every 200ms (e.g., 5800, 6000, 6200...).
#     - Continue until the video ends.
#
#     OUTPUT FORMAT:
#     Return only a JSON array of objects: {{"ts_ms": integer, "reading": float, "confidence": float}}
#     '''

for video_filename in video_files:
    seq_id = os.path.splitext(video_filename)[0]

    # Resume support: one result file per video; an existing file means done.
    output_file = os.path.join(output_dir, f'{seq_id}_Raw_Results.xlsx')
    if os.path.exists(output_file):
        print(f"Sikpping: {seq_id} (the result file already exists)")
        continue

    print(f"\n{'='*20} Processing: {seq_id} {'='*20}")

    print("Uploading...")
    # The reported duration covers upload, server-side processing and
    # generation.
    program_start_time = time.time()
    # Use the listed filename directly instead of rebuilding it from seq_id;
    # os.path.join also works if VIDEO_DIR has no trailing slash.
    myfile = client.files.upload(file=os.path.join(VIDEO_DIR, video_filename))
    print(f"Uploaded: {myfile.name}")

    try:
        print("Processing...")
        # Poll until the uploaded file becomes usable. The deadline prevents a
        # file that never leaves the processing state from hanging the batch.
        deadline = time.monotonic() + FILE_ACTIVATION_TIMEOUT_SEC
        while True:
            file_status = client.files.get(name=myfile.name)

            if file_status.state == "ACTIVE":
                print("Succeed")
                break
            if file_status.state == "FAILED":
                raise Exception("Failed (State: FAILED)")
            if time.monotonic() >= deadline:
                raise TimeoutError(
                    f"File {myfile.name} was not ACTIVE after "
                    f"{FILE_ACTIVATION_TIMEOUT_SEC} s (last state: {file_status.state})"
                )

            print("Processing, waiting...")
            time.sleep(FILE_POLL_INTERVAL_SEC)

        print("Generating...")
        response = client.models.generate_content(
            model=model_name,
            contents=[myfile, message],
            config=types.GenerateContentConfig(
                temperature=0.0,
                thinking_config=types.ThinkingConfig(thinking_level="high"),
            ),
        )
        # Earlier prompt variant, kept for reference:
        # "Identify the analog gauge and output a timestamped table of its readings based on the visible dial markings."
        print(response.text)

        # Stop the clock before cleanup so the recorded duration keeps the
        # same meaning as before (upload through generation).
        program_end_time = time.time()
    finally:
        # Remove the uploaded video on success and on failure so repeated runs
        # do not accumulate files on the remote side. Best effort: a cleanup
        # failure is reported but must not mask the outcome of the run itself.
        try:
            client.files.delete(name=myfile.name)
        except Exception as cleanup_error:
            print(f"Warning: could not delete uploaded file {myfile.name}: {cleanup_error}")

    total_duration = program_end_time - program_start_time
    print(total_duration)

    # The raw, unparsed model text is stored; parsing is left to a later step.
    df_output = pd.DataFrame([{
        "sequence_id": seq_id,
        "raw_model_response": response.text,
        "total_duration_sec": round(total_duration, 2),
        "model_name": model_name,
    }])

    df_output.to_excel(output_file, index=False)


Uploading...
Uploaded: files/94di9qqzma0c
Processing...
Processing, waiting...
Processing, waiting...
Succeed
Generating...




```json
[
  {"ts_ms": 4400, "reading": 12.25, "confidence": 0.95},
  {"ts_ms": 4600, "reading": 12.25, "confidence": 0.95},
  {"ts_ms": 4800, "reading": 12.25, "confidence": 0.95},
  {"ts_ms": 5000, "reading": 12.25, "confidence": 0.95},
  {"ts_ms": 5200, "reading": 12.25, "confidence": 0.95},
  {"ts_ms": 5400, "reading": 12.25, "confidence": 0.95},
  {"ts_ms": 5600, "reading": 12.25, "confidence": 0.95},
  {"ts_ms": 5800, "reading": 12.25, "confidence": 0.95},
  {"ts_ms": 6000, "reading": 12.25, "confidence": 0.95},
  {"ts_ms": 6200, "reading": 12.25, "confidence": 0.95},
  {"ts_ms": 6400, "reading": 12.25, "confidence": 0.95},
  {"ts_ms": 6600, "reading": 12.25, "confidence": 0.95},
  {"ts_ms": 6800, "reading": 12.25, "confidence": 0.95},
  {"ts_ms": 7000, "reading": 12.25, "confidence": 0.95},
  {"ts_ms": 7200, "reading": 12.25, "confidence": 0.95},
  {"ts_ms": 7400, "reading": 12.25, "confidence": 0.95},
  {"ts_ms": 7600, "reading": 12.25, "confidence": 0.95},
  {"ts_ms": 7800, "re



```json
[
  {"ts_ms": 3000, "reading": 15.8, "confidence": 0.95},
  {"ts_ms": 3200, "reading": 15.8, "confidence": 0.95},
  {"ts_ms": 3400, "reading": 15.8, "confidence": 0.95},
  {"ts_ms": 3600, "reading": 15.8, "confidence": 0.95},
  {"ts_ms": 3800, "reading": 15.8, "confidence": 0.95},
  {"ts_ms": 4000, "reading": 15.8, "confidence": 0.95},
  {"ts_ms": 4200, "reading": 15.8, "confidence": 0.95},
  {"ts_ms": 4400, "reading": 15.8, "confidence": 0.95},
  {"ts_ms": 4600, "reading": 15.8, "confidence": 0.95},
  {"ts_ms": 4800, "reading": 15.8, "confidence": 0.95},
  {"ts_ms": 5000, "reading": 15.8, "confidence": 0.95},
  {"ts_ms": 5200, "reading": 15.8, "confidence": 0.95},
  {"ts_ms": 5400, "reading": 15.8, "confidence": 0.95},
  {"ts_ms": 5600, "reading": 15.8, "confidence": 0.95},
  {"ts_ms": 5800, "reading": 15.8, "confidence": 0.95},
  {"ts_ms": 6000, "reading": 15.8, "confidence": 0.95},
  {"ts_ms": 6200, "reading": 15.8, "confidence": 0.95},
  {"ts_ms": 6400, "reading": 15.8, "co



[
  {"ts_ms": 3200, "reading": 14.45, "confidence": 0.95},
  {"ts_ms": 3400, "reading": 14.45, "confidence": 0.95},
  {"ts_ms": 3600, "reading": 14.45, "confidence": 0.95},
  {"ts_ms": 3800, "reading": 14.45, "confidence": 0.95},
  {"ts_ms": 4000, "reading": 14.45, "confidence": 0.95},
  {"ts_ms": 4200, "reading": 14.45, "confidence": 0.95},
  {"ts_ms": 4400, "reading": 14.45, "confidence": 0.95},
  {"ts_ms": 4600, "reading": 14.45, "confidence": 0.95},
  {"ts_ms": 4800, "reading": 14.45, "confidence": 0.95},
  {"ts_ms": 5000, "reading": 14.45, "confidence": 0.95},
  {"ts_ms": 5200, "reading": 14.45, "confidence": 0.95},
  {"ts_ms": 5400, "reading": 14.45, "confidence": 0.95},
  {"ts_ms": 5600, "reading": 14.45, "confidence": 0.95},
  {"ts_ms": 5800, "reading": 14.45, "confidence": 0.95},
  {"ts_ms": 6000, "reading": 14.45, "confidence": 0.95},
  {"ts_ms": 6200, "reading": 14.45, "confidence": 0.95},
  {"ts_ms": 6400, "reading": 14.45, "confidence": 0.95},
  {"ts_ms": 6600, "reading": 



[
  {"ts_ms": 2800, "reading": 16.5, "confidence": 0.95},
  {"ts_ms": 3000, "reading": 16.5, "confidence": 0.95},
  {"ts_ms": 3200, "reading": 16.5, "confidence": 0.95},
  {"ts_ms": 3400, "reading": 16.5, "confidence": 0.95},
  {"ts_ms": 3600, "reading": 16.5, "confidence": 0.95},
  {"ts_ms": 3800, "reading": 16.5, "confidence": 0.95},
  {"ts_ms": 4000, "reading": 16.5, "confidence": 0.95},
  {"ts_ms": 4200, "reading": 16.5, "confidence": 0.95},
  {"ts_ms": 4400, "reading": 16.5, "confidence": 0.95},
  {"ts_ms": 4600, "reading": 16.5, "confidence": 0.95},
  {"ts_ms": 4800, "reading": 16.5, "confidence": 0.95},
  {"ts_ms": 5000, "reading": 16.5, "confidence": 0.95},
  {"ts_ms": 5200, "reading": 16.5, "confidence": 0.95},
  {"ts_ms": 5400, "reading": 16.5, "confidence": 0.95},
  {"ts_ms": 5600, "reading": 16.5, "confidence": 0.95},
  {"ts_ms": 5800, "reading": 16.5, "confidence": 0.95},
  {"ts_ms": 6000, "reading": 16.5, "confidence": 0.95},
  {"ts_ms": 6200, "reading": 16.5, "confidence



[
  {"ts_ms": 2800, "reading": 13.81, "confidence": 0.92},
  {"ts_ms": 3000, "reading": 13.90, "confidence": 0.92},
  {"ts_ms": 3200, "reading": 13.99, "confidence": 0.92},
  {"ts_ms": 3400, "reading": 14.08, "confidence": 0.92},
  {"ts_ms": 3600, "reading": 14.17, "confidence": 0.92},
  {"ts_ms": 3800, "reading": 14.25, "confidence": 0.92},
  {"ts_ms": 4000, "reading": 14.34, "confidence": 0.92},
  {"ts_ms": 4200, "reading": 14.43, "confidence": 0.92},
  {"ts_ms": 4400, "reading": 14.52, "confidence": 0.92},
  {"ts_ms": 4600, "reading": 14.61, "confidence": 0.92},
  {"ts_ms": 4800, "reading": 14.69, "confidence": 0.92},
  {"ts_ms": 5000, "reading": 14.78, "confidence": 0.92},
  {"ts_ms": 5200, "reading": 14.87, "confidence": 0.92},
  {"ts_ms": 5400, "reading": 14.96, "confidence": 0.92},
  {"ts_ms": 5600, "reading": 15.05, "confidence": 0.92},
  {"ts_ms": 5800, "reading": 15.13, "confidence": 0.92},
  {"ts_ms": 6000, "reading": 15.22, "confidence": 0.92},
  {"ts_ms": 6200, "reading": 



```json
[
  {"ts_ms": 3000, "reading": 15.75, "confidence": 0.95},
  {"ts_ms": 3200, "reading": 15.75, "confidence": 0.95},
  {"ts_ms": 3400, "reading": 15.75, "confidence": 0.95},
  {"ts_ms": 3600, "reading": 15.75, "confidence": 0.95},
  {"ts_ms": 3800, "reading": 15.75, "confidence": 0.95},
  {"ts_ms": 4000, "reading": 15.75, "confidence": 0.95},
  {"ts_ms": 4200, "reading": 15.75, "confidence": 0.95},
  {"ts_ms": 4400, "reading": 15.75, "confidence": 0.95},
  {"ts_ms": 4600, "reading": 15.75, "confidence": 0.95},
  {"ts_ms": 4800, "reading": 15.75, "confidence": 0.95},
  {"ts_ms": 5000, "reading": 15.75, "confidence": 0.95},
  {"ts_ms": 5200, "reading": 15.75, "confidence": 0.95},
  {"ts_ms": 5400, "reading": 15.75, "confidence": 0.95},
  {"ts_ms": 5600, "reading": 15.75, "confidence": 0.95},
  {"ts_ms": 5800, "reading": 15.75, "confidence": 0.95},
  {"ts_ms": 6000, "reading": 15.75, "confidence": 0.90},
  {"ts_ms": 6200, "reading": 15.25, "confidence": 0.85},
  {"ts_ms": 6400, "re

In [None]:
#     message = f'''
# ROLE: Expert Industrial Metrology Assistant.

# PROTOCOL: See-Think-Confirm (apply internally; output JSON only as specified below).

# 1. SEE
#    - Locate the mechanical (vernier) caliper and its scale boundaries. Treat the scale minimum as [0%] and scale maximum as [100%] (these correspond to Calibrated Range below).
#    - Identify the digital chronometer in the frame and read its displayed time in milliseconds.
#    - Read the caliper: (a) main scale reading at the leading edge of the vernier (or jaw); (b) find the vernier division that best aligns with a main-scale line; combine into one reading using Graduation Interval for the vernier resolution.

# 2. THINK
#    - Map each required timestamp (ts_ms) to the correct moment in the video using the in-band digital chronometer as the sole time reference.
#    - If no frame shows the chronometer exactly at ts_ms: pick the nearest frame(s) and linearly interpolate the reading from adjacent chronometer values and caliper readings (ΔReading/ΔTime). Maintain temporal continuity: each new reading should be consistent with the previous reading and the direction of motion (slider/jaw moving in or out; reading increasing or decreasing).
#    - Optionally use thought signatures or internal state to carry last_ts_ms, last_reading, and trend across frames.

# 3. CONFIRM
#    - Clamp every reading to the Calibrated Range [min, max].
#    - Enforce physical monotonicity: readings should not reverse direction unless the slider/jaw visibly moves backward in the video. Small jitter from parallax or blur may be smoothed; if unsure, reflect it in confidence.
#    - Set confidence (0.0–1.0) by: visibility of main scale and vernier, clarity of the aligning vernier division, alignment of chronometer with ts_ms, and consistency with neighboring readings. Lower confidence if interpolating heavily or if the caliper is partially occluded.

# CONSTRAINT: Output a single JSON array only. No markdown, no explanation outside the array. Each element: {{"ts_ms": integer, "reading": float, "confidence": float}}.

# TASK: Produce a high-precision time-series of caliper readings from the video at the specified sampling instants.

# INSTRUMENT METADATA:
# - Gauge Type: {gauge_type} | Unit: {unit}
# - Graduation Interval: {graduation_interval}
# - Calibrated Range: [{reading_start}, {reading_end}]

# SAMPLING PROTOCOL:
# - Time reference: Every output ts_ms must align with the digital chronometer value (in ms) visible in the video. Use the chronometer as the ground truth for time; ignore video file timestamps.
# - Start: Begin from the first 200 ms integer multiple reached by the chronometer (e.g. if it starts at 5670 ms, first output entry at 5800 ms). Omit any earlier frames.
# - Interval: One reading every 200 ms by chronometer (e.g. 5800, 6000, 6200, …).
# - End: Continue until the video ends, always using chronometer values for ts_ms.

# RESPONSE REQUIREMENT:
# Output only one JSON array. Each entry: {{"ts_ms": integer, "reading": float, "confidence": float}}. No other text before or after.
# '''