In [31]:
#!pip install together opencv-python

In [2]:
# Paths for the input screenshots/recordings and for the generated Markdown outputs.
# All values are relative to the notebook's working directory.

# Input images (local screenshot files; the `image_url_*` names also hold local
# file paths, not URLs).
image_url_terminal = "./Screenshots/Terminal.png"
image_url_excel = "./Screenshots/Excel.png"
image_url_chrome = "./Screenshots/Chrome.png"
image_path_pdf = "./Screenshots/PDF.png"
image_path_powershell_error = "./Screenshots/Powershell_error.png"
image_path_powershell_success = "./Screenshots/Powershell_error_free.png"

# Input videos (screen recordings)
video_path_coding_vba = "./VidRecordings/CodingVBA_sample.mp4"

# Output Markdown reports for the image analyses
output_pdf = "./Outputs/analysis_output_pdf.md"
output_powershell_error = "./Outputs/analysis_output_powershell_error_nonAdmin.md"
output_powershell_success = "./Outputs/analysis_output_powershell_Admin.md"
output_terminal = "./Outputs/analysis_output_terminal.md"
output_excel = "./Outputs/analysis_output_excel.md"
output_chrome = "./Outputs/analysis_output_chrome.md"

# Output Markdown reports for the video analyses
output_video_coding_vba = "./Outputs/analysis_output_coding_vba.md"

In [3]:
import base64
import os
from nbconvert import export
import requests
from together import Together
from PIL import Image
from io import BytesIO
import json

# SECURITY: the Together AI API key must be supplied through the environment
# (for example exported in the shell that starts Jupyter, or loaded from an
# untracked .env file). It must never be written into the notebook itself.
# The key that used to be hardcoded on this line should be treated as
# compromised and revoked.
if not os.environ.get("TOGETHER_API_KEY"):
    raise RuntimeError(
        "TOGETHER_API_KEY is not set. Export it before starting Jupyter, "
        "e.g. `export TOGETHER_API_KEY=<your key>`."
    )

# Initialize the Together AI client; no key is passed explicitly, so it is
# expected to pick up TOGETHER_API_KEY from the environment.
client = Together()

def load_image(image_source, is_url=False):
    """
    Load an image from a URL or local file and return it as a base64-encoded string with content type.

    The image is decoded with Pillow and re-encoded in memory, so the returned
    content type always describes the bytes that were actually produced.

    Args:
        image_source (str): URL or local file path to the image.
        is_url (bool): True if image_source is a URL, False if local file.

    Returns:
        tuple: (base64-encoded image string, content type)

    Raises:
        ValueError: If the image cannot be downloaded, opened, or re-encoded.
    """
    # Pillow registers its JPEG encoder under the name "JPEG" only; passing
    # "jpg" to Image.save() fails, and "image/jpg" is not a standard MIME type.
    format_aliases = {'jpeg': 'JPEG', 'jpg': 'JPEG', 'png': 'PNG'}

    try:
        if is_url:
            response = requests.get(image_source, timeout=10)
            response.raise_for_status()
            img = Image.open(BytesIO(response.content))
            # The header may carry parameters ("image/png; charset=binary");
            # only the subtype is relevant here.
            header = response.headers.get('Content-Type', 'image/jpeg')
            subtype = header.split(';')[0].strip().split('/')[-1].lower()
            # For subtypes outside the alias table, trust the format Pillow
            # detected from the bytes rather than the server's header.
            image_format = format_aliases.get(subtype) or img.format or 'JPEG'
        else:
            extension = os.path.splitext(image_source)[1][1:].lower()
            # Unknown or missing extensions are re-encoded as JPEG.
            image_format = format_aliases.get(extension, 'JPEG')
            img = Image.open(image_source)

        # JPEG cannot store alpha or palette data, so normalise such images
        # (e.g. an RGBA screenshot with a .jpg name) before encoding.
        if image_format == 'JPEG' and img.mode not in ('RGB', 'L', 'CMYK'):
            img = img.convert('RGB')

        buffer = BytesIO()
        img.save(buffer, format=image_format)
        img_str = base64.b64encode(buffer.getvalue()).decode("utf-8")
        content_type = f"image/{image_format.lower()}"
        return img_str, content_type

    except Exception as e:
        # Chain the original exception so the root cause stays in the traceback.
        raise ValueError(f"Failed to load image: {e}") from e

def generate_image_summary(image_source, is_url=False, max_tokens=5000):
    """
    Generate a summary of an image using Llama 4 Maverick via Together AI API.

    The image is sent as a base64 data URL in an ``image_url`` content part,
    next to a text prompt that asks for a JSON object with the keys
    ``applications_open``, ``text_content`` and ``summary``.

    Args:
        image_source (str): URL or local file path to the image.
        is_url (bool): True if image_source is a URL, False if local file.
        max_tokens (int): Maximum tokens for the summary.

    Returns:
        str: The model's raw reply with surrounding whitespace stripped
            (an empty string if the reply has no text content).

    Raises:
        ValueError: If the image cannot be loaded (raised by ``load_image``).
    """

    # NOTE: the prompt is a plain (non-f) string. It previously contained an
    # "Image data: data:image/png;base64,{base64_image}" line whose placeholder
    # was never substituted, so that literal text was sent to the model. The
    # image itself is delivered through the image_url part below.
    prompt = """You are an expert computer vision analyst specializing in desktop environment analysis. Examine the provided screenshot with meticulous attention to detail and deliver your analysis in the following JSON format:

        {
            "applications_open": ["comprehensive list of ALL visible applications, software, and browser tabs in the screenshot"],
            "text_content": ["ALL readable text visible in the screenshot, including application names, window titles, menu items, document content, code snippets, terminal commands, browser content, notifications, taskbar information, and any other visible text elements"],
            "summary": "A comprehensive yet concise analysis that integrates all observations from applications_open and text_content into a cohesive narrative. Describe what the user appears to be working on, the relationship between open applications, and provide context for the visible content. This summary must be detailed enough to stand alone as a complete analysis of the screenshot."
        }

        Analysis guidelines:
        1. Be exhaustive in identifying ALL open applications - include minimized apps in taskbars, system trays, docks, browser tabs, and background processes with visual indicators
        2. Capture ALL visible text regardless of size or prominence - include menu items, file paths, code, commands, URLs, and partial text if readable
        3. When analyzing code or technical content, note the programming language, frameworks, or technologies in use
        4. Pay attention to timestamps, usernames, file names, and other contextual information
        5. Consider the relationship between open applications to infer the user's workflow
        6. In the summary, reconstruct the likely sequence of the user's activities based on the visible evidence

        Provide only the JSON response without any introduction or additional text."""

    img_base64, content_type = load_image(image_source, is_url)
    messages = [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": prompt},
                {"type": "image_url", "image_url": {"url": f"data:{content_type};base64,{img_base64}"}}
            ]
        }
    ]

    response = client.chat.completions.create(
        model="meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
        messages=messages,
        temperature=0.3,
        max_tokens=max_tokens,
    )

    # Guard against a reply without text content so .strip() cannot fail on None.
    content = response.choices[0].message.content
    return (content or "").strip()




In [4]:
import cv2
import numpy as np
from typing import List, Tuple
import tempfile
import shutil
import os
from PIL import Image

def _to_comparison_gray(frame, resize_factor):
    """Optionally downscale a BGR frame and convert it to grayscale for differencing."""
    if resize_factor != 1.0:
        frame = cv2.resize(frame, None, fx=resize_factor, fy=resize_factor)
    return cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)


def extract_significant_frames(
    video_path: str, 
    difference_threshold: float = 0.1,
    min_frame_interval: int = 5,
    max_frames: int = 100,
    resize_factor: float = 0.5
) -> Tuple[List[Tuple[int, float, float]], dict]:
    """
    Extract significant frame metadata from a video using frame differencing.
    Uses resized frames for fast processing but returns metadata for high-quality extraction.

    Each frame is compared with the frame immediately before it: the absolute
    grayscale difference is blurred, thresholded at 25, and the fraction of
    pixels above that threshold is the frame's change score. The first frame is
    always reported, with a score of 1.0.

    Args:
        video_path (str): Path to the input video file
        difference_threshold (float): Threshold for considering frames significantly different (0-1)
        min_frame_interval (int): Minimum number of frames between significant frames
        max_frames (int): Maximum number of significant frames to extract
        resize_factor (float): Factor to resize frames for faster processing (doesn't affect output quality)

    Returns:
        Tuple[List[Tuple[int, float, float]], dict]: 
            - List of (frame_number, timestamp, difference_score)
            - Video metadata dict with fps, total_frames, etc.

    Raises:
        ValueError: If the video cannot be opened or its first frame cannot be read.
    """

    cap = cv2.VideoCapture(video_path)

    # try/finally guarantees the capture handle is released on every exit path,
    # including the early ValueError raises below.
    try:
        if not cap.isOpened():
            raise ValueError(f"Error opening video file: {video_path}")

        # Get video properties
        total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        fps = cap.get(cv2.CAP_PROP_FPS)
        duration = total_frames / fps if fps > 0 else 0

        video_metadata = {
            'fps': fps,
            'total_frames': total_frames,
            'duration': duration,
            'video_path': video_path
        }

        print(f"Video info: {total_frames} frames, {fps:.2f} FPS, {duration:.1f}s duration")

        significant_frame_metadata = []
        last_significant_frame = -min_frame_interval
        frame_count = 0

        # Read first frame
        ret, prev_frame = cap.read()
        if not ret:
            raise ValueError("Could not read the first frame")

        # Resized grayscale copies are used for comparison only
        prev_gray = _to_comparison_gray(prev_frame, resize_factor)

        # Add first frame as significant
        significant_frame_metadata.append((0, 0.0, 1.0))

        print("Processing frames for significant changes...")

        while True:
            ret, current_frame = cap.read()
            if not ret:
                break

            frame_count += 1
            timestamp = frame_count / fps if fps > 0 else 0

            current_gray = _to_comparison_gray(current_frame, resize_factor)

            # Calculate frame difference and blur it to reduce noise
            frame_diff = cv2.absdiff(prev_gray, current_gray)
            frame_diff_blur = cv2.GaussianBlur(frame_diff, (5, 5), 0)

            # Calculate percentage of changed pixels
            _, thresh = cv2.threshold(frame_diff_blur, 25, 255, cv2.THRESH_BINARY)
            changed_pixels = cv2.countNonZero(thresh)
            total_pixels = thresh.shape[0] * thresh.shape[1]
            change_percentage = changed_pixels / total_pixels

            # Check if frame is significantly different and meets interval requirement
            if (change_percentage > difference_threshold and 
                frame_count - last_significant_frame >= min_frame_interval):

                significant_frame_metadata.append((frame_count, timestamp, change_percentage))
                last_significant_frame = frame_count

                print(f"Significant frame found: Frame {frame_count}, Time: {timestamp:.2f}s, Change: {change_percentage:.3f}")

                # Stop if we've reached max frames
                if len(significant_frame_metadata) >= max_frames:
                    print(f"Reached maximum frames limit ({max_frames})")
                    break

            # current_gray is a fresh array every iteration, so no copy is needed
            prev_gray = current_gray

            # Print progress (skipped when the container reports no frame count)
            if frame_count % 1000 == 0 and total_frames > 0:
                progress = (frame_count / total_frames) * 100
                print(f"Progress: {progress:.1f}% ({frame_count}/{total_frames})")
    finally:
        cap.release()

    print(f"Extracted {len(significant_frame_metadata)} significant frame timestamps from {total_frames} total frames")
    # Some containers report a frame count of 0; avoid dividing by it.
    if total_frames > 0:
        print(f"Frame reduction: {((total_frames - len(significant_frame_metadata)) / total_frames) * 100:.1f}%")

    return significant_frame_metadata, video_metadata

def extract_high_quality_frames_from_video(
    video_path: str,
    frame_metadata: List[Tuple[int, float, float]],
    output_dir: str = "./temp_frames"
) -> List[str]:
    """
    Extract high-quality frames from video using frame numbers/timestamps.
    This ensures maximum quality for AI analysis.

    Frames are read from the original video (no resizing) and written as PNG
    files named ``frame_<frame_number>_diff_<difference_score>.png``. Frames
    that cannot be read or written are skipped with a warning, so the returned
    list may be shorter than ``frame_metadata``.

    Args:
        video_path (str): Path to the input video file
        frame_metadata: List of (frame_number, timestamp, difference_score)
        output_dir: Directory to save frame images

    Returns:
        List[str]: List of saved high-quality image file paths

    Raises:
        ValueError: If the video file cannot be opened.
    """

    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    cap = cv2.VideoCapture(video_path)

    image_paths = []
    total_frames_to_extract = len(frame_metadata)

    # try/finally releases the capture handle even if opening, reading or
    # writing raises.
    try:
        if not cap.isOpened():
            raise ValueError(f"Error opening video file: {video_path}")

        print(f"Extracting {total_frames_to_extract} high-quality frames from original video...")

        for i, (frame_num, timestamp, diff_score) in enumerate(frame_metadata):
            # Set video position to specific frame
            cap.set(cv2.CAP_PROP_POS_FRAMES, frame_num)

            ret, frame = cap.read()
            if not ret:
                print(f"Warning: Could not read frame {frame_num}")
                continue

            # Create filename
            filename = f"frame_{frame_num:06d}_diff_{diff_score:.3f}.png"
            filepath = os.path.join(output_dir, filename)

            # PNG is lossless; compression level 1 trades file size for speed
            success = cv2.imwrite(filepath, frame, [cv2.IMWRITE_PNG_COMPRESSION, 1])

            if success:
                image_paths.append(filepath)
                if (i + 1) % 10 == 0:
                    print(f"  Extracted {i + 1}/{total_frames_to_extract} high-quality frames")
            else:
                print(f"Warning: Failed to save frame {frame_num}")
    finally:
        cap.release()

    print(f"Successfully saved {len(image_paths)} high-quality frame images to {output_dir}")
    return image_paths

def save_frames_as_images(frames_data: List[Tuple[int, float, float]], 
                         video_path: str,
                         output_dir: str = "./temp_frames") -> List[str]:
    """
    Backward-compatible entry point for saving frames.

    Delegates directly to ``extract_high_quality_frames_from_video``; note that
    the first two arguments are swapped relative to that function.

    Args:
        frames_data: List of (frame_number, timestamp, difference_score)
        video_path: Path to the original video file
        output_dir: Directory to save frame images

    Returns:
        List[str]: List of saved image file paths
    """
    saved_paths = extract_high_quality_frames_from_video(
        video_path=video_path,
        frame_metadata=frames_data,
        output_dir=output_dir,
    )
    return saved_paths


In [5]:
import json
import re

def _strip_code_fence(text):
    """Remove a surrounding Markdown code fence (with optional ``json`` tag), if present."""
    if not text.startswith("```"):
        return text
    text = text[3:]
    # The closing fence may be missing when the model output was cut off.
    if text.endswith("```"):
        text = text[:-3]
    text = text.strip()
    if text.lower().startswith("json"):
        text = text[4:].strip()
    return text


def _outermost_object(text):
    """Return the span from the first '{' to the last '}' in text, or '' if there is none."""
    start = text.find("{")
    end = text.rfind("}")
    return text[start:end + 1] if 0 <= start < end else ""


def _decode_json_string_body(raw):
    """Decode the inside of a JSON string literal (handles \\n, \\t, \\uXXXX, ...)."""
    try:
        # strict=False tolerates raw control characters such as literal newlines.
        return json.loads('"' + raw + '"', strict=False)
    except json.JSONDecodeError:
        # Malformed escape: fall back to undoing only the common ones.
        return raw.replace('\\"', '"').replace('\\\\', '\\')


def _extract_string_array(text, key):
    """
    Collect the string items of the JSON array stored under `key`.

    The scan is quote- and escape-aware, so a ']' inside a string does not end
    the array, and it works on truncated text: every string that was closed
    before the cut-off is returned. Empty strings are skipped.
    """
    header = re.search(r'"%s"\s*:\s*\[' % re.escape(key), text)
    if not header:
        return []

    items = []
    buffer = None      # None while outside a string literal
    escaped = False
    for char in text[header.end():]:
        if buffer is None:
            if char == '"':
                buffer = []
            elif char == ']':
                break
            continue
        if escaped:
            buffer.append(char)
            escaped = False
        elif char == '\\':
            buffer.append(char)
            escaped = True
        elif char == '"':
            raw = ''.join(buffer)
            if raw:
                items.append(_decode_json_string_body(raw))
            buffer = None
        else:
            buffer.append(char)
    return items


def convert_output_to_json(output_str):
    """
    Convert the output string to a JSON object with robust error handling.

    Parsing is attempted in this order:
      1. strict JSON parsing of the text after removing a Markdown code fence;
      2. strict JSON parsing of the outermost ``{...}`` span (handles replies
         that wrap the JSON in prose);
      3. a tolerant field-by-field extraction that also copes with malformed or
         truncated output.

    Args:
        output_str (str): The string containing the JSON-like data structure

    Returns:
        dict: The parsed object when strict parsing succeeds (returned as-is,
            including any extra keys); otherwise a dict with exactly the keys
            "applications_open" (list), "text_content" (list) and "summary" (str).
            A non-string input yields the empty structure.
    """
    if not isinstance(output_str, str):
        output_str = ""

    cleaned = _strip_code_fence(output_str.strip())

    # Steps 1 and 2: strict parsing
    for candidate in (cleaned, _outermost_object(cleaned)):
        if not candidate:
            continue
        try:
            parsed = json.loads(candidate)
        except json.JSONDecodeError:
            continue
        if isinstance(parsed, dict):
            return parsed

    # Step 3: tolerant extraction
    result = {
        "applications_open": _extract_string_array(cleaned, "applications_open"),
        "text_content": _extract_string_array(cleaned, "text_content"),
        "summary": ""
    }

    # The closing quote is optional so that a summary cut off mid-sentence is
    # still recovered instead of being dropped entirely.
    summary_match = re.search(r'"summary"\s*:\s*"((?:\\.|[^"\\])*)', cleaned, re.DOTALL)
    if summary_match:
        result["summary"] = _decode_json_string_body(summary_match.group(1))

    return result

In [6]:
from typing import Union, Dict, List, Any
import json
from datetime import datetime

def json_to_markdown(
    json_data: Union[str, Dict[str, Any]], 
    output_file: str = "analysis_output_excel.md",
    username: str = "Aarav",
    timestamp: Union[str, None] = None
) -> str:
    """
    Convert JSON data to a nicely formatted Markdown file.

    Args:
        json_data: Either a JSON string or a parsed JSON object containing analysis data
        output_file: Path/name of the output markdown file; missing parent
            directories are created
        username: Current user's login (optional; an empty value omits the line)
        timestamp: Timestamp to print in the report. ``None`` (the default)
            means "now", evaluated on every call; an empty string omits the line.

    Returns:
        str: Path to the created markdown file on success. If the file cannot be
            written, no exception is raised; the error message string
            ("Error writing to file: ...") is returned instead.
    """
    # The default must be computed per call. A datetime.now() expression in the
    # signature is evaluated only once, when the function is defined, so every
    # report would carry the time the cell was run.
    if timestamp is None:
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    # Parse JSON if a string is provided
    if isinstance(json_data, str):
        # Try to parse it directly first
        try:
            # Clean string of code block markers if present
            clean_str = json_data.strip()
            if clean_str.startswith("```") and clean_str.endswith("```"):
                clean_str = clean_str[3:-3].strip()
                if clean_str.startswith("json"):
                    clean_str = clean_str[4:].strip()

            data = json.loads(clean_str)
        except json.JSONDecodeError:
            # If the standard parsing fails, use our custom parser
            data = convert_output_to_json(json_data)
    else:
        data = json_data

    # Anything that is not a mapping (None, a JSON list, ...) has none of the
    # expected sections; render the "nothing detected" placeholders instead of
    # failing on .get().
    if not isinstance(data, dict):
        data = {}

    # Create markdown content
    md_content = []

    # Add title and metadata
    md_content.append("# Desktop Screenshot Analysis\n")

    if timestamp or username:
        md_content.append("## Metadata\n")
        if timestamp:
            md_content.append(f"**Timestamp:** {timestamp}\n")
        if username:
            md_content.append(f"**User:** {username}\n")
        md_content.append("\n")

    # Add applications section
    md_content.append("## Applications Open\n")
    if data.get("applications_open"):
        for app in data["applications_open"]:
            md_content.append(f"- {app}\n")
    else:
        md_content.append("*No applications detected*\n")
    md_content.append("\n")

    # Add text content section
    md_content.append("## Text Content\n")
    if data.get("text_content"):
        md_content.append("```\n")  # Start code block
        for text_item in data["text_content"]:
            md_content.append(f"{text_item}\n")
        md_content.append("```\n")  # End code block
    else:
        md_content.append("*No text content detected*\n")
    md_content.append("\n")

    # Add summary section
    md_content.append("## Summary\n")
    if data.get("summary"):
        md_content.append(f"{data['summary']}\n")
    else:
        md_content.append("*No summary available*\n")

    # Write to file
    try:
        # Create the target directory (e.g. "./Outputs") on first use.
        parent_dir = os.path.dirname(output_file)
        if parent_dir:
            os.makedirs(parent_dir, exist_ok=True)

        with open(output_file, 'w', encoding='utf-8') as f:
            f.write(''.join(md_content))
        print(f"Successfully created Markdown file: {output_file}")
        return output_file
    except Exception as e:
        # Best-effort by design: report the problem and hand the message back.
        error_msg = f"Error writing to file: {str(e)}"
        print(error_msg)
        return error_msg

In [7]:
# Updated function with video metadata support
def _format_metadata_number(value, spec, suffix=""):
    """Format a numeric metadata value with `spec`; return 'Unknown' when it is missing or non-numeric."""
    if isinstance(value, (int, float)) and not isinstance(value, bool):
        return format(value, spec) + suffix
    return "Unknown"


def generate_video_analysis_report_updated(analyses: List[dict], video_path: str, username: str, video_metadata: dict = None) -> str:
    """
    Generate a comprehensive markdown report from frame analyses.
    Updated to support video metadata and timestamps.

    Args:
        analyses: List of frame analysis dictionaries. Each entry must provide
            'analysis' (dict with optional 'applications_open', 'text_content',
            'summary'), 'frame_number' and 'difference_score'; 'timestamp' is
            optional and defaults to 0.
        video_path: Path to the original video
        username: Username for the report
        video_metadata: Optional video metadata dictionary with fps, duration, etc.
            Missing or non-numeric values are reported as "Unknown".

    Returns:
        str: Markdown report content
    """

    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    report = []

    # Header
    report.append("# High-Quality Video Screen Recording Analysis Report\n\n")

    # Metadata section
    report.append("## Analysis Metadata\n\n")
    report.append(f"**Video File:** `{os.path.basename(video_path)}`\n")
    report.append(f"**Analysis Timestamp:** {timestamp}\n")
    report.append(f"**User:** {username}\n")
    report.append(f"**Frames Analyzed:** {len(analyses)}\n")

    if video_metadata:
        # A ':.2f' format spec cannot be applied to an 'Unknown' fallback
        # string, so the numeric formatting is done only for real numbers.
        report.append(f"**Video FPS:** {_format_metadata_number(video_metadata.get('fps'), '.2f')}\n")
        report.append(f"**Video Duration:** {_format_metadata_number(video_metadata.get('duration'), '.1f', 's')}\n")
        report.append(f"**Total Video Frames:** {video_metadata.get('total_frames', 'Unknown')}\n")

    report.append("**Analysis Method:** High-Quality Frame Differencing + AI Vision Analysis\n")
    report.append("**Frame Quality:** Full Resolution (No Degradation)\n\n")

    # Executive Summary
    report.append("## Executive Summary\n\n")

    # Extract key applications and activities
    all_applications = set()
    activity_timeline = []

    for analysis in analyses:
        frame_analysis = analysis['analysis']
        frame_num = analysis['frame_number']
        timestamp_val = analysis.get('timestamp', 0)
        diff_score = analysis['difference_score']

        # Collect applications
        if frame_analysis.get('applications_open'):
            all_applications.update(frame_analysis['applications_open'])

        # Create timeline entry with timestamp
        summary_text = frame_analysis.get('summary')
        if summary_text:
            # Only mark the excerpt as truncated when something was cut off.
            excerpt = summary_text[:200] + ("..." if len(summary_text) > 200 else "")
            activity_timeline.append(f"**Frame {frame_num}** at {timestamp_val:.2f}s (Change: {diff_score:.3f}): {excerpt}")

    report.append(f"This high-quality analysis covers {len(analyses)} significant frames extracted from the screen recording. ")
    report.append(f"The user was observed working with {len(all_applications)} different applications/tools. ")

    if video_metadata:
        total_frames = video_metadata.get('total_frames')
        # The reduction ratio is only meaningful (and only computable without
        # dividing by zero) for a known, positive frame count.
        if isinstance(total_frames, (int, float)) and not isinstance(total_frames, bool) and total_frames > 0:
            reduction_percent = ((total_frames - len(analyses)) / total_frames) * 100
            report.append(f"Frame reduction achieved: {reduction_percent:.1f}% (from {total_frames} to {len(analyses)} frames). ")

    report.append("The following report provides detailed insights into user activities and potential security considerations.\n\n")

    # Applications used
    report.append("## Applications and Tools Used\n\n")
    if all_applications:
        for app in sorted(all_applications):
            report.append(f"- {app}\n")
    else:
        report.append("*No applications detected*\n")
    report.append("\n")

    # Activity Timeline
    report.append("## Activity Timeline\n\n")
    for activity in activity_timeline[:10]:  # Limit to first 10 for brevity
        report.append(f"{activity}\n\n")

    if len(activity_timeline) > 10:
        report.append(f"*... and {len(activity_timeline) - 10} more activities*\n\n")

    # Detailed Frame Analysis
    report.append("## Detailed Frame Analysis\n\n")

    for analysis in analyses:
        frame_num = analysis['frame_number']
        timestamp_val = analysis.get('timestamp', 0)
        diff_score = analysis['difference_score']
        frame_analysis = analysis['analysis']

        report.append(f"### Frame {frame_num} at {timestamp_val:.2f}s (Significance Score: {diff_score:.3f})\n\n")

        # Applications in this frame
        if frame_analysis.get('applications_open'):
            report.append("**Applications:**\n")
            for app in frame_analysis['applications_open']:
                report.append(f"- {app}\n")
            report.append("\n")

        # Text content
        if frame_analysis.get('text_content'):
            report.append("**Visible Text Content:**\n")
            report.append("```\n")
            for text in frame_analysis['text_content'][:20]:  # Limit to first 20 items
                report.append(f"{text}\n")
            if len(frame_analysis['text_content']) > 20:
                report.append(f"... and {len(frame_analysis['text_content']) - 20} more text elements\n")
            report.append("```\n\n")

        # Summary
        if frame_analysis.get('summary'):
            report.append(f"**Analysis:** {frame_analysis['summary']}\n\n")

        report.append("---\n\n")

    # Quality assurance section
    report.append("## Quality Assurance\n\n")
    report.append("**Frame Extraction Quality:** High\n")
    report.append("- Frames extracted at full video resolution\n")
    report.append("- No quality loss from resizing or format conversion\n")
    report.append("- Lossless PNG compression used for storage\n")
    report.append("- Original video color space preserved\n\n")

    # Security considerations
    report.append("## Security and Compliance Notes\n\n")
    report.append("This analysis was performed for access management and security monitoring purposes. ")
    report.append("Key observations:\n\n")
    report.append(f"- Total significant activity changes detected: {len(analyses)}\n")
    report.append(f"- Applications accessed: {len(all_applications)}\n")
    report.append("- All activities have been logged and analyzed for compliance\n")
    report.append("- High-quality frame analysis ensures maximum detection accuracy\n")
    report.append("- This report should be reviewed by authorized personnel only\n\n")

    return ''.join(report)

# Alias the updated function to replace the old one.
# NOTE(review): a later cell in this notebook defines `generate_video_analysis_report`
# again with a plain `def`. When the cells run top to bottom, that later definition
# rebinds the name, so this alias stops pointing at the updated implementation.
generate_video_analysis_report = generate_video_analysis_report_updated


In [8]:
import time

def analyze_frame_with_retry(image_path: str, max_retries: int = 3, sleep_duration: int = 65) -> dict:
    """
    Run the image analysis for one frame, retrying only when rate limited.

    An error counts as a rate limit when its message contains both "429" and
    "rate limit" (case-insensitive). Any other error ends the attempt at once.

    Args:
        image_path: Path to the image file
        max_retries: Maximum number of retry attempts
        sleep_duration: Sleep duration in seconds when rate limited

    Returns:
        dict: Analysis result or None if all retries failed
    """
    attempt = 0
    while attempt <= max_retries:
        try:
            # Ask the model for a summary and normalise the reply to a dict
            return convert_output_to_json(generate_image_summary(image_path, is_url=False))
        except Exception as exc:
            message = str(exc)
            rate_limited = "429" in message and "rate limit" in message.lower()

            # Anything other than a rate limit is not retried
            if not rate_limited:
                print(f"    ❌ Non-rate-limit error: {message}")
                return None

            # Retry budget exhausted
            if attempt >= max_retries:
                print(f"    ❌ Rate limit exceeded after {max_retries + 1} attempts. Skipping frame.")
                return None

            print(f"    ⏳ Rate limit hit (attempt {attempt + 1}/{max_retries + 1}). Sleeping for {sleep_duration} seconds...")
            time.sleep(sleep_duration)
            print("    🔄 Retrying frame analysis...")

        attempt += 1

    return None

def process_video_with_frame_differencing(
    video_path: str,
    output_markdown_path: str,
    difference_threshold: float = 0.1,
    min_frame_interval: int = 5,
    max_frames: int = 100,
    cleanup_temp_files: bool = False,
    username: str = "User",
    resize_factor: float = 1,
    max_retries: int = 2,
    sleep_duration: int = 70
) -> str:
    """
    Complete pipeline to process a video with frame differencing and Llama4 Maverick analysis.

    Steps: detect significant frames, save them as PNG files under
    "./temp_frames", analyze each saved frame (with rate-limit retries), build
    the Markdown report and write it to ``output_markdown_path``.

    Args:
        video_path (str): Path to input video file
        output_markdown_path (str): Path for output markdown file
        difference_threshold (float): Threshold for frame difference detection
        min_frame_interval (int): Minimum frames between significant frames
        max_frames (int): Maximum number of frames to analyze
        cleanup_temp_files (bool): Whether to delete temporary frame files
        username (str): Username for the analysis report
        resize_factor (float): Factor to resize frames for processing
        max_retries (int): Maximum retry attempts for rate limited requests
        sleep_duration (int): Sleep duration in seconds when rate limited

    Returns:
        str: Path to the generated markdown report

    Raises:
        ValueError: If the video cannot be read or yields no significant frames.
        Exception: Any other error from a pipeline step is printed and re-raised.
    """

    print("="*60)
    print("STARTING VIDEO ANALYSIS WITH FRAME DIFFERENCING")
    print("="*60)

    try:
        # Step 1: Extract significant frames
        print("\n1. Extracting significant frames from video...")
        # extract_significant_frames returns (frame metadata list, video metadata
        # dict); the two must be unpacked before the list is used.
        # NOTE(review): video_metadata is not forwarded to the report because the
        # generate_video_analysis_report defined below in this cell takes only
        # three arguments.
        significant_frames, video_metadata = extract_significant_frames(
            video_path, 
            difference_threshold=difference_threshold,
            min_frame_interval=min_frame_interval,
            max_frames=max_frames,
            resize_factor=resize_factor
        )

        if not significant_frames:
            raise ValueError("No significant frames extracted from video")

        # Step 2: Save frames as temporary images
        print("\n2. Saving frames as temporary images...")
        temp_dir = "./temp_frames"
        # Signature is (frames_data, video_path, output_dir): the video path must
        # be passed explicitly, otherwise temp_dir would be opened as the video.
        frame_image_paths = save_frames_as_images(significant_frames, video_path, temp_dir)

        # Frames that could not be read or written are skipped during extraction,
        # so the saved paths can be fewer than the metadata entries. Pair them by
        # the file name the extractor generates instead of by list position.
        metadata_by_filename = {
            f"frame_{frame_num:06d}_diff_{score:.3f}.png": (frame_num, frame_time, score)
            for frame_num, frame_time, score in significant_frames
        }

        # Step 3: Analyze each significant frame with Llama4 Maverick
        print("\n3. Analyzing frames with Llama4 Maverick...")
        print(f"   Rate limiting protection: {max_retries} retries, {sleep_duration}s sleep")
        all_analyses = []

        for i, image_path in enumerate(frame_image_paths):
            print(f"\n📸 Analyzing frame {i+1}/{len(frame_image_paths)}: {os.path.basename(image_path)}")

            # Metadata tuples are (frame_number, timestamp, difference_score)
            frame_number, frame_timestamp, diff_score = metadata_by_filename.get(
                os.path.basename(image_path), significant_frames[i]
            )

            # Use retry-enabled analysis
            summary_json = analyze_frame_with_retry(
                image_path, 
                max_retries=max_retries, 
                sleep_duration=sleep_duration
            )

            if summary_json is None:
                # Frame analysis failed after all retries
                print(f"  ❌ Frame {frame_number} analysis failed after retries (diff: {diff_score:.3f})")
                continue

            all_analyses.append({
                "frame_number": frame_number,
                "timestamp": frame_timestamp,
                "difference_score": diff_score,
                "image_path": image_path,
                "analysis": summary_json
            })

            print(f"  ✅ Frame {frame_number} analyzed successfully (diff: {diff_score:.3f})")

        # Step 4: Generate comprehensive report
        print("\n4. Generating comprehensive analysis report...")
        report_content = generate_video_analysis_report(all_analyses, video_path, username)

        # Step 5: Save report (creating the output directory on first use)
        report_dir = os.path.dirname(output_markdown_path)
        if report_dir:
            os.makedirs(report_dir, exist_ok=True)
        with open(output_markdown_path, 'w', encoding='utf-8') as f:
            f.write(report_content)

        print(f"\n✓ Analysis complete! Report saved to: {output_markdown_path}")

        # Step 6: Cleanup temporary files
        if cleanup_temp_files:
            print("\n5. Cleaning up temporary files...")
            try:
                shutil.rmtree(temp_dir)
                print("  ✓ Temporary files cleaned up")
            except Exception as e:
                print(f"  ⚠ Warning: Could not clean up temp files: {str(e)}")

        print("\n" + "="*60)
        print("VIDEO ANALYSIS COMPLETED SUCCESSFULLY")
        print("="*60)

        return output_markdown_path

    except Exception as e:
        print(f"\n❌ Error during video analysis: {str(e)}")
        raise

def _as_list(value) -> list:
    """Return ``value`` as a list so report code can iterate it safely.

    Falsy values become an empty list, lists/tuples/sets are copied into a
    list, and any other single value (for example a bare string) is wrapped in
    a one-element list instead of being iterated character by character.
    """
    if not value:
        return []
    if isinstance(value, (list, tuple, set)):
        return list(value)
    return [value]


def generate_video_analysis_report(analyses: List[dict], video_path: str, username: str) -> str:
    """
    Generate a comprehensive markdown report from frame analyses.

    The report contains, in order: a title, a metadata block, an executive
    summary, the sorted set of applications seen across all frames, an activity
    timeline (first 10 entries), a detailed per-frame section, and a closing
    security/compliance section.

    Args:
        analyses: List of frame analysis dictionaries. Each entry must provide
            'frame_number' and 'difference_score' (a number, formatted with
            three decimals) and should provide 'analysis', a dict that may hold
            'applications_open', 'text_content' and 'summary'. A missing or
            empty 'analysis' yields a frame section with only its heading.
        video_path: Path to the original video (only its base name is printed)
        username: Username for the report

    Returns:
        str: Markdown report content

    Raises:
        KeyError: If an entry lacks 'frame_number' or 'difference_score'.
    """
    # Presentation limits, named so they are easy to find and tune.
    timeline_summary_chars = 200   # max summary characters shown per timeline entry
    timeline_max_entries = 10      # timeline entries listed before "... and N more"
    max_text_items = 20            # text elements listed per frame before "... and N more"

    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    report = []

    # Header
    report.append("# Video Screen Recording Analysis Report\n\n")

    # Metadata section
    report.append("## Analysis Metadata\n\n")
    report.append(f"**Video File:** `{os.path.basename(video_path)}`\n")
    report.append(f"**Analysis Timestamp:** {timestamp}\n")
    report.append(f"**User:** {username}\n")
    report.append(f"**Frames Analyzed:** {len(analyses)}\n")
    report.append("**Analysis Method:** Frame Differencing + AI Vision Analysis\n\n")

    # Executive Summary
    report.append("## Executive Summary\n\n")

    # First pass: gather the cross-frame aggregates (application set, timeline).
    all_applications = set()
    activity_timeline = []

    for analysis in analyses:
        frame_analysis = analysis.get('analysis') or {}
        frame_num = analysis['frame_number']
        diff_score = analysis['difference_score']

        # Collect applications. Entries are coerced to str so the set stays
        # hashable and sortable even if the model returned non-string items.
        all_applications.update(
            str(app) for app in _as_list(frame_analysis.get('applications_open'))
        )

        # Create timeline entry; the ellipsis is added only when the summary
        # was actually cut short.
        summary = frame_analysis.get('summary')
        if summary:
            summary_text = str(summary)
            preview = summary_text[:timeline_summary_chars]
            if len(summary_text) > timeline_summary_chars:
                preview += "..."
            activity_timeline.append(f"**Frame {frame_num}** (Change: {diff_score:.3f}): {preview}")

    report.append(f"This analysis covers {len(analyses)} significant frames extracted from the screen recording. ")
    report.append(f"The user was observed working with {len(all_applications)} different applications/tools. ")
    report.append("The following report provides detailed insights into user activities and potential security considerations.\n\n")

    # Applications used
    report.append("## Applications and Tools Used\n\n")
    if all_applications:
        for app in sorted(all_applications):
            report.append(f"- {app}\n")
    else:
        report.append("*No applications detected*\n")
    report.append("\n")

    # Activity Timeline
    report.append("## Activity Timeline\n\n")
    for activity in activity_timeline[:timeline_max_entries]:  # Limit for brevity
        report.append(f"{activity}\n\n")

    if len(activity_timeline) > timeline_max_entries:
        report.append(f"*... and {len(activity_timeline) - timeline_max_entries} more activities*\n\n")

    # Detailed Frame Analysis
    report.append("## Detailed Frame Analysis\n\n")

    for analysis in analyses:
        frame_num = analysis['frame_number']
        diff_score = analysis['difference_score']
        frame_analysis = analysis.get('analysis') or {}

        report.append(f"### Frame {frame_num} (Significance Score: {diff_score:.3f})\n\n")

        # Applications in this frame
        frame_apps = _as_list(frame_analysis.get('applications_open'))
        if frame_apps:
            report.append("**Applications:**\n")
            for app in frame_apps:
                report.append(f"- {app}\n")
            report.append("\n")

        # Text content, rendered inside a fenced block and capped in length
        text_items = _as_list(frame_analysis.get('text_content'))
        if text_items:
            report.append("**Visible Text Content:**\n")
            report.append("```\n")
            for text in text_items[:max_text_items]:
                report.append(f"{text}\n")
            if len(text_items) > max_text_items:
                report.append(f"... and {len(text_items) - max_text_items} more text elements\n")
            report.append("```\n\n")

        # Summary (shown in full here, unlike the truncated timeline preview)
        if frame_analysis.get('summary'):
            report.append(f"**Analysis:** {frame_analysis['summary']}\n\n")

        report.append("---\n\n")

    # Security considerations
    report.append("## Security and Compliance Notes\n\n")
    report.append("This analysis was performed for access management and security monitoring purposes. ")
    report.append("Key observations:\n\n")
    report.append(f"- Total significant activity changes detected: {len(analyses)}\n")
    report.append(f"- Applications accessed: {len(all_applications)}\n")
    report.append("- All activities have been logged and analyzed for compliance\n")
    report.append("- This report should be reviewed by authorized personnel only\n\n")

    return ''.join(report)


In [20]:
# UPDATED EXECUTION WITH RATE LIMITING PROTECTION
# Run this cell in place of the earlier execution cell; it adds retry/sleep
# settings for rate-limited API calls.

# Every tunable for the run lives in this one mapping. The key names are the
# keyword-argument names passed to process_video_with_frame_differencing below,
# so the mapping is unpacked straight into the call.
ENHANCED_CONFIG = dict(
    # Frame extraction settings
    difference_threshold=0.1,   # Lower = more sensitive to changes (0.05-0.3 recommended)
    min_frame_interval=5,       # Minimum frames between captures (reduces redundancy)
    max_frames=60,              # Maximum frames to analyze (reduced for rate limiting)
    resize_factor=1,            # Resize frames for faster processing (0.3-1.0)

    # Rate limiting protection settings
    max_retries=5,              # Number of retry attempts for rate limited requests
    sleep_duration=70,          # Sleep duration in seconds when rate limited (70+ recommended)

    # General settings
    cleanup_temp_files=False,   # Clean up temporary frame files after processing
    username='AK_Test2',        # Username for the report
)

print("🛡️ ENHANCED VIDEO ANALYSIS WITH RATE LIMITING PROTECTION")
print("=" * 70)
print("📊 Enhanced Configuration:")
for key, value in ENHANCED_CONFIG.items():
    print(f"   {key}: {value}")

# NOTE(review): the estimate below charges one sleep per frame and does not
# factor in max_retries — confirm against the retry logic whether the true
# worst case is max_retries times larger.
print(f"\n⏱️  Estimated max processing time: {ENHANCED_CONFIG['max_frames'] * ENHANCED_CONFIG['sleep_duration'] / 60:.1f} minutes")
print("   (if all frames hit rate limits)")

try:
    print("\n🚀 Starting enhanced video analysis...")
    print(f"📹 Input: {video_path_coding_vba}")
    print(f"📄 Output: {output_video_coding_vba}")

    # Run the full pipeline. Adding a key to ENHANCED_CONFIG that is not a
    # parameter of the function would make this call fail.
    result_path = process_video_with_frame_differencing(
        video_path=video_path_coding_vba,
        output_markdown_path=output_video_coding_vba,
        **ENHANCED_CONFIG
    )

    print("\n🎉 SUCCESS! Enhanced video analysis completed.")
    print(f"📄 Report saved to: {result_path}")
    print(f"💰 Cost optimization: Only {ENHANCED_CONFIG['max_frames']} frames analyzed")
    print(f"🛡️ Rate limiting protection: {ENHANCED_CONFIG['max_retries']} retries, {ENHANCED_CONFIG['sleep_duration']}s sleep")

except Exception as exc:
    # Report the failure and keep going so the tips below are still printed.
    print(f"\n❌ Error during enhanced processing: {exc}")
    print("Please check your video file path and configuration.")

# NOTE(review): the first tip quotes ~100 seconds per query while
# sleep_duration is 70 — confirm which figure reflects the actual API limit.
print("\n" + "=" * 70)
print("💡 RATE LIMITING TIPS:")
print("=" * 70)
print("• Current API limit: 0.6 queries/minute (1 query every ~100 seconds)")
print(f"• Sleep duration: {ENHANCED_CONFIG['sleep_duration']}s provides buffer for rate limiting")
print(f"• Max retries: {ENHANCED_CONFIG['max_retries']} attempts per frame")
print(f"• Frames reduced to {ENHANCED_CONFIG['max_frames']} for faster completion")
print("• Processing will automatically retry failed frames after rate limit cooldown")
print("• Monitor console output for retry status and progress")

# Uncomment to run a quick test with just 3 frames:
# test_result = process_video_with_frame_differencing(
#     video_path=video_path_coding_vba,
#     output_markdown_path="./Outputs/test_analysis.md",
#     max_frames=3,
#     max_retries=2,
#     sleep_duration=70
# )


🛡️ ENHANCED VIDEO ANALYSIS WITH RATE LIMITING PROTECTION
📊 Enhanced Configuration:
   difference_threshold: 0.1
   min_frame_interval: 5
   max_frames: 60
   resize_factor: 1
   max_retries: 5
   sleep_duration: 70
   cleanup_temp_files: False
   username: AK_Test2

⏱️  Estimated max processing time: 70.0 minutes
   (if all frames hit rate limits)

🚀 Starting enhanced video analysis...
📹 Input: ./VidRecordings/CodingVBA_sample.mp4
📄 Output: ./Outputs/analysis_output_coding_vba.md
STARTING VIDEO ANALYSIS WITH FRAME DIFFERENCING

1. Extracting significant frames from video...
Video info: 52697 frames, 59.99 FPS
Processing frames for significant changes...
Progress: 0.2% (100/52697)
Progress: 0.4% (200/52697)
Progress: 0.6% (300/52697)
Progress: 0.8% (400/52697)
Progress: 0.9% (500/52697)
Progress: 1.1% (600/52697)
Progress: 1.3% (700/52697)
Significant frame found: Frame 723, Change: 0.165
Significant frame found: Frame 728, Change: 0.112
Progress: 1.5% (800/52697)
Progress: 1.7% (900/52

In [10]:
def test_extract_significant_frames_only(
    video_path: str,
    output_dir: str = "./test_SignificantImages",
    difference_threshold: float = 0.1,
    min_frame_interval: int = 5,
    max_frames: int = 20,
    resize_factor: float = 1
) -> dict:
    """
    Test function to extract and save significant frames only.
    
    Args:
        video_path: Path to input video
        output_dir: Directory to save extracted frames
        difference_threshold: Threshold for frame difference detection
        min_frame_interval: Minimum frames between significant frames
        max_frames: Maximum frames to extract
        resize_factor: Factor for processing speed (doesn't affect output quality)
        
    Returns:
        dict: Summary of extraction results
    """
    
    print("🧪 TESTING: High-Quality Frame Extraction")
    print("="*50)
    
    try:
        # Step 1: Extract significant frame metadata
        print(f"📹 Analyzing video: {os.path.basename(video_path)}")
        frame_metadata, video_metadata = extract_significant_frames(
            video_path=video_path,
            difference_threshold=difference_threshold,
            min_frame_interval=min_frame_interval,
            max_frames=max_frames,
            resize_factor=resize_factor
        )
        
        if not frame_metadata:
            return {"status": "failed", "reason": "No significant frames found"}
        
        # Step 2: Extract high-quality frames
        print(f"💾 Saving frames to: {output_dir}")
        frame_paths = extract_high_quality_frames_from_video(
            video_path=video_path,
            frame_metadata=frame_metadata,
            output_dir=output_dir
        )
        
        # Prepare results summary
        results = {
            "status": "success",
            "video_file": os.path.basename(video_path),
            "frames_extracted": len(frame_paths),
            "output_directory": output_dir,
            "video_metadata": video_metadata,
            "frame_details": [
                {
                    "frame_number": frame_num,
                    "timestamp": timestamp,
                    "difference_score": diff_score,
                    "filename": os.path.basename(frame_paths[i]) if i < len(frame_paths) else None
                }
                for i, (frame_num, timestamp, diff_score) in enumerate(frame_metadata)
            ]
        }
        
        print(f"✅ Successfully extracted {len(frame_paths)} frames")
        print(f"📁 Frames saved in: {output_dir}")
        
        return results
        
    except Exception as e:
        return {"status": "failed", "reason": str(e)}

# Quick test function with minimal parameters
def quick_frame_test(video_path: str, max_frames: int = 10) -> dict:
    """Quick test with default settings.

    Calls test_extract_significant_frames_only with its default detection
    settings, saving frames under "./quick_test_frames", then prints a
    one-line outcome.

    Args:
        video_path: Path to input video
        max_frames: Maximum frames to extract

    Returns:
        dict: The results dictionary produced by
        test_extract_significant_frames_only, returned unchanged.
    """
    results = test_extract_significant_frames_only(
        video_path=video_path,
        max_frames=max_frames,
        output_dir="./quick_test_frames"
    )

    if results["status"] == "success":
        print(f"🎉 Quick test completed: {results['frames_extracted']} frames extracted")
    else:
        print(f"❌ Quick test failed: {results['reason']}")

    # The results are handed back to the caller, so the annotation is dict, not None.
    return results

# Run the quick extraction test on the sample recording, capped at 40 frames.
# As the cell's last expression, the returned results dict is displayed below.
quick_frame_test(video_path_coding_vba, max_frames=40)

🧪 TESTING: High-Quality Frame Extraction
📹 Analyzing video: CodingVBA_sample.mp4
Video info: 52697 frames, 59.99 FPS, 878.5s duration
Processing frames for significant changes...
Significant frame found: Frame 723, Time: 12.05s, Change: 0.165
Significant frame found: Frame 728, Time: 12.14s, Change: 0.112
Progress: 1.9% (1000/52697)
Progress: 3.8% (2000/52697)
Progress: 5.7% (3000/52697)
Progress: 7.6% (4000/52697)
Significant frame found: Frame 4777, Time: 79.63s, Change: 0.772
Significant frame found: Frame 4860, Time: 81.02s, Change: 0.156
Progress: 9.5% (5000/52697)
Significant frame found: Frame 5025, Time: 83.77s, Change: 0.127
Significant frame found: Frame 5154, Time: 85.92s, Change: 0.177
Progress: 11.4% (6000/52697)
Progress: 13.3% (7000/52697)
Progress: 15.2% (8000/52697)
Significant frame found: Frame 8581, Time: 143.05s, Change: 0.140
Significant frame found: Frame 8706, Time: 145.13s, Change: 0.248
Progress: 17.1% (9000/52697)
Significant frame found: Frame 9797, Time: 16

{'status': 'success',
 'video_file': 'CodingVBA_sample.mp4',
 'frames_extracted': 40,
 'output_directory': './quick_test_frames',
 'video_metadata': {'fps': 59.986340093722134,
  'total_frames': 52697,
  'duration': 878.4833333333333,
  'video_path': './VidRecordings/CodingVBA_sample.mp4'},
 'frame_details': [{'frame_number': 0,
   'timestamp': 0.0,
   'difference_score': 1.0,
   'filename': 'frame_000000_diff_1.000.png'},
  {'frame_number': 723,
   'timestamp': 12.052743989221398,
   'difference_score': 0.16497974537037038,
   'filename': 'frame_000723_diff_0.165.png'},
  {'frame_number': 728,
   'timestamp': 12.13609629896705,
   'difference_score': 0.1115113811728395,
   'filename': 'frame_000728_diff_0.112.png'},
  {'frame_number': 4777,
   'timestamp': 79.63479673099671,
   'difference_score': 0.7715345293209876,
   'filename': 'frame_004777_diff_0.772.png'},
  {'frame_number': 4860,
   'timestamp': 81.01844507277454,
   'difference_score': 0.15628954475308643,
   'filename': 'fra