In [1]:
import sys
import json
from typing import Dict, Any
sys.path.append('..')
from analyzer import FileAnalyzer

MPS acceleration available


In [2]:
def file_analyzer_method(file_obj: Dict[str, Any]):
    """Analyze one file-metadata dictionary with a freshly built ``FileAnalyzer``.

    A new analyzer (``large_file_threshold_mb=100``) is constructed on every
    call, so no analyzer state is shared between calls.

    Args:
        file_obj: Dictionary describing the file; it is handed unchanged to
            ``FileAnalyzer.analyze``.

    Returns:
        Whatever ``FileAnalyzer.analyze`` returns for ``file_obj``.
    """
    return FileAnalyzer(large_file_threshold_mb=100).analyze(file_obj)

### Basic file analysis example

In [3]:
# Metadata for a regular image file; this is the "happy path" example.
obj = {
    "path": "/Users/hello/Documents/Junk/basic_root/images/suzuki.jpg",
    "name": "suzuki.jpg",
    "suffix": ".jpg",
    "category": "image",
    "size": 4339794,
    "modified": "2025-05-20 11:49:47",
    "mime_type": "image/jpeg"
}

try:
    result = file_analyzer_method(file_obj=obj)
    # default=str keeps json.dumps from failing on non-JSON-native values.
    print(json.dumps(result, indent=2, default=str))
# One handler covers every exception the original chain caught:
# FileNotFoundError, PermissionError, ConnectionError and TimeoutError are
# subclasses of OSError, and UnicodeDecodeError is a subclass of ValueError.
# In the original chain the clauses listed after their parent class could
# never run, and every clause did the same print(e).
except (TypeError, ValueError, OSError, RuntimeError, MemoryError) as e:
    print(e)

{
  "path": "/Users/hello/Documents/Junk/basic_root/images/suzuki.jpg",
  "name": "suzuki.jpg",
  "suffix": ".jpg",
  "category": "image",
  "size": 4339794,
  "modified": "2025-05-20 11:49:47",
  "mime_type": "image/jpeg",
  "content_analysis": "image_analyzed",
  "ai_insights": {
    "primary_object": "motorcycle",
    "confidence": "92.0%",
    "object_category": "detected_object"
  }
}


### Preview extracted PDF text

In [4]:
# Location of the sample PDF whose extracted text is previewed below.
pdf_file = "/Users/hello/Documents/Junk/basic_root/pdf files/example.pdf"

# Uses the analyzer's default settings and calls the underscore-prefixed
# (private) extraction helper directly.
# NOTE(review): relies on a private method; confirm whether a public entry point exists.
analyzer = FileAnalyzer()
pdf_text = analyzer._extract_pdf_text(pdf_file)

# Show only the first 100 characters as a preview.
print(pdf_text[:100])

Prompt  
Engineering
Author: Lee Boonstra
Prompt Engineering
September 20242Acknowledgements
Reviewe


### Invalid file analysis examples
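Each example below passes deliberately invalid input (an out-of-range threshold, a missing path, a mismatched category, or an unreadable file) and shows how the analyzer reports the problem.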

In [5]:
# Construct an analyzer with a threshold of 0 MB to show the constructor's
# validation message.
try:
    analyzer = FileAnalyzer(large_file_threshold_mb=0)
# One handler covers every exception the original chain caught:
# FileNotFoundError, PermissionError, ConnectionError and TimeoutError are
# subclasses of OSError, and UnicodeDecodeError is a subclass of ValueError.
# In the original chain the clauses listed after their parent class could
# never run, and every clause did the same print(e).
except (TypeError, ValueError, OSError, RuntimeError, MemoryError) as e:
    print(e)

large_file_threshold_mb must be between 1 and 300 MB


In [6]:
# Same image metadata as a valid file, but "path" points at a location that
# is not expected to exist.
obj = {
    "path": "/iamhappy",
    "name": "sunrise-in-small-town-in-lofoten.jpg",
    "suffix": ".jpg",
    "category": "image",
    "size": 4339794,
    "modified": "2025-05-20 11:49:47",
    "mime_type": "image/jpeg"
}

try:
    result = file_analyzer_method(file_obj=obj)
    # default=str keeps json.dumps from failing on non-JSON-native values.
    print(json.dumps(result, indent=2, default=str))
# One handler covers every exception the original chain caught:
# FileNotFoundError, PermissionError, ConnectionError and TimeoutError are
# subclasses of OSError, and UnicodeDecodeError is a subclass of ValueError.
# In the original chain the clauses listed after their parent class could
# never run, and every clause did the same print(e).
except (TypeError, ValueError, OSError, RuntimeError, MemoryError) as e:
    print(e)

{
  "path": "/iamhappy",
  "name": "sunrise-in-small-town-in-lofoten.jpg",
  "suffix": ".jpg",
  "category": "image",
  "size": 4339794,
  "modified": "2025-05-20 11:49:47",
  "mime_type": "image/jpeg",
  "content_analysis": "image_error: File not found: /iamhappy",
  "ai_insights": {}
}


In [7]:
# The suffix and MIME type describe a JPEG image, but "category" is set to
# "document" to show how a category/type mismatch is handled.
obj = {
    "path": "/Users/hello/Documents/Junk/basic_root/images/sunrise-in-small-town-in-lofoten.jpg",
    "name": "sunrise-in-small-town-in-lofoten.jpg",
    "suffix": ".jpg",
    "category": "document",
    "size": 4339794,
    "modified": "2025-05-20 11:49:47",
    "mime_type": "image/jpeg"
}

try:
    result = file_analyzer_method(file_obj=obj)
    # default=str keeps json.dumps from failing on non-JSON-native values.
    print(json.dumps(result, indent=2, default=str))
# One handler covers every exception the original chain caught:
# FileNotFoundError, PermissionError, ConnectionError and TimeoutError are
# subclasses of OSError, and UnicodeDecodeError is a subclass of ValueError.
# In the original chain the clauses listed after their parent class could
# never run, and every clause did the same print(e).
except (TypeError, ValueError, OSError, RuntimeError, MemoryError) as e:
    print(e)

{
  "path": "/Users/hello/Documents/Junk/basic_root/images/sunrise-in-small-town-in-lofoten.jpg",
  "name": "sunrise-in-small-town-in-lofoten.jpg",
  "suffix": ".jpg",
  "category": "document",
  "size": 4339794,
  "modified": "2025-05-20 11:49:47",
  "mime_type": "image/jpeg",
  "content_analysis": "document_analyzed",
  "ai_insights": {
    "content": "unsupported_document_type"
  }
}


In [8]:
# Metadata for a zero-byte image; on the author's machine the file is not
# readable, so this exercises the permission-denied path.
secret_file = {
    "path": "/Users/hello/Documents/Junk/basic_root/secret.jpg",
    "name": "secret.jpg",
    "suffix": ".jpg",
    "category": "image",
    "size": 0,
    "modified": "2025-06-07 12:58:01",
    "mime_type": "image/jpeg"
}

try:
    result = file_analyzer_method(file_obj=secret_file)
    # default=str keeps json.dumps from failing on non-JSON-native values.
    print(json.dumps(result, indent=2, default=str))
# One handler covers every exception the original chain caught:
# FileNotFoundError, PermissionError, ConnectionError and TimeoutError are
# subclasses of OSError, and UnicodeDecodeError is a subclass of ValueError.
# In the original chain the clauses listed after their parent class could
# never run, and every clause did the same print(e).
except (TypeError, ValueError, OSError, RuntimeError, MemoryError) as e:
    print(e)

{
  "path": "/Users/hello/Documents/Junk/basic_root/secret.jpg",
  "name": "secret.jpg",
  "suffix": ".jpg",
  "category": "image",
  "size": 0,
  "modified": "2025-06-07 12:58:01",
  "mime_type": "image/jpeg",
  "content_analysis": "image_error: I/O error while opening image: [Errno 13] Permission denied: '/Users/hello/Documents/Junk/basic_root/secret.jpg'",
  "ai_insights": {}
}
