<a href="https://colab.research.google.com/github/hyperepoch/TeachMe/blob/code_me/evidence_review.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from google.colab import files
import json
import io
from collections import defaultdict

class JSONReader:
    """Parse a file-like object as either standard JSON or JSON Lines."""

    def __init__(self, file):
        # File-like object exposing read() and seek(); read_json_auto()
        # consumes it in full and rewinds it afterwards.
        self.file = file

    def read_json_auto(self):
        """Read the whole file and detect whether it is JSON or JSON Lines.

        The content is first parsed as a single JSON document. If that
        fails, every non-blank line is parsed as its own JSON value.

        Returns:
            A ``(data, format_type)`` tuple. ``format_type`` is
            ``"standard"`` when the content is one JSON document, or
            ``"jsonl"`` when ``data`` is a list holding one parsed value
            per non-blank line (an empty input yields ``([], "jsonl")``).

        Raises:
            ValueError: If the content parses under neither format. The
                underlying ``json.JSONDecodeError`` is attached as the cause.
        """
        text = self.file.read()
        self.file.seek(0)  # leave the file rewound for any later reader

        # A byte-order mark survives a plain utf-8 decode and makes
        # json.loads reject otherwise valid text, so drop it up front.
        # (bytes input is left alone: json.loads detects its encoding itself.)
        if isinstance(text, str) and text.startswith("\ufeff"):
            text = text[1:]

        try:
            return json.loads(text), "standard"
        except json.JSONDecodeError:
            pass  # not a single document; fall through to line-by-line parsing

        try:
            objects = [
                json.loads(line)
                for line in text.strip().splitlines()
                if line.strip()
            ]
        except json.JSONDecodeError as e:
            raise ValueError(f"Unrecognized or malformed JSON format: {e}") from e
        return objects, "jsonl"

class JSONFormatter:
    """Print parsed JSON data beneath a banner naming the detected format."""

    def __init__(self, data, format_type):
        # format_type is the label shown (upper-cased) in the banner;
        # data is the parsed payload that gets serialized for display.
        self.format_type = format_type
        self.data = data

    def pretty_print(self):
        """Write the format banner, then the data as indented, key-sorted JSON."""
        banner = "\nüîç Detected format: {}\n".format(self.format_type.upper())
        print(banner)
        print(json.dumps(self.data, sort_keys=True, indent=4))

class JSONReportGenerator:
    """Summarize parsed JSON records into a text report file."""

    def __init__(self, data):
        # Records to summarize; anything that is not a list is treated as a
        # single record and wrapped so the rest of the class can iterate.
        self.data = data if isinstance(data, list) else [data]
        # Path the summary is written to by generate_summary().
        self.report_file = "management_report.txt"

    def _build_report_lines(self):
        """Return the report text as a list of lines, without doing any I/O."""
        all_keys = set()
        type_counter = defaultdict(int)
        sample_values = defaultdict(list)

        for record in self.data:
            if not isinstance(record, dict):
                # Non-dict records contribute only their own type to the tally.
                type_counter[type(record).__name__] += 1
                continue
            for key, value in record.items():
                all_keys.add(key)
                type_counter[type(value).__name__] += 1
                # Keep at most three samples per field, each cut to 40 chars.
                if len(sample_values[key]) < 3:
                    sample_values[key].append(str(value)[:40])

        lines = [
            "\nüìä Management Summary Report\n" + "-" * 40,
            f"Total Records: {len(self.data)}\n",
            f"Keys Found: {len(all_keys)}",
            f"Field Names Sample: {sorted(all_keys)[:10]}",
            "\nField Types:",
        ]
        lines.extend(f" - {dtype}: {count}" for dtype, count in type_counter.items())
        lines.append("\nSample Values (first 3 per field):")
        lines.extend(f" - {key}: {values}" for key, values in sample_values.items())
        lines.append("-" * 40)
        return lines

    def generate_summary(self):
        """Write the summary report to ``self.report_file`` and trigger its download.

        The report lists the record count, the distinct top-level keys found
        in dict records, a tally of value types, and up to three sample
        values per key. After writing, the path is passed to
        ``files.download``.
        """
        lines = self._build_report_lines()

        # The report contains non-ASCII characters, so pin the encoding
        # instead of relying on the platform's locale default.
        with open(self.report_file, "w", encoding="utf-8") as f:
            f.write("\n".join(lines))

        print("\n‚úÖ Management summary written to:", self.report_file)
        files.download(self.report_file)

class JSONViewer:
    """Drive the read, pretty-print and report steps for one uploaded file."""

    def __init__(self, uploaded_file):
        # Parsed payload and detected format stay None until display() runs.
        self.data = None
        self.format_type = None
        # uploaded_file is handed straight to JSONReader.
        self.reader = JSONReader(uploaded_file)

    def display(self):
        """Parse the file, print it formatted, then generate the summary report.

        Any exception raised along the way is caught and reported as a
        printed error line rather than propagated to the caller.
        """
        try:
            parsed, detected = self.reader.read_json_auto()
            self.data = parsed
            self.format_type = detected

            JSONFormatter(self.data, self.format_type).pretty_print()
            JSONReportGenerator(self.data).generate_summary()
        except Exception as err:
            print(f"‚ùå Error: {err}")

# Ask the user to upload one or more files; the result is keyed by filename
uploaded = files.upload()

# Run the viewer on every uploaded file, decoding each payload as UTF-8 text
for filename, payload in uploaded.items():
    with io.StringIO(payload.decode('utf-8')) as file_obj:
        viewer = JSONViewer(file_obj)
        viewer.display()


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
            "GEO_STREET_NAME": {
                "count": 1,
                "occurrences": [
                    {
                        "context": "Service Address: 502 2ND ST, HIGHSPIRE, PA, 17034\nBoiler\nCalculate th",
                        "edm_matches": [],
                        "page_number": "1",
                        "text": "502 2ND ST"
                    }
                ],
                "pd_category": "LOCATION"
            },
            "NAME_PART": {
                "count": 6,
                "occurrences": [
                    {
                        "context": "Additional Cubic Feet Needed(CFN-RV): 0.00",
                        "edm_matches": [],
                        "page_number": "1",
                        "text": "Cubic"
                    },
                    {
                        "context": "Additional Cubic Feet Needed(CFN-RV): 0.00",
                        "edm_matche

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>