In [1]:
import json
import re
import logging

logger = logging.getLogger(__name__)
# Root logging is configured at import time with DEBUG level so the
# per-attempt debug messages emitted below are visible.
logging.basicConfig(level=logging.DEBUG)

# Every position at which an embedded JSON object or array could begin.
_JSON_START_RE = re.compile(r'[{\[]')

# Loose ``key: value`` pairs (keys optionally quoted).  Values are limited to
# JSON scalars: a string with backslash escapes, a number (optional sign,
# fraction and exponent), or one of the literals true/false/null.
_KEY_VALUE_RE = re.compile(
    r'"?(\w+)"?\s*:\s*'
    r'("(?:\\.|[^"\\])*"'
    r'|-?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?'
    r'|(?:true|false|null)\b)'
)


def _first_embedded_json(text):
    """Return the first JSON object/array embedded in *text*, or ``None``."""
    decoder = json.JSONDecoder()
    for candidate in _JSON_START_RE.finditer(text):
        try:
            # raw_decode parses one complete JSON value starting at the given
            # index and ignores trailing text, so nested structures and
            # escaped quotes are handled by the real JSON grammar.
            value, _ = decoder.raw_decode(text, candidate.start())
        except json.JSONDecodeError as e:
            logger.debug("JSON-like structure parse failed: %s", e)
        else:
            return value
    return None


def _parse_key_value_pairs(text):
    """Collect loose ``key: scalar`` pairs found in *text* into a dict."""
    pairs = {}
    for pair in _KEY_VALUE_RE.finditer(text):
        key, raw_value = pair.groups()
        try:
            pairs[key] = json.loads(raw_value)
        except json.JSONDecodeError:
            # Not a valid JSON scalar (e.g. a zero-padded number such as
            # "007"): keep the raw text, minus any surrounding quotes.
            pairs[key] = raw_value.strip('"')
    return pairs


def parse_json_safely(text):
    """Best-effort extraction of JSON data from *text*.

    Three strategies are tried in order, and the first that succeeds wins:

    1. Parse the whole input as JSON.
    2. Scan for the first ``{`` or ``[`` from which a complete JSON object
       or array can be decoded, ignoring any surrounding text.
    3. Collect loose ``key: value`` pairs (keys may be unquoted; values must
       be a quoted string, a number, or ``true``/``false``/``null``) into a
       dict.  Later duplicates of a key overwrite earlier ones.

    Args:
        text: The input to parse, typically a string that may contain JSON
            mixed with other text.

    Returns:
        The decoded JSON value from step 1 or 2, the dict built in step 3,
        or ``None`` when nothing JSON-like could be recovered.
    """
    logger.debug("Attempting to parse JSON from: %s", text)

    try:
        return json.loads(text)
    except json.JSONDecodeError as e:
        logger.debug("Initial JSON parse failed: %s", e)

    # An embedded object/array decodes to a dict or list, never None, so
    # None unambiguously means "nothing found" (empty {} / [] still count).
    embedded = _first_embedded_json(text)
    if embedded is not None:
        return embedded

    # Last resort: salvage individual key/value pairs.
    manual_parse = _parse_key_value_pairs(text)
    if manual_parse:
        logger.info("Manually parsed JSON: %s", manual_parse)
        return manual_parse

    logger.warning("Failed to parse any JSON-like structure")
    return None


# Demo driver: feed the parser a few representative inputs and show results
if __name__ == "__main__":
    # Inputs range from strict JSON, through JSON embedded in prose, down to
    # loosely formatted key/value text with unquoted keys and values.
    samples = (
        '{"year_s": "2023", "ticker_s": "AAPL", "quarter_s": "Q2"}',
        'Here is the JSON: {"year_s": "2023", "ticker_s": "AAPL", "quarter_s": "Q2"}',
        'The JSON is: {year_s: "2023", ticker_s: "AAPL", quarter_s: "Q2"}',
        'year_s: "2023", ticker_s: "AAPL", quarter_s: "Q2"',
        'Invalid JSON {year_s: 2023, ticker_s: AAPL, quarter_s: Q2}',
    )

    for number, sample in enumerate(samples, start=1):
        parsed = parse_json_safely(sample)
        # One report per sample, followed by a blank separator line.
        print(
            f"Test case {number}:",
            f"Input: {sample}",
            f"Result: {parsed}",
            "",
            sep="\n",
        )

Test case 1:
Input: {"year_s": "2023", "ticker_s": "AAPL", "quarter_s": "Q2"}
Result: {'year_s': '2023', 'ticker_s': 'AAPL', 'quarter_s': 'Q2'}



error: unknown extension ?R at position 12