In [None]:
from json_therule0 import JSONCleaner, JSONReader, AdvancedJSONReader
import json

# Location of the sample dataset that ships with the repository; the cells
# below load their input from this path.
filepath = "data/sample_data.json"
print(f"Loading data from: {filepath}")

## Phase 1: Loading and Initial State

The `JSONCleaner` automatically initializes a `JSONLoader` to load and validate the JSON file.

In [None]:
# Initialize the cleaner (loads data automatically)
cleaner = JSONCleaner(filepath)

# Show both textual forms of the object. `!r` selects repr(); a bare
# `{cleaner}` placeholder is formatted via str() by default, which would make
# this line print the same text as the "String representation" line below.
print(f"Cleaner object: {cleaner!r}")
print(f"String representation: {str(cleaner)}")

# Access raw data via the new get_raw_data() method
raw_data = cleaner.get_raw_data()
print("\nRaw data (first 2 records):")
# NOTE(review): slicing assumes the raw data is a list of records — confirm
print(json.dumps(raw_data[:2], indent=2))

## Phase 2: Data Cleaning with Method Chaining

The `JSONCleaner` uses a **fluent interface** (method chaining): each cleaning method returns `self`, so the steps can be chained into a single pipeline that ends with `get_cleaned_data()` to extract the result.

In [None]:
# Run the whole cleaning pipeline as one chained expression. The chain ends
# with get_cleaned_data(), whose result is what gets bound to `cleaned_data`.
cleaned_data = (
    cleaner
    .trim_whitespace()          # Remove leading/trailing whitespace
    .remove_null_values()       # Remove null fields
    .remove_duplicates()        # Keep only unique records
    .get_cleaned_data()         # Extract the final result
)

# Print the cleaner again so its post-cleaning state can be compared with the
# output of the loading cell.
print(f"State after cleaning: {cleaner}")
print("Cleaned data (first 2 records):")
# NOTE(review): slicing assumes the cleaned data is a list of records — confirm
print(json.dumps(cleaned_data[:2], indent=2))

## Phase 3: Reading and Analysis

Pass the cleaned data to `JSONReader` for read-only analysis. This cell also demonstrates the reader's dunder methods (`__repr__`, `__str__`, and `__len__`).

In [None]:
# Wrap the cleaned records in a reader used for the analysis cells
reader = JSONReader(cleaned_data)

# Exercise the dunder methods: repr(), str() and len()
print(f"Reader repr: {reader!r}")
print(f"Reader str: {reader!s}")
print(f"Length (using __len__): {len(reader)}")

# shape() is unpacked into a (rows, cols) pair
rows, cols = reader.shape()
print(f"\nDataset shape: {rows} rows × {cols} columns")

# Column names reported by the reader
columns = reader.get_columns()
print(f"Columns: {columns}")

## Phase 4: Basic Analysis Methods

The `JSONReader` provides built-in analysis methods.

In [None]:
# Per-column summary: summary_stats() is iterated as a mapping of
# column name -> statistics, printed one indented line per column.
summary = reader.summary_stats()
print("Summary Statistics:")
for column_name, column_stats in summary.items():
    print(f"  {column_name}: {column_stats}")

# Missing-value report from the reader
missing = reader.count_missing_values()
print(f"\nMissing values per column: {missing}")

## Phase 5: Advanced Analysis with Inheritance

The `AdvancedJSONReader` subclass demonstrates **inheritance and polymorphism**, extending the base reader with specialized methods.

In [None]:
# Create an AdvancedJSONReader (subclass of JSONReader) over the same
# cleaned data that the base reader was given.
advanced_reader = AdvancedJSONReader(cleaned_data)
print(f"Advanced reader: {advanced_reader}")

# describe() is iterated as a mapping; print one indented "key: value" line
# per entry.
description = advanced_reader.describe()
print("\nDataset Description:")
for key, value in description.items():
    print(f"  {key}: {value}")

## Phase 6: Advanced Methods

Use the specialized methods that are available only in the advanced reader.

In [None]:
# Get unique values for a column (if it exists). Both lookups are guarded by
# a column check because the sample data is not guaranteed to have them, and
# both report a missing column instead of silently printing nothing.
if 'category' in advanced_reader.get_columns():
    unique_categories = advanced_reader.get_unique_values('category')
    print(f"Unique categories: {unique_categories}")
else:
    print("No 'category' column found in sample data.")

# Filter by value: keep the records whose 'status' equals 'active'
if 'status' in advanced_reader.get_columns():
    filtered = advanced_reader.filter_by_value('status', 'active')
    print(f"\nFiltered reader (active records): {filtered}")
else:
    print("No 'status' column found in sample data.")

## Phase 7: Export to CSV

Export cleaned data to CSV using the advanced reader's export functionality.

In [None]:
# Export to CSV. This is a best-effort demo step: any failure raised by the
# export is reported and the notebook carries on.
output_path = 'data/cleaned_output.csv'
try:
    # Keep only the call that can fail inside the try body, so a problem
    # elsewhere is never misreported as an export failure.
    advanced_reader.export_to_csv(output_path)
except Exception as e:
    print("Note: CSV export skipped (sample data may not have suitable columns)")
    print(f"  Error: {e}")
else:
    print(f"✓ Successfully exported to {output_path}")

## Summary: OOP Principles Demonstrated

This notebook showcases key OOP concepts implemented in json_therule0:

| Concept | Example |
|---------|--------|
| **Classes** | JSONLoader, JSONCleaner, JSONReader, AdvancedJSONReader |
| **Encapsulation** | Private attributes (`__raw_data`, `__cleaned_data`); public accessors |
| **Inheritance** | AdvancedJSONReader extends JSONReader |
| **Polymorphism** | `AdvancedJSONReader` can be used wherever a `JSONReader` is expected, while adding `export_to_csv()`, `filter_by_value()`, `describe()` |
| **Composition** | JSONCleaner contains JSONLoader instance |
| **Dunder Methods** | `__repr__()`, `__str__()`, `__eq__()`, `__len__()` |
| **Method Chaining** | Fluent interface for intuitive data cleaning pipelines |

## Installation & Usage

To install and use json_therule0 in your projects:

```bash
pip install json-therule0
```

Then import and use:

```python
from json_therule0 import JSONCleaner, AdvancedJSONReader

cleaner = JSONCleaner('data.json')
cleaned = cleaner.trim_whitespace().remove_null_values().get_cleaned_data()
reader = AdvancedJSONReader(cleaned)
print(reader.describe())
```