<!-- filepath: d:\Projects\Technion\EmotionDetection\migration_api_usage.ipynb -->
<VSCode.Cell language="markdown">
# EmotionDetection Migration & Archival
This notebook archives legacy Python modules and notebooks into the `old` directory and demonstrates usage of the new modular API (features, datasets, models, training). Keep `DRY_RUN = True` (set in the first code cell) on the first run to preview the planned operations safely; switch it to `False` only after reviewing the plan.
</VSCode.Cell>
<VSCode.Cell language="python">
# 1. Configure Paths and Parameters
from pathlib import Path
import shutil, json, ast, difflib, datetime, argparse, sys
from dataclasses import dataclass

# Project root: the folder holding this file when `__file__` is defined,
# otherwise (e.g. inside a notebook kernel) the current working directory.
if '__file__' in globals():
    BASE_DIR = Path(__file__).parent
else:
    BASE_DIR = Path.cwd()

# Destination folder for archived artifacts.
ARCHIVE_DIR = BASE_DIR / 'old'

# File suffixes that are eligible for archival.
TARGET_EXTS = ['.py', '.ipynb']

# Names that are never treated as legacy content (archive + new API dirs).
EXCLUDE_DIR_NAMES = {
    'old',
    'models',
    'training',
    'experiments',
    'configs',
}

# Safety switch: leave True until the printed plan has been reviewed.
DRY_RUN = True
# Emit progress messages while running.
VERBOSE = True

# Legacy modules are listed explicitly rather than matched by pattern,
# so nothing is archived by accident.
LEGACY_FILES = {
    'DataManipulation.py',
    'DataVisualization.py',
    'DeepLearningBlocks.py',
    'DeepLearningPyTorch.py',
}

# Legacy notebooks are recognised by this file-name prefix.
NOTEBOOK_PREFIX = 'prj_SER_'
</VSCode.Cell>
<VSCode.Cell language="python">
# 2. Enumerate Legacy Files (Notebook + Code Files)
import pandas as pd

# Legacy artifacts live directly in BASE_DIR, so one non-recursive pass is
# enough. A file qualifies when its suffix is in TARGET_EXTS and it is either
# an explicitly listed legacy module or a notebook-prefixed file.
# (The previous extra `rglob('*')` pass walked the whole tree but could only
# ever accept these same top-level files, so it has been dropped.)
# Sorting makes the printed plan and the move order deterministic.
legacy_candidates = sorted(
    p
    for p in BASE_DIR.iterdir()
    if p.is_file()
    and p.suffix in TARGET_EXTS
    and (p.name in LEGACY_FILES or p.name.startswith(NOTEBOOK_PREFIX))
)

# Tabular overview of what was found, one row per artifact.
summary = pd.DataFrame({
    'path': [str(p) for p in legacy_candidates],
    'ext': [p.suffix for p in legacy_candidates],
})
print(f"Found {len(summary)} legacy artifacts")
# Last expression of the cell: per-extension counts rendered as cell output.
summary.groupby('ext').size().rename('count')
</VSCode.Cell>
<VSCode.Cell language="python">
# 3. Create Archive Directory If Missing
# Announce the target first, then either create it or just say so (dry run).
if VERBOSE:
    print(f"Archive dir: {ARCHIVE_DIR}")

if DRY_RUN:
    print("[DRY_RUN] Would create archive directory if absent.")
else:
    # parents/exist_ok make this a no-op when the directory already exists.
    ARCHIVE_DIR.mkdir(parents=True, exist_ok=True)
</VSCode.Cell>
<VSCode.Cell language="python">
# 4. Move Files to Archive
# NOTE: shutil.move is a plain rename only when source and destination share
# a filesystem; otherwise it copies and then deletes, so moves are not
# guaranteed to be atomic.
errors = []  # (source path, repr of the exception) for every failed move
moved = []   # (source path, final destination path) for every successful move
for src in legacy_candidates:
    dst = ARCHIVE_DIR / src.name
    if VERBOSE:
        print(f"Planning move: {src.name} -> {dst}")
    if DRY_RUN:
        continue
    try:
        # Keeps this cell usable even if the directory-creation cell was skipped.
        ARCHIVE_DIR.mkdir(parents=True, exist_ok=True)
        if dst.exists():
            # Never overwrite an archived file: add a UTC epoch suffix instead.
            # An aware datetime is required here; calling .timestamp() on the
            # naive value returned by utcnow() treats it as local time.
            stamp = int(datetime.datetime.now(datetime.timezone.utc).timestamp())
            dst = ARCHIVE_DIR / f"{src.stem}_dup_{stamp}{src.suffix}"
        shutil.move(str(src), str(dst))
    except OSError as e:
        # Covers shutil.Error as well (it subclasses OSError); keep going so
        # one bad file does not block the rest of the plan.
        errors.append((src, repr(e)))
    else:
        moved.append((src, dst))

print(f"Moves completed (dry_run={DRY_RUN}).")
if errors:
    print(f"{len(errors)} move(s) failed; see `errors` for details.")
</VSCode.Cell>
<VSCode.Cell language="python">
# 5. Delete Originals After Successful Move
# shutil.move already removes the source, so nothing is deleted here; on a
# real run we only double-check that no original survived next to its copy.
if not DRY_RUN:
    print("Deletion implicit in move; verifying...")
    errors.extend(
        (src, 'Original still exists after move')
        for src, dst in moved
        if src.exists() and dst.exists()
    )
else:
    print("[DRY_RUN] Skipping deletion (handled inherently by move).")
</VSCode.Cell>
<VSCode.Cell language="python">
# 6. Post-Move Verification and Reporting
report = {
    # Naive-UTC ISO string (same format utcnow().isoformat() produced),
    # obtained without the deprecated datetime.utcnow().
    'timestamp': datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None).isoformat(),
    'dry_run': DRY_RUN,
    'moved_count': len(moved) if not DRY_RUN else 0,
    # `errors` holds (Path, message) tuples; Path objects are not JSON
    # serialisable, so convert each entry to a [path string, message] pair.
    'errors': [[str(src), msg] for src, msg in errors],
    'files_planned': [str(p) for p in legacy_candidates]
}
print(json.dumps(report, indent=2))
if not DRY_RUN:
    # Persist the report next to the archived files.
    with open(ARCHIVE_DIR / 'archive_report.json', 'w', encoding='utf-8') as f:
        json.dump(report, f, indent=2)
else:
    print('[DRY_RUN] Report not written.')
</VSCode.Cell>
<VSCode.Cell language="python">
# 7. Clean Up Empty Source Directories (not expected for flat layout)
if DRY_RUN:
    print('[DRY_RUN] Skipping directory cleanup.')
else:
    for d in sorted(BASE_DIR.iterdir()):
        # Leave the archive and the new-API directories alone even when they
        # are (still) empty; EXCLUDE_DIR_NAMES already contains 'old'.
        if not d.is_dir() or d.name in EXCLUDE_DIR_NAMES:
            continue
        try:
            if not any(d.iterdir()):
                d.rmdir()
                if VERBOSE:
                    print(f'Removed empty directory: {d}')
        except OSError as e:
            # Best effort: report the problem and carry on with the next entry.
            print(f'[WARN] Could not clean up {d}: {e!r}')
</VSCode.Cell>
<VSCode.Cell language="python">
# 8. Generate Migration Notebook File Programmatically
MIGRATION_NOTEBOOK_PATH = BASE_DIR / 'migration.ipynb'
if DRY_RUN:
    print('[DRY_RUN] Would generate migration.ipynb')
else:
    # Minimal nbformat-4 skeleton: no cells, Python 3 kernel metadata.
    nb_struct = {
        'cells': [],
        'metadata': {
            'kernelspec': {
                'name': 'python3',
                'language': 'python',
                'display_name': 'Python 3'
            },
            'language_info': {'name': 'python'}
        },
        'nbformat': 4,
        'nbformat_minor': 5
    }
    try:
        # Mode 'x' creates the file only if it does not exist yet, so re-running
        # this cell can never wipe a notebook that has since been filled in.
        with open(MIGRATION_NOTEBOOK_PATH, 'x', encoding='utf-8') as f:
            json.dump(nb_struct, f)
    except FileExistsError:
        print(f'Notebook already exists, left untouched: {MIGRATION_NOTEBOOK_PATH}')
    else:
        print(f'Created empty notebook at {MIGRATION_NOTEBOOK_PATH}')
</VSCode.Cell>
<VSCode.Cell language="python">
# 9. Populate Migration Notebook: New API Imports (Demonstration inline instead of editing separate file)
# Smoke check: import the new modular API by module name and print what the
# feature and model registries report.
from importlib import import_module

try:
    # Imports run in this order; if one fails, the *_mod names after it are
    # never bound in this cell.
    features_mod = import_module('audio_features')
    datasets_mod = import_module('datasets')
    models_mod = import_module('models')
    training_mod = import_module('training.loops')
    print('Imported new modular API successfully.')
    print('Features available:', features_mod.available_features())
    print('Models available:', models_mod.available_models())
except Exception as e:
    # Catches any failure in the try body, not only ImportError, so an error
    # raised by available_features()/available_models() is also reported
    # with the "Import failed" message below.
    print('Import failed (ensure running from EmotionDetection root):', e)
</VSCode.Cell>
<VSCode.Cell language="markdown">
## Old vs New API Overview
Below we show a conceptual mapping. The old `TrainModel` + `RunEpoch` functions are now replaced by `train_loop` with a `TrainConfig` dataclass.
</VSCode.Cell>
<VSCode.Cell language="python">
# 10. Side-by-Side Old vs New API Usage (Conceptual)
# Both snippets are display-only text; nothing in them is executed here.
old_example = (
    "\n"
    "from DeepLearningPyTorch import TrainModel, RunEpoch, EvaluateModel\n"
    "# oModel, lTrainLoss, ... = TrainModel(model, dlTrain, dlVal, optimizer, nEpochs, loss_fn, score_fn)\n"
)

new_example = (
    "\n"
    "from training.loops import TrainConfig, train_loop\n"
    "cfg = TrainConfig(epochs=1, lr=1e-4, device='cpu')\n"
    "res = train_loop(model, train_loader, val_loader, loss_fn, score_fn, cfg)\n"
)

# Print the legacy usage first, then its replacement.
print('OLD API:\n', old_example)
print('NEW API:\n', new_example)
</VSCode.Cell>
<VSCode.Cell language="python">
# 11. Batch Refactor Helper (AST based rename demonstration)
# Every legacy entry point currently maps onto the single new `train_loop`.
RENAME_MAP = dict.fromkeys(
    (
        'TrainModel',
        'RunEpoch',       # conceptual replacement
        'EvaluateModel',  # using separate evaluation pipeline later
    ),
    'train_loop',
)

def refactor_source(src: str, rename_map: dict, dry_run=None) -> str:
    """Rename identifiers in Python source text using an AST rewrite.

    Bare names (``ast.Name``) and the names listed in ``from ... import``
    statements are renamed according to ``rename_map``, so an import and its
    call sites stay consistent with each other. Attribute accesses, plain
    ``import`` statements and ``as`` aliases are left untouched.

    Args:
        src: Python source code to transform.
        rename_map: Mapping of old identifier -> new identifier.
        dry_run: When true, ``src`` is returned unchanged (it is still parsed,
            so invalid code is reported either way). ``None`` (the default)
            falls back to the module-level ``DRY_RUN`` flag.

    Returns:
        The original ``src`` in dry-run mode, otherwise the rewritten source
        as produced by ``ast.unparse`` (comments and original formatting are
        not preserved by the AST round trip).

    Raises:
        SyntaxError: If ``src`` is not valid Python.
    """
    class Renamer(ast.NodeTransformer):
        def visit_Name(self, node: ast.Name):
            if node.id in rename_map:
                return ast.copy_location(ast.Name(id=rename_map[node.id], ctx=node.ctx), node)
            return node

        def visit_ImportFrom(self, node: ast.ImportFrom):
            # Rename imported names too; otherwise the call sites would refer
            # to a name the import no longer provides.
            seen = set()
            kept = []
            for alias in node.names:
                if alias.name in rename_map:
                    alias.name = rename_map[alias.name]
                # Several old names may map to one new name: import it once.
                key = (alias.name, alias.asname)
                if key not in seen:
                    seen.add(key)
                    kept.append(alias)
            node.names = kept
            return node

    tree = ast.parse(src)
    tree = Renamer().visit(tree)
    ast.fix_missing_locations(tree)
    if dry_run is None:
        dry_run = DRY_RUN
    return src if dry_run else ast.unparse(tree)

# Tiny legacy snippet used to demonstrate the rename.
sample_old_code = (
    "\n"
    "from DeepLearningPyTorch import TrainModel\n"
    "TrainModel(model, dlTrain, dlVal, opt, 10, loss_fn, score_fn)\n"
)

# Dry run: plain textual substitution, for preview only.
# Real run: AST-based rewrite through refactor_source.
transformed = (
    sample_old_code.replace('TrainModel', 'train_loop')
    if DRY_RUN
    else refactor_source(sample_old_code, RENAME_MAP)
)

print('--- Diff Preview ---')
before_lines = sample_old_code.splitlines()
after_lines = transformed.splitlines()
for line in difflib.unified_diff(before_lines, after_lines, lineterm=''):
    print(line)
</VSCode.Cell>
<VSCode.Cell language="python">
# 12. Logging and Dry-Run Mode Helper
def log(msg: str):
    """Print ``msg`` when the module-level ``VERBOSE`` flag is truthy."""
    if not VERBOSE:
        return
    print(msg)

# Report the current mode once (visible only when VERBOSE is on).
log("Dry run mode is {}".format('ON' if DRY_RUN else 'OFF'))
</VSCode.Cell>
<VSCode.Cell language="python">
# 13. Simple pytest Test Stubs for New API
# Writes tests/test_new_api.py, which checks that the feature and model
# registries each report at least one entry.
TESTS_DIR = BASE_DIR / 'tests'
if not DRY_RUN:
    TESTS_DIR.mkdir(exist_ok=True)
    test_file = TESTS_DIR / 'test_new_api.py'
    # Content of the generated test module, one literal per output line.
    test_src = (
        "import importlib\n"
        "\n"
        "features = importlib.import_module('audio_features')\n"
        "models = importlib.import_module('models')\n"
        "\n"
        "def test_feature_registry_presence():\n"
        "    assert len(features.available_features()) >= 1\n"
        "\n"
        "def test_model_registry_presence():\n"
        "    assert len(models.available_models()) >= 1\n"
    )
    test_file.write_text(test_src)
    print('Created', test_file)
else:
    print('[DRY_RUN] Would create tests/test_new_api.py')
</VSCode.Cell>
<VSCode.Cell language="python">
# 14. Optional CLI Argument Parsing Inside Notebook
# Runs only when `__file__` is undefined (presumably a kernel-driven run such
# as nbconvert/papermill - TODO confirm).
# Both options are store_true flags OR-ed into the current values, so they can
# switch DRY_RUN / VERBOSE on but never off.
if '__file__' not in globals():
    parser = argparse.ArgumentParser()
    for flag, description in (
        ('--dry_run', 'Perform dry run without moving files'),
        ('--verbose', 'Verbose logging'),
    ):
        parser.add_argument(flag, action='store_true', help=description)
    # parse_known_args tolerates unrelated arguments already present in sys.argv.
    args, unknown = parser.parse_known_args()
    if args.dry_run:
        DRY_RUN = True
    if args.verbose:
        VERBOSE = True
    log(f"Args applied: dry_run={DRY_RUN}, verbose={VERBOSE}")
</VSCode.Cell>
<VSCode.Cell language="python">
# 15. Persist Execution Metadata
# Summarises the run from the `report` dict built in the reporting cell.
meta = {
    'start_time': report.get('timestamp'),
    # Naive-UTC ISO string (same format utcnow().isoformat() produced),
    # obtained without the deprecated datetime.utcnow().
    'end_time': datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None).isoformat(),
    'planned_files': report.get('files_planned'),
    'dry_run': DRY_RUN,
    'moved_count': report.get('moved_count'),
    'error_count': len(report.get('errors', []))
}
if DRY_RUN:
    print('[DRY_RUN] Would write metadata.json')
else:
    # Stored alongside the archived files.
    with open(ARCHIVE_DIR / 'metadata.json', 'w', encoding='utf-8') as f:
        json.dump(meta, f, indent=2)
print(json.dumps(meta, indent=2))
</VSCode.Cell>
