In [1]:
import os
# Run-level settings for this notebook session.
# NOTE(review): `max_samples` and `debug` are not read by any cell visible
# here -- presumably consumed further down; confirm before removing.
debug = True
max_samples = 20
run_name = 'debug_flag'

# Relative path to the experiment YAML; os.path.join picks the separator
# appropriate for the host OS.
config = os.path.join('configs', 'resnet_mfcc.yaml')

In [2]:
# Load the YAML experiment config named by `config` into `cfg` / `cfg_dict`.
import os, pprint, sys, subprocess

# Robust import of yaml with auto-install fallback
try:
    import yaml
except ModuleNotFoundError:
    print("PyYAML not found in current kernel. Attempting installation...")
    # sys.executable is the interpreter running this kernel, so the package
    # lands in the same environment the notebook imports from.
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'PyYAML'])
    import yaml
    print("PyYAML installed successfully.")

# Explicit raise rather than `assert`: assertions are stripped under
# `python -O`, and FileNotFoundError is the conventional type here.
if not os.path.isfile(config):
    raise FileNotFoundError(f"Config file not found: {config}")

# Decode as UTF-8 explicitly; the platform default (e.g. cp1252 on Windows)
# would mis-decode non-ASCII characters such as those in a data path.
with open(config, 'r', encoding='utf-8') as f:
    cfg = yaml.safe_load(f)

# safe_load returns None for an empty document and may return a list or a
# scalar; later cells index cfg_dict['data'], so fail here with a clear error.
if not isinstance(cfg, dict):
    raise ValueError(
        f"Config file {config} must contain a YAML mapping at the top level, "
        f"got {type(cfg).__name__}"
    )

# Keep cfg in notebook variable for later experiment use
cfg_dict = cfg

In [3]:
# Echo the parsed configuration so the settings in effect are visible inline.
cfg_dict

{'data': {'root': 'D:\\Projects\\Technion\\SpeechEmotionRecognition\\data',
  'feature_type': 'mfcc',
  'feature_params': {'n_mfcc': 40, 'add_delta': True, 'add_delta2': True},
  'sr': 16000,
  'cache_dir': 'cache_features/mfcc',
  'fixed_frames': 256},
 'model': {'name': 'resnet',
  'params': {'in_channels': 1,
   'base_channels': 32,
   'num_classes': 8,
   'depth': 4}},
 'train': {'batch_size': 32,
  'epochs': 30,
  'optimizer': {'lr': 0.001, 'weight_decay': 0.0005},
  'amp': True,
  'accumulate_steps': 1,
  'early_patience': 10},
 'logging': {'log_dir': 'logs/resnet_mfcc_run1',
  'checkpoint_dir': 'checkpoints/resnet_mfcc_run1',
  'tensorboard': True}}

In [4]:
from datasets import EmotionDataset, DatasetConfig

In [14]:
# Build the dataset from the 'data' section of the loaded config.
# Required keys: 'root' and 'feature_type'; every other key falls back to a
# default when it is absent from the YAML.
data_section = cfg_dict['data']

ds_cfg = DatasetConfig(
    data_root=data_section['root'],
    feature_name=data_section['feature_type'],
    target_sr=data_section.get('sr', 16000),
    # Default cache location is namespaced by feature type.
    cache_dir=data_section.get(
        'cache_dir',
        f"cache_features/{data_section['feature_type']}",
    ),
    feature_params=data_section.get('feature_params', {}),
    file_ext=data_section.get('ext', '.wav'),
    fixed_frames=data_section.get('fixed_frames'),
)

ds_full = EmotionDataset(ds_cfg)

In [7]:
# Fetch the dataset's debug metadata for manual inspection.
# NOTE(review): the return structure is not visible here; the notebook only
# relies on it being subscriptable with a 'labels' key -- confirm in datasets.py.
db_val = ds_full.debug_getmeta()

In [13]:
# Spot-check a 20-item window of the 'labels' entry (indices 10000-10019).
db_val['labels'][10000:10020]

[3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3]