In [8]:
import os
import re

import nbformat
from nbconvert.preprocessors import ExecutePreprocessor, CellExecutionError

# Matches the start of a ``.save(`` call whose first argument is a plain,
# un-prefixed, single-line string literal containing no quotes or backslashes.
# Variables, f-strings and nested calls deliberately do not match, because
# their value cannot be determined from the source text alone.
_SAVE_LITERAL_RE = re.compile(r'''(\.save\(\s*)(["'])([^"'\\\n]*)\2''')


def update_model_save_paths(nb, output_dir):
    """Redirect literal ``.save("...")`` targets in word2vec cells to *output_dir*.

    Only code cells whose source contains both ``word2vec`` and ``.save(`` are
    touched.  Within those cells, every ``.save(`` call whose first argument
    is a plain string literal has that literal replaced by
    ``output_dir/<basename of the original literal>``.  Everything else on the
    line (further arguments, the closing parenthesis, trailing comments) is
    kept as written.

    Calls whose first argument is not a plain string literal, or whose literal
    has an empty file name, are left unchanged rather than rewritten into
    something that may no longer be valid Python.

    Args:
        nb: Notebook-like object exposing ``cells``; each cell must provide
            ``cell_type`` and a writable ``source`` string.
        output_dir: Directory that the rewritten save paths should point to.

    Returns:
        None.  Cells are modified in place.
    """

    def _redirect(match):
        file_name = os.path.basename(match.group(3))
        if not file_name:
            # Empty literal or a path ending in a separator: nothing sensible
            # to redirect, so keep the original text.
            return match.group(0)
        new_path = os.path.join(output_dir, file_name)
        # The path is emitted inside a double-quoted literal, so escape the
        # two characters that would otherwise change its meaning.
        escaped = new_path.replace('\\', '\\\\').replace('"', '\\"')
        return f'{match.group(1)}"{escaped}"'

    for cell in nb.cells:
        if cell.cell_type != 'code':
            continue
        if 'word2vec' not in cell.source or '.save(' not in cell.source:
            continue
        cell.source = _SAVE_LITERAL_RE.sub(_redirect, cell.source)

def execute_notebook(notebook_path, output_dir, executed_notebooks=None):
    """Execute a notebook, then any existing notebooks it references via ``%run``.

    The notebook is loaded, its word2vec save paths are redirected to
    *output_dir* (see ``update_model_save_paths``), and it is run with an
    ``ExecutePreprocessor`` (``python3`` kernel, ``timeout=600``) using the
    notebook's own directory as the working path.  After a successful run,
    each code-cell line starting with ``%run`` is inspected; if the referenced
    file exists relative to the notebook's directory it is executed
    recursively.  The executed notebook itself is not written back to disk.

    Args:
        notebook_path: Path of the ``.ipynb`` file to execute.
        output_dir: Directory passed to ``update_model_save_paths``.
        executed_notebooks: Optional set of notebook paths that have already
            been run; it is updated in place.  When omitted, a fresh set is
            created for this call.

    Returns:
        None.  Progress and cell-execution errors are reported with ``print``.
    """
    # A fresh set per call: a mutable default would silently be shared by
    # every call that omits the argument.
    if executed_notebooks is None:
        executed_notebooks = set()

    # De-duplicate on the absolute, normalised path so that different
    # spellings of the same file ("a.ipynb", "./a.ipynb", "x/../a.ipynb") are
    # recognised as one notebook.  The raw path is checked and recorded too,
    # so callers that pre-populate or inspect the set with the strings they
    # passed in keep working.
    canonical_path = os.path.abspath(notebook_path)
    if canonical_path in executed_notebooks or notebook_path in executed_notebooks:
        print(f"Skipping already executed notebook: {notebook_path}")
        return

    executed_notebooks.update((notebook_path, canonical_path))

    print(f"Executing notebook: {notebook_path}")

    # Notebook files are JSON stored as UTF-8; do not rely on the platform's
    # default text encoding.
    with open(notebook_path, encoding='utf-8') as f:
        nb = nbformat.read(f, as_version=4)

    update_model_save_paths(nb, output_dir)

    ep = ExecutePreprocessor(timeout=600, kernel_name='python3')
    notebook_dir = os.path.dirname(notebook_path)

    try:
        ep.preprocess(nb, {'metadata': {'path': notebook_dir}})

        # Check for references to other notebooks
        for cell in nb.cells:
            if cell.cell_type != 'code' or '%run' not in cell.source:
                continue
            for line in cell.source.split('\n'):
                if not line.strip().startswith('%run'):
                    continue
                referenced_notebook = line.split('%run')[1].strip()
                referenced_notebook_path = os.path.join(notebook_dir, referenced_notebook)
                if os.path.exists(referenced_notebook_path):
                    execute_notebook(referenced_notebook_path, output_dir, executed_notebooks)

        print(f"Executed {notebook_path} successfully.")
    except CellExecutionError as e:
        print(f"Error executing {notebook_path}: {e}")

# Defaults reproduce the original hard-coded project layout, so calling
# ``main()`` with no arguments behaves as before.
_DEFAULT_BASE_PATH = '/Users/aaryansingh/Non-Native Influences Investigation/Languages'
_DEFAULT_OUTPUT_DIR = '/Users/aaryansingh/Non-Native Influences Investigation/Output'
_DEFAULT_LANGUAGES = ('Hindi', 'Tamil', 'Telugu', 'CMU')  # Add more languages as needed
# Extra notebooks, located in the "Transcripts" folder under the base path,
# that are run after all language notebooks.
_TRANSCRIPT_NOTEBOOKS = (
    'get_phone_pairs.ipynb',
    'get_replaced_phone_pair_scores.ipynb',
)


def main(base_path=_DEFAULT_BASE_PATH, output_dir=_DEFAULT_OUTPUT_DIR,
         languages=_DEFAULT_LANGUAGES):
    """Run every language model notebook, then the transcript notebooks.

    For each language ``L`` the notebook ``<base_path>/L/<l>_model.ipynb``
    (file name lower-cased) is executed, followed by the notebooks listed in
    ``_TRANSCRIPT_NOTEBOOKS`` under ``<base_path>/Transcripts``.  Missing
    notebooks are reported with ``print`` and skipped.  A single set of
    already-executed paths is shared across all runs so no notebook is
    executed twice.

    Args:
        base_path: Directory containing one sub-folder per language plus the
            ``Transcripts`` folder.
        output_dir: Directory passed to ``execute_notebook`` as the output
            location; created if it does not exist.
        languages: Iterable of language folder names to process, in order.

    Returns:
        None.
    """
    os.makedirs(output_dir, exist_ok=True)

    # Language-specific notebooks first, then the additional ones.
    notebook_paths = [
        os.path.join(base_path, language, f'{language.lower()}_model.ipynb')
        for language in languages
    ]
    notebook_paths.extend(
        os.path.join(base_path, 'Transcripts', file_name)
        for file_name in _TRANSCRIPT_NOTEBOOKS
    )

    executed_notebooks = set()

    for notebook_path in notebook_paths:
        if os.path.exists(notebook_path):
            execute_notebook(notebook_path, output_dir, executed_notebooks)
        else:
            print(f"Notebook not found: {notebook_path}")

# Run the whole batch only when this code is executed directly, not when it
# is imported as a module.
if __name__ == "__main__":
    main()


Executing notebook: /Users/aaryansingh/Non-Native Influences Investigation/Languages/Hindi/hindi_model.ipynb
Executed /Users/aaryansingh/Non-Native Influences Investigation/Languages/Hindi/hindi_model.ipynb successfully.
Executing notebook: /Users/aaryansingh/Non-Native Influences Investigation/Languages/Tamil/tamil_model.ipynb
Executed /Users/aaryansingh/Non-Native Influences Investigation/Languages/Tamil/tamil_model.ipynb successfully.
Executing notebook: /Users/aaryansingh/Non-Native Influences Investigation/Languages/Telugu/telugu_model.ipynb
Executed /Users/aaryansingh/Non-Native Influences Investigation/Languages/Telugu/telugu_model.ipynb successfully.
Executing notebook: /Users/aaryansingh/Non-Native Influences Investigation/Languages/CMU/cmu_model.ipynb
Executed /Users/aaryansingh/Non-Native Influences Investigation/Languages/CMU/cmu_model.ipynb successfully.
Executing notebook: /Users/aaryansingh/Non-Native Influences Investigation/Languages/Transcripts/get_phone_pairs.ipynb
E