# Code-Switching Predictability Analysis Pipeline


## Setup


In [None]:
!git clone -q https://github.com/jacobposchl/bison-word-predictability.git
!cd bison-word-predictability && pip install -r requirements.txt


In [None]:
from google.colab import files
import shutil
from pathlib import Path


In [None]:
import os

# Name of the directory created by the clone step; later cells use paths
# relative to it (scripts/..., results/...).
REPO_DIR = 'bison-word-predictability'

# Only change directory when we are not already inside the repository.
# Without this guard a second run of the cell raises FileNotFoundError,
# because the relative path no longer exists from inside the repo.
if os.path.basename(os.getcwd()) != REPO_DIR:
    os.chdir(REPO_DIR)


If your raw data is packaged as a zip archive, you can extract it here. Set `zip_path` in the cell below to the location of your archive before running it.

In [None]:
import zipfile

# Source archive and the directory its contents are extracted into.
zip_path = '/content/bison-word-predictability/Spaced-20260211T091322Z-3-001.zip'  # ** EDIT: path to your zip archive **
extract_to = '/content/bison-word-predictability/raw_data'

if not os.path.exists(zip_path):
    # No archive at the configured location: report it and extract nothing.
    print(f"Error: Could not find file at {zip_path}")
else:
    # Unpack every member of the archive under extract_to.
    with zipfile.ZipFile(zip_path, mode='r') as archive:
        archive.extractall(path=extract_to)
    print(f"Successfully unzipped to: {extract_to}")

## 1. Preprocessing


In [None]:
# Bring the checkout in the current working directory up to date with its
# remote before running the pipeline steps below.
!git pull

In [None]:
# Run the repository's preprocessing script with its default arguments.
# NOTE(review): presumably writes its outputs under results/preprocessing,
# since the next cell archives that directory — confirm against the script.
!python scripts/preprocess/preprocess.py


In [None]:
# Zip the preprocessing outputs and hand the archive to the browser for download.
preprocessing_zip = 'preprocessing_results.zip'
preprocessing_dir = Path('results/preprocessing')

if preprocessing_dir.is_dir():
    # make_archive appends the ".zip" suffix itself, so it is given the
    # archive name without its extension.
    shutil.make_archive(Path(preprocessing_zip).stem, 'zip', str(preprocessing_dir))
    files.download(preprocessing_zip)
    print(f"Downloaded {preprocessing_zip}")
else:
    # Say so explicitly instead of silently doing nothing, so a failed or
    # skipped preprocessing run is not mistaken for a completed download.
    print(f"Nothing to download: {preprocessing_dir} does not exist. "
          "Run the preprocessing step first.")

## 2. Matching


In [None]:
# Run the repository's matching script with its default arguments.
# NOTE(review): presumably consumes the preprocessing outputs and writes to
# results/matching (the directory archived in the next cell) — confirm.
!python scripts/matching/matching.py


In [None]:
# Zip the matching outputs and hand the archive to the browser for download.
matching_zip = 'matching.zip'
matching_dir = Path('results/matching')

if matching_dir.is_dir():
    # make_archive appends the ".zip" suffix itself, so it is given the
    # archive name without its extension.
    shutil.make_archive(Path(matching_zip).stem, 'zip', str(matching_dir))
    files.download(matching_zip)
    print(f"Downloaded {matching_zip}")
else:
    # Say so explicitly instead of silently doing nothing, so a failed or
    # skipped matching run is not mistaken for a completed download.
    print(f"Nothing to download: {matching_dir} does not exist. "
          "Run the matching step first.")


## 3. Surprisal Analysis


In [None]:
# Run the surprisal script with `--model masked`.
# NOTE(review): the meaning of "masked" is defined by the script; presumably
# it selects a masked language model — confirm.
!python scripts/surprisal/surprisal.py --model masked


In [None]:
# Run the surprisal script a second time, now with `--model autoregressive`.
# NOTE(review): presumably selects an autoregressive language model and
# writes alongside the masked run's results — confirm against the script.
!python scripts/surprisal/surprisal.py --model autoregressive


In [None]:
# Zip the surprisal outputs and hand the archive to the browser for download.
surprisal_zip = 'surprisal.zip'
surprisal_dir = Path('results/surprisal')

if surprisal_dir.is_dir():
    # make_archive appends the ".zip" suffix itself, so it is given the
    # archive name without its extension.
    shutil.make_archive(Path(surprisal_zip).stem, 'zip', str(surprisal_dir))
    files.download(surprisal_zip)
    print(f"Downloaded {surprisal_zip}")
else:
    # Say so explicitly instead of silently doing nothing, so a failed or
    # skipped surprisal run is not mistaken for a completed download.
    print(f"Nothing to download: {surprisal_dir} does not exist. "
          "Run the surprisal step first.")