# Code to transform automatically generated transcriptions from aTrain (.json files) to Praat transcriptions (.TextGrid files) 

#### by [Victoria Reshetnikova](https://www.linkedin.com/in/victoria-reshetnikova-8227b9177/) @ Utrecht University, written with Python 3.12.7

## Before you start

## Setting Up Python and Jupyter Notebook

### 1. Install Python  
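To run the code, you need to have Python installed. You can check if Python is installed by running the following command in a terminal (Command Prompt, Terminal, or PowerShell):  

```bash
python --version
```

or  

```bash
python3 --version
```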

If Python is not installed, download and install it from the official website:  
[Download Python](https://www.python.org/downloads/)  

Make sure to check the box **"Add Python to PATH"** during installation.

---

### 2. Install Jupyter Notebook  
Once Python is installed, open a terminal (Command Prompt, Terminal, or PowerShell) and install Jupyter Notebook by running:  

```bash
pip install notebook
```

If you are using **Anaconda**, Jupyter is already included.

---

### 3. Open Jupyter Notebook  
After installation, start Jupyter Notebook by running:  

```bash
jupyter notebook
```

This will open Jupyter Notebook in your default web browser.

You are now ready to run the code!

## If you want to try this code out for a folder containing automatic transcriptions of your audio files generated by aTrain, run the code below and make sure your folder structure matches the one shown here.
 
base_folder (e.g., auto_transcripts)  
&nbsp;&nbsp;&nbsp;audiofile1name_folder (e.g., participant01)  
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;log.txt  
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;metadata.txt  
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;transcription_maxqda.txt  
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;transcription_timestamps.txt  
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;transcription.json  
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;transcription.srt  
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;transcription.txt  
&nbsp;&nbsp;&nbsp;audiofile2name_folder (e.g., participant02)  
&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;...

In [None]:
#install textgrid package for python to be able to create TextGrid files

!pip install textgrid

#you can also try !pip3 install textgrid if this line does not work for you

In [None]:
import os
import json
import textgrid
from textgrid import IntervalTier, Interval
import shutil  # To copy files

# Define the base folder where your subfolders with transcriptions are located
base_folder = '...'  # <-- PUT THE NAME OF THE FOLDER THAT CONTAINS ATRAIN TRANSCRIPTIONS

# Define the "TextGrid transcriptions" folder where all generated TextGrid files will be stored
textgrid_transcriptions_name = "TextGrid transcriptions"
textgrid_transcriptions_folder = os.path.join(base_folder, textgrid_transcriptions_name)
os.makedirs(textgrid_transcriptions_folder, exist_ok=True)  # Create the folder if it doesn't exist

# Deduction value for adjusting start and end times of the words (default 0, can be adjusted if needed)
deduction_value = 0.0  # <-- ADJUST THIS VALUE IF YOU NOTICE THAT THE GENERATED TRANSCRIPTIONS SEEM TO START LATER COMPARED TO THE AUDIO FILE


def collect_speaker_intervals(segments, deduction_value=0.0):
    """Turn transcription segments into per-speaker lists of (start, end, text).

    Consecutive words of a segment that belong to the same speaker form one
    interval. If the speaker changes inside a segment, the segment is split
    at the change and every piece only carries its own words.

    Parameters:
        segments: list of segment dicts; each may have 'speaker', 'text' and
            'words' (a list of dicts with 'start', 'end' and optionally
            'speaker').
            NOTE(review): the text of a word is read from the 'word' key -
            confirm against your aTrain output. If it is absent, the text of
            the whole segment is used instead.
        deduction_value: number of seconds subtracted from every time stamp.

    Returns:
        dict mapping speaker name -> list of (start, end, text) tuples.
    """
    speaker_tiers = {}

    for segment in segments:
        segment_speaker = segment.get('speaker', "Unknown")
        segment_text = segment.get('text', '')

        runs = []           # each run: [speaker, start, end, [word texts]]
        untimed_texts = []  # words without time stamps seen before the first run

        for word in segment.get('words', []):
            word_text = str(word.get('word', '')).strip()
            word_start = word.get('start')
            word_end = word.get('end')

            # A word without time stamps cannot be placed on the time line;
            # keep its text with the neighbouring words instead of crashing.
            if word_start is None or word_end is None:
                if runs:
                    runs[-1][3].append(word_text)
                else:
                    untimed_texts.append(word_text)
                continue

            # Times cannot lie before the beginning of the audio file
            adjusted_start = max(0.0, word_start - deduction_value)
            adjusted_end = max(0.0, word_end - deduction_value)

            # A word without its own speaker label belongs to the speaker of the
            # previous word (or to the speaker of the segment for the first word)
            previous_speaker = runs[-1][0] if runs else segment_speaker
            speaker = word.get('speaker', previous_speaker)

            if runs and runs[-1][0] == speaker:
                # Same speaker keeps talking: extend the current run
                runs[-1][2] = max(runs[-1][2], adjusted_end)
                runs[-1][3].append(word_text)
            else:
                # Speaker changed: the previous run keeps the end time of ITS last word
                runs.append([speaker, adjusted_start, adjusted_end, untimed_texts + [word_text]])
                untimed_texts = []

        for speaker, start, end, word_texts in runs:
            # Intervals need a positive duration to be usable in a TextGrid
            if end <= start:
                continue
            if len(runs) == 1:
                # Only one speaker in this segment: keep the segment text as it is
                text = segment_text
            else:
                text = " ".join(t for t in word_texts if t) or segment_text
            speaker_tiers.setdefault(speaker, []).append((start, end, text))

    return speaker_tiers


def build_textgrid(speaker_tiers):
    """Create a TextGrid object with one interval tier per speaker.

    Parameters:
        speaker_tiers: dict mapping speaker name -> list of (start, end, text).

    Returns:
        textgrid.TextGrid containing one IntervalTier for every speaker.
    """
    tg = textgrid.TextGrid()

    for speaker, intervals in speaker_tiers.items():
        tier = IntervalTier(speaker, min(start for start, _, _ in intervals), max(end for _, end, _ in intervals))
        existing_boundaries = set()

        # Ensure no duplicate boundaries by slightly adjusting the start and end times
        for start, end, text in intervals:
            if start in existing_boundaries:
                start += 0.001
            if end in existing_boundaries:
                end += 0.001

            # The adjustment above may leave a very short interval without a positive duration
            if end <= start:
                continue

            existing_boundaries.add(start)
            existing_boundaries.add(end)

            tier.addInterval(Interval(start, end, text))

        # Move the outer boundaries slightly inwards so that they stay visible in Praat
        if tier.intervals:
            tier.intervals[0].minTime += 0.001
            tier.intervals[-1].maxTime -= 0.001

        # Add the tier to the TextGrid object
        tg.append(tier)

    return tg


# Loop through each subfolder in the base folder (sorted, so the order is the same on every run)
for subfolder in sorted(os.listdir(base_folder)):
    folder_path = os.path.join(base_folder, subfolder)

    # Skip if it's not a directory (we only want to process subfolders)
    if not os.path.isdir(folder_path):
        continue

    # Skip the output folder itself: it lives inside the base folder but holds no transcription
    if subfolder == textgrid_transcriptions_name:
        continue

    print(f"Processing folder: {subfolder}")

    # Define paths for the JSON transcription and the TextGrid file
    json_file = os.path.join(folder_path, 'transcription.json')
    textgrid_file = os.path.join(folder_path, 'transcription.TextGrid')

    # If the transcription.json doesn't exist, print a warning message and go on with the next folder
    if not os.path.exists(json_file):
        print(f"Warning: No transcription.json found in {subfolder}, skipping this folder. \n")
        continue

    # Read the transcription; the encoding is given explicitly so that special
    # characters are read the same way on Windows, macOS and Linux
    with open(json_file, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Process each segment in the transcription data and adjust word timings
    speaker_tiers = collect_speaker_intervals(data.get('segments', []), deduction_value)

    # Without any timed words there is nothing to put into a TextGrid
    if not speaker_tiers:
        print(f"Warning: No timed words found in {json_file}, skipping this folder. \n")
        continue

    # Create a tier for each speaker and add intervals
    tg = build_textgrid(speaker_tiers)

    # Save the TextGrid file in the original subfolder
    with open(textgrid_file, 'w', encoding='utf-8') as f:
        tg.write(f)

    print(f'TextGrid file saved in: {textgrid_file}')

    # Save a copy of the TextGrid file in the "TextGrid transcriptions" folder
    textgrid_copy_path = os.path.join(textgrid_transcriptions_folder, f"{subfolder}.TextGrid")
    shutil.copy(textgrid_file, textgrid_copy_path)
    print(f'TextGrid file also saved in: {textgrid_copy_path} \n')


## If you want to try this code out for one aTrain transcription file (.json format), run the code below.

In [None]:
import json
import os  # needed to build the name of the output file

import textgrid
from textgrid import IntervalTier, Interval #make sure textgrid is installed (check first code box on how to install it in case it is not)

file_json = '...' # <-- PUT THE PATH TO THE ATRAIN FILE YOU WOULD LIKE TO RUN THE CODE FOR

# Deduction value for start and end times (default 0, or ask user to adjust)
deduction_value = 0.0 # <-- ADJUST THIS VALUE IF YOU NOTICE THAT THE GENERATED TRANSCRIPTIONS SEEM TO START LATER COMPARED TO THE AUDIO FILE


def collect_speaker_intervals(segments, deduction_value=0.0):
    """Turn transcription segments into per-speaker lists of (start, end, text).

    Consecutive words of a segment that belong to the same speaker form one
    interval. If the speaker changes inside a segment, the segment is split
    at the change and every piece only carries its own words.

    Parameters:
        segments: list of segment dicts; each may have 'speaker', 'text' and
            'words' (a list of dicts with 'start', 'end' and optionally
            'speaker').
            NOTE(review): the text of a word is read from the 'word' key -
            confirm against your aTrain output. If it is absent, the text of
            the whole segment is used instead.
        deduction_value: number of seconds subtracted from every time stamp.

    Returns:
        dict mapping speaker name -> list of (start, end, text) tuples.
    """
    speaker_tiers = {}

    for segment in segments:
        segment_speaker = segment.get('speaker', "Unknown")
        segment_text = segment.get('text', '')

        runs = []           # each run: [speaker, start, end, [word texts]]
        untimed_texts = []  # words without time stamps seen before the first run

        for word in segment.get('words', []):
            word_text = str(word.get('word', '')).strip()
            word_start = word.get('start')
            word_end = word.get('end')

            # A word without time stamps cannot be placed on the time line;
            # keep its text with the neighbouring words instead of crashing.
            if word_start is None or word_end is None:
                if runs:
                    runs[-1][3].append(word_text)
                else:
                    untimed_texts.append(word_text)
                continue

            # Times cannot lie before the beginning of the audio file
            adjusted_start = max(0.0, word_start - deduction_value)
            adjusted_end = max(0.0, word_end - deduction_value)

            # A word without its own speaker label belongs to the speaker of the
            # previous word (or to the speaker of the segment for the first word)
            previous_speaker = runs[-1][0] if runs else segment_speaker
            speaker = word.get('speaker', previous_speaker)

            if runs and runs[-1][0] == speaker:
                # Same speaker keeps talking: extend the current run
                runs[-1][2] = max(runs[-1][2], adjusted_end)
                runs[-1][3].append(word_text)
            else:
                # Speaker changed: the previous run keeps the end time of ITS last word
                runs.append([speaker, adjusted_start, adjusted_end, untimed_texts + [word_text]])
                untimed_texts = []

        for speaker, start, end, word_texts in runs:
            # Intervals need a positive duration to be usable in a TextGrid
            if end <= start:
                continue
            if len(runs) == 1:
                # Only one speaker in this segment: keep the segment text as it is
                text = segment_text
            else:
                text = " ".join(t for t in word_texts if t) or segment_text
            speaker_tiers.setdefault(speaker, []).append((start, end, text))

    return speaker_tiers


def build_textgrid(speaker_tiers):
    """Create a TextGrid object with one interval tier per speaker.

    Parameters:
        speaker_tiers: dict mapping speaker name -> list of (start, end, text).

    Returns:
        textgrid.TextGrid containing one IntervalTier for every speaker.
    """
    tg = textgrid.TextGrid()

    for speaker, intervals in speaker_tiers.items():
        tier = IntervalTier(speaker, min(start for start, _, _ in intervals), max(end for _, end, _ in intervals))

        # Set to store the existing boundaries
        existing_boundaries = set()

        for start, end, text in intervals:
            # Check if start or end is already a boundary, add a small offset if necessary
            if start in existing_boundaries:
                start += 0.001  # Add a small offset to avoid duplicate
            if end in existing_boundaries:
                end += 0.001  # Add a small offset to avoid duplicate

            # The adjustment above may leave a very short interval without a positive duration
            if end <= start:
                continue

            # Add the new boundaries to the set
            existing_boundaries.add(start)
            existing_boundaries.add(end)

            # Create the interval and add it to the tier
            tier.addInterval(Interval(start, end, text))

        # Adjust the first and last intervals' boundaries for visibility
        if len(tier.intervals) > 0:
            tier.intervals[0].minTime += 0.001   # Move the first boundary slightly forward
            tier.intervals[-1].maxTime -= 0.001  # Move the last boundary slightly backward

        tg.append(tier)

    return tg


# Load the JSON file; the encoding is given explicitly so that special
# characters are read the same way on Windows, macOS and Linux
with open(file_json, encoding='utf-8') as json_file:
    data = json.load(json_file)

# Process segments and deduct the start and end times
speaker_tiers = collect_speaker_intervals(data.get('segments', []), deduction_value)

if not speaker_tiers:
    # Without any timed words there is nothing to put into a TextGrid
    print('Warning: No timed words found in', file_json, '- no TextGrid file was created.')
else:
    # Create tiers for each speaker
    tg = build_textgrid(speaker_tiers)

    # Replace the .json extension with .TextGrid
    output_file = os.path.splitext(file_json)[0] + '.TextGrid'

    # Write the complete TextGrid content to a file
    with open(output_file, 'w', encoding='utf-8') as textgrid_file:
        tg.write(textgrid_file)
        print('TextGrid file saved to:', output_file)
