In [None]:
import json
import csv

def load_squad_data(file_path):
    """Read a SQuAD-style JSON file and return its top-level ``'data'`` entry.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        Whatever is stored under the ``'data'`` key of the parsed document.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If the parsed document has no ``'data'`` key.
    """
    # JSON text is UTF-8; decoding explicitly keeps the result independent of
    # the platform's default locale encoding (e.g. cp1252 on Windows).
    with open(file_path, 'r', encoding='utf-8') as file:
        squad_dict = json.load(file)
    return squad_dict['data']

def extract_rows_values(squad_data):
    """Flatten nested SQuAD-style entries into one row per question.

    Each entry of ``squad_data`` is expected to hold a ``'paragraphs'`` list;
    each paragraph a ``'context'`` and a ``'qas'`` list; each question a
    ``'question'`` and a non-empty ``'answers'`` list. Only the first answer
    of every question is used.

    Returns:
        A list of ``[context, question, answer, start_pos, end_pos]`` rows,
        where ``end_pos`` is ``start_pos`` plus the answer length.

    Raises:
        KeyError: If an expected key is missing.
        IndexError: If a question has an empty ``'answers'`` list.
    """
    flattened = []
    for article in squad_data:
        for para in article['paragraphs']:
            passage = para['context']
            for item in para['qas']:
                query = item['question']
                # Only the first listed answer contributes to the row.
                first = item['answers'][0]
                text = first['text']
                begin = first['answer_start']
                flattened.append(
                    [str(passage), str(query), str(text), begin, begin + len(text)]
                )
    return flattened

def convert_to_csv(file_path, rows_values):
    """Write question/answer rows to a CSV file with a fixed header.

    Args:
        file_path: Destination path; an existing file is overwritten.
        rows_values: Iterable of rows, each ordered as
            ``context, question, answer, start_pos, end_pos``.

    Raises:
        OSError: If the destination cannot be opened for writing.
    """
    # newline='' lets the csv module control line endings itself; the explicit
    # UTF-8 encoding keeps non-ASCII text writable regardless of the platform's
    # default locale encoding.
    with open(file_path, 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['context', 'question', 'answer', 'start_pos', 'end_pos'])
        writer.writerows(rows_values)

def preprocess(file_path):
    """Load a SQuAD-style JSON file and return its flattened rows.

    Args:
        file_path: Path of the JSON file to read. The file is only read,
            never written.

    Returns:
        The list of ``[context, question, answer, start_pos, end_pos]`` rows
        produced by ``extract_rows_values``.
    """
    squad_data = load_squad_data(file_path)
    # Hand the rows back to the caller instead of writing them to file_path:
    # that path is the JSON input, and writing CSV there would destroy it.
    return extract_rows_values(squad_data)

In [None]:
def process_and_save_data(input_file, output_file):
    """Convert a SQuAD-style JSON file into a CSV file.

    Args:
        input_file: Path of the JSON file to read; it is not modified.
        output_file: Path of the CSV file to create or overwrite.
    """
    # Build the rows straight from the loader and the extractor so that this
    # function depends only on their return values and never writes to
    # input_file.
    data = extract_rows_values(load_squad_data(input_file))
    convert_to_csv(output_file, data)

# Convert the train JSON file to CSV; both relative paths are resolved against
# the current working directory.
process_and_save_data('spoken_train-v1.1.json', 'squad_train_data.csv')

In [None]:
# Convert the test JSON file to CSV; both relative paths are resolved against
# the current working directory.
process_and_save_data('spoken_test-v1.1.json', 'squad_test_data.csv')