## Install Requirements

In [1]:
!pip install pandas

Defaulting to user installation because normal site-packages is not writeable
You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.7/bin/python3.7 -m pip install --upgrade pip' command.
Note: you may need to restart the kernel to use updated packages.


In [2]:
!pip install openpyxl

Defaulting to user installation because normal site-packages is not writeable
You should consider upgrading via the '/Library/Frameworks/Python.framework/Versions/3.7/bin/python3.7 -m pip install --upgrade pip' command.
Note: you may need to restart the kernel to use updated packages.


## Convert Growth Curve Sheets

In [3]:
import pandas as pd
import os

# Growth curve workbooks, read from the current working directory
sheetNames = ['chou_HC.xlsx', 'chou_length.xlsx', 'chou_weight.xlsx']

# Columns dropped from every group before it is exported
dropped_columns = ['PatientSeqID', 'Weight', 'Length', 'DailyHC']

# Percentile columns that are multiplied by 1000 for workbooks with 'weight' in their name
percentile_columns = [f'percentile_{p}_var' for p in (3, 10, 25, 50, 75, 90, 97)]

for xlsx_name in sheetNames:
    # The text before the first '.' becomes the prefix of every output file name
    stem = xlsx_name.partition('.')[0]
    is_weight_file = 'weight' in xlsx_name

    curve_table = pd.read_excel(xlsx_name)

    # One JSON file is written per (GA_GROUP, Sex) combination
    for (ga_group, sex), subset in curve_table.groupby(['GA_GROUP', 'Sex']):
        subset = subset.drop(columns=dropped_columns)

        if is_weight_file:
            subset[percentile_columns] = subset[percentile_columns] * 1000

        # Each row of the group becomes one object in a JSON array
        subset.to_json(f'{stem}_GA{ga_group}_{sex}.json', orient='records')

## Convert Sample Workbook Curves to JSON

In [4]:
import openpyxl
import json

# Load Excel workbook from current directory
workbook = openpyxl.load_workbook('GA22wkGroupTablesFemaleWkbk.xlsx')

# Percentile columns multiplied by 1000 on the "Chou_weight" sheet.
# "percentile_50_var" belongs in this list: leaving it out would export the
# 50th percentile on a different scale from the other percentiles of the same
# row, and the growth-curve sheet conversion scales all seven columns as well.
weight_percentile_keys = [
    "percentile_3_var", "percentile_10_var", "percentile_25_var", "percentile_50_var",
    "percentile_75_var", "percentile_90_var", "percentile_97_var",
]

# Iterate through each sheet; every sheet is written to its own JSON file
for sheet in workbook.worksheets:
    # Get headers from the first row
    headers = [cell.value for cell in sheet[1]]

    # Convert the remaining rows to dictionaries keyed by header
    data = [dict(zip(headers, row)) for row in sheet.iter_rows(min_row=2, values_only=True)]

    # Remove empty values (None or '') so those keys are omitted from the JSON
    clean_data = [{k: v for k, v in row.items() if v is not None and v != ''} for row in data]

    # Multiply percentile values by 1000 if sheet is titled "Chou_weight"
    if sheet.title == "Chou_weight":
        for row in clean_data:
            for key in weight_percentile_keys:
                # Keys removed as empty above are simply skipped
                if key in row:
                    row[key] = row[key] * 1000

    # Write sheet to JSON file named after the sheet title
    with open(f'{sheet.title}.json', 'w') as outfile:
        json.dump(clean_data, outfile)
