In [1]:
import pandas as pd
import numpy as np

# Read the exercise catalogue from the first sheet of the workbook.
file_path = 'Exercises Data 7.0 with Descriptions.xlsx'
sheet_name = 'Sheet1'

df = pd.read_excel(file_path, sheet_name=sheet_name)

# Columns that need cleaning; every other column is used as loaded.
# NOTE: the names must match the workbook headers exactly, spelling included.
columns_to_parse = [
    'Main muscle(s)',
    'Secondary muscles',
    'Level',
    'Equipment',
    'Prerequesite Exercise',
    'Variations',
    'Regression',
    'Progression',
    'Exercise Purpose',
    'Force type (Push, Pull, Rotation, Isomatric)',
    'Movement Pattern (Squat, Hinge, Lunge, Push, Pull, Carry, Twist)',
    'Pain Exclusions',
]

# Turn the placeholder strings "None" / "none" into real missing values (NaN).
# Only whole-cell matches are replaced; columns absent from the sheet are skipped.
present_columns = [name for name in columns_to_parse if name in df.columns]
for name in present_columns:
    print(f"🔄 Replacing 'None' in column: {name}")
    capitalised_removed = df[name].replace('None', np.nan)
    df[name] = capitalised_removed.replace('none', np.nan)


🔄 Replacing 'None' in column: Main muscle(s)
🔄 Replacing 'None' in column: Secondary muscles
🔄 Replacing 'None' in column: Level
🔄 Replacing 'None' in column: Equipment
🔄 Replacing 'None' in column: Prerequesite Exercise
🔄 Replacing 'None' in column: Variations
🔄 Replacing 'None' in column: Regression
🔄 Replacing 'None' in column: Progression
🔄 Replacing 'None' in column: Exercise Purpose
🔄 Replacing 'None' in column: Force type (Push, Pull, Rotation, Isomatric)
🔄 Replacing 'None' in column: Movement Pattern (Squat, Hinge, Lunge, Push, Pull, Carry, Twist)
🔄 Replacing 'None' in column: Pain Exclusions


In [2]:
import ast

def parse_column(cell):
    """
    Safely parse a cell into a Python list.

    The return value is always a list, whatever the input:

    * ``list``  -> returned unchanged (so applying this function to an
      already-parsed column is a no-op).
    * ``str``   -> surrounding whitespace is stripped, then the text is
      evaluated with ``ast.literal_eval``:
        - a list literal is returned as that list,
        - a tuple or set literal is converted to a list,
        - the literal ``None`` gives ``[]``,
        - any other literal (number, quoted string, dict, ...) is wrapped
          in a one-item list,
        - text that is not a valid literal is returned as ``[text]``,
        - an empty / whitespace-only string gives ``[]``.
    * anything else (NaN, ``None``, ``pd.NA``, int, float, ...) -> ``[]``.
      Note that numeric cells are deliberately treated as "no list".

    Parameters
    ----------
    cell : object
        A single value taken from a DataFrame column.

    Returns
    -------
    list
        The parsed items, or an empty list when the cell holds nothing usable.
    """
    # Lists must be recognised before any missing-value test: calling
    # pd.isna() on a list yields an array, and using that array in a boolean
    # context raises ValueError for lists with more than one element.
    if isinstance(cell, list):
        return cell

    # Every non-string scalar (NaN, None, pd.NA, numbers) maps to "no items".
    if not isinstance(cell, str):
        return []

    text = cell.strip()
    if not text:
        return []

    try:
        # literal_eval only accepts Python literals, so it cannot run code.
        parsed = ast.literal_eval(text)
    except (ValueError, SyntaxError, TypeError):
        # Not a literal (e.g. plain text such as "Chest"): keep it as one item.
        return [text]

    if isinstance(parsed, list):
        return parsed
    if parsed is None:
        return []
    if isinstance(parsed, (tuple, set)):
        return list(parsed)
    # Scalars and dicts are wrapped so callers can always iterate the result.
    return [parsed]



In [3]:
# Columns whose cells hold list-like values stored as text.
# NOTE: the names must match the DataFrame headers exactly, spelling included.
columns_to_parse = [
    'Main muscle(s)',
    'Secondary muscles',
    'Level',
    'Equipment',
    'Prerequesite Exercise',
    'Variations',
    'Regression',
    'Progression',
    'Exercise Purpose',
    'Force type (Push, Pull, Rotation, Isomatric)',
    'Movement Pattern (Squat, Hinge, Lunge, Push, Pull, Carry, Twist)',
    'Pain Exclusions',
]

# Run each listed column through parse_column, cell by cell, overwriting the
# column in place. Columns missing from the frame are silently skipped.
for name in columns_to_parse:
    if name not in df.columns:
        continue
    print(f"🔄 Parsing column: {name}")
    parsed_values = df[name].apply(parse_column)
    df[name] = parsed_values


🔄 Parsing column: Main muscle(s)
🔄 Parsing column: Secondary muscles
🔄 Parsing column: Level
🔄 Parsing column: Equipment
🔄 Parsing column: Prerequesite Exercise
🔄 Parsing column: Variations
🔄 Parsing column: Regression
🔄 Parsing column: Progression
🔄 Parsing column: Exercise Purpose
🔄 Parsing column: Force type (Push, Pull, Rotation, Isomatric)
🔄 Parsing column: Movement Pattern (Squat, Hinge, Lunge, Push, Pull, Carry, Twist)
🔄 Parsing column: Pain Exclusions


In [4]:
# Sanity check: report the distinct Python types found in each parsed column.
print("🔍 Verifying Parsed Column Types:")
for name in columns_to_parse:
    if name not in df.columns:
        continue
    observed_types = df[name].apply(type).unique()
    print(f"{name}: {observed_types}")


🔍 Verifying Parsed Column Types:
Main muscle(s): [<class 'list'>]
Secondary muscles: [<class 'list'>]
Level: [<class 'list'>]
Equipment: [<class 'list'>]
Prerequesite Exercise: [<class 'list'>]
Variations: [<class 'list'>]
Regression: [<class 'list'>]
Progression: [<class 'list'>]
Exercise Purpose: [<class 'list'>]
Force type (Push, Pull, Rotation, Isomatric): [<class 'list'>]
Movement Pattern (Squat, Hinge, Lunge, Push, Pull, Carry, Twist): [<class 'list'>]
Pain Exclusions: [<class 'list'>]


In [5]:
def save_dataset_to_json(df, file_path='dataset.json'):
    """
    Write a DataFrame to a JSON file as a list of row records.

    Each row becomes one JSON object keyed by column name, and the output is
    pretty-printed with a two-space indent. A confirmation line is printed
    once the file has been written.

    Parameters
    ----------
    df : pandas.DataFrame
        The frame to export.
    file_path : str, default 'dataset.json'
        Destination path of the JSON file.
    """
    export_options = {'orient': 'records', 'indent': 2}
    df.to_json(file_path, **export_options)
    print(f"✅ Dataset saved to {file_path}")


In [6]:
# Persist the cleaned frame so later steps can reload it without the workbook.
save_dataset_to_json(df, file_path='dataset_7.json')


✅ Dataset saved to dataset_7.json


In [7]:
def load_dataset_from_json(file_path='dataset_7.json'):
    """
    Read a JSON file into a DataFrame and return it.

    The file is parsed with ``pd.read_json`` using its default settings, and a
    confirmation line is printed after the read succeeds.

    Parameters
    ----------
    file_path : str, default 'dataset_7.json'
        Path of the JSON file to read.

    Returns
    -------
    pandas.DataFrame
        The frame built from the file contents.
    """
    loaded = pd.read_json(file_path)
    print(f"✅ Dataset loaded from {file_path}")
    return loaded

# Round-trip check: read the saved file back and preview its first rows.
df_loaded = load_dataset_from_json(file_path='dataset_7.json')
print(df_loaded.head())


✅ Dataset loaded from dataset_7.json
              push up       Animation name Main muscle(s)  \
0        Wall push up                 None        [Chest]   
1    Kneeling push up                 None        [Chest]   
2             Push up       Normal Push-up        [Chest]   
3  Close grip push up              Body-Up        [Chest]   
4   Loop band push up  Band Push-up_female        [Chest]   

      Secondary muscles Movement      Type Lower bound (lbs/resistance/time)  \
0  [Shoulders, triceps]     Push  Compound                                 0   
1  [Shoulders, triceps]     Push  Compound                                 0   
2  [Shoulders, triceps]     Push  Compound                                 0   
3  [Shoulders, triceps]     Push  Compound                                 0   
4  [Shoulders, triceps]     Push  Compound                             Light   

       Level  Difficulty        Equipment  ...          Progression  \
0        [1]           1         [[None]]  .

In [8]:
# Confirm the list-valued columns are still lists after the JSON round trip.
print("🔍 Verifying Parsed Column Types:")
columns_in_loaded = [name for name in columns_to_parse if name in df_loaded.columns]
for name in columns_in_loaded:
    observed_types = df_loaded[name].apply(type).unique()
    print(f"{name}: {observed_types}")

🔍 Verifying Parsed Column Types:
Main muscle(s): [<class 'list'>]
Secondary muscles: [<class 'list'>]
Level: [<class 'list'>]
Equipment: [<class 'list'>]
Prerequesite Exercise: [<class 'list'>]
Variations: [<class 'list'>]
Regression: [<class 'list'>]
Progression: [<class 'list'>]
Exercise Purpose: [<class 'list'>]
Force type (Push, Pull, Rotation, Isomatric): [<class 'list'>]
Movement Pattern (Squat, Hinge, Lunge, Push, Pull, Carry, Twist): [<class 'list'>]
Pain Exclusions: [<class 'list'>]
