In [1]:
import pandas as pd 
import numpy as np
import os

In [2]:
# Load the raw measurement table from disk and keep every column after the
# first one (presumably a saved index column -- confirm against the CSV).
df = pd.read_csv(
    '/Users//sarithavuppula//Downloads/Summer School/final_version_data.csv'
).iloc[:, 1:]

In [6]:
from tqdm import tqdm
# For displaying a progress bar during iterations
from scipy.stats import kurtosis
# For calculating the kurtosis of the data

# Measurement columns that are summarised for every cycle
signal_cols = ['pin', 'po', 'pdmp']

# Column names of the feature table: three identifiers followed by the
# mean, variance and kurtosis of each measurement column
colnames = ['cycle', 'condition', 'mode',
            'pin_mean', 'po_mean', 'pdmp_mean',
            'pin_var', 'po_var', 'pdmp_var',
            'pin_kurtosis', 'po_kurtosis', 'pdmp_kurtosis']

# One list per (condition, mode, cycle) group. Rows are collected in a plain
# list and turned into a DataFrame once at the end, instead of re-copying a
# growing array with np.concatenate on every iteration.
rows = []

# groupby(..., sort=False) yields the groups in order of first appearance,
# which is the order Series.unique() produces, so the output rows keep the
# condition -> mode -> cycle nesting order. Each level is partitioned once
# rather than re-scanned with a boolean mask per key.
# NOTE: groupby skips rows whose key is NaN (dropna=True by default).
for condition, condition_df in tqdm(df.groupby('condition', sort=False)):

    # Split the current condition by operating mode
    for mode, mode_df in condition_df.groupby('mode', sort=False):

        # Split the current mode by cycle; each cycle_df yields one feature row
        for cycle, cycle_df in mode_df.groupby('cycle', sort=False):
            signals = cycle_df[signal_cols]

            # Column-wise mean of the measurements
            mean_vals = signals.mean(axis=0).tolist()

            # Column-wise population variance; ddof=0 is NumPy's np.var default
            # (pandas' own default would be the sample variance, ddof=1)
            variance_vals = signals.var(axis=0, ddof=0).tolist()

            # Column-wise kurtosis with scipy's defaults (computed along axis 0)
            kurtosis_vals = kurtosis(signals).tolist()

            # The group keys are the identifiers of this row
            meta_data = [cycle, condition, mode]

            rows.append(meta_data + mean_vals + variance_vals + kurtosis_vals)

# Build the feature table in one step from the collected rows
features = pd.DataFrame(rows, columns=colnames)

# Convert 'cycle', 'condition', and 'mode' columns to integer data types
features[['cycle', 'condition', 'mode']] = features[['cycle', 'condition', 'mode']].astype(int)

# Save the final features DataFrame to a CSV file, without the row index
features.to_csv('/Users//sarithavuppula//Downloads/Summer School/feature_engineering_data_final_version.csv', index=False)

100%|██████████| 5/5 [00:43<00:00,  8.78s/it]


In [4]:
pip install tqdm

Defaulting to user installation because normal site-packages is not writeable
Collecting tqdm
  Downloading tqdm-4.66.4-py3-none-any.whl (78 kB)
[K     |████████████████████████████████| 78 kB 7.0 MB/s eta 0:00:01
[?25hInstalling collected packages: tqdm
Successfully installed tqdm-4.66.4
You should consider upgrading via the '/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip' command.[0m
Note: you may need to restart the kernel to use updated packages.
