In [14]:
import pandas as pd
df = pd.read_csv('PAMAP2_Cleaned.csv')

In [15]:
# Get unique activities from the "Activity" column (assuming there is an Activity column)
unique_activities = df['label'].unique()

# Print the number of unique activities
print('Number of Unique Activities: {}'.format(len(unique_activities)))

# Optionally, print the unique activity names
print('labels:', unique_activities)


Number of Unique Activities: 10
labels: ['lying' 'sitting' 'standing' 'standing_household_chores'
 'ascending_stairs' 'descending_stairs' 'walking' 'Nordic_walking'
 'cycling' 'jumping']


In [18]:
print(df.columns)


Index(['time_stamp', 'label', 'hand_3D_acceleration_16_x',
       'hand_3D_acceleration_16_y', 'hand_3D_acceleration_16_z',
       'hand_3D_acceleration_6_x', 'hand_3D_acceleration_6_y',
       'hand_3D_acceleration_6_z', 'id'],
      dtype='object')


In [42]:
# List of columns to be dropped
columns_to_drop = [
'hand_3D_acceleration_16_x',
       'hand_3D_acceleration_16_y', 'hand_3D_acceleration_16_z'
]

# Drop the specified columns
df = df.drop(columns=columns_to_drop, errors='ignore')  # errors='ignore' will ignore any errors if a column is not found
df = df.rename(columns={'id': 'user_id'})



In [44]:
print(df.columns)


Index(['time_stamp', 'label', 'hand_3D_acceleration_6_x',
       'hand_3D_acceleration_6_y', 'hand_3D_acceleration_6_z', 'user_id'],
      dtype='object')


In [46]:
# Assuming 'df' is your DataFrame with a 'time' column

# Print timestamps of row 1 and row 10
timestamp_row_1 = df['time_stamp'].iloc[0]
timestamp_row_10 = df['time_stamp'].iloc[90]

print(f"Timestamp in row 1: {timestamp_row_1}")
print(f"Timestamp in row 10: {timestamp_row_10}")

# Calculate the time difference in milliseconds
time_difference_ms = timestamp_row_10 - timestamp_row_1
print(f"Time difference in milliseconds: {time_difference_ms} ms")

# Convert to seconds if needed
time_difference_seconds = time_difference_ms / 1000
print(f"Time difference in seconds: {time_difference_seconds} seconds")


Timestamp in row 1: 37.66
Timestamp in row 10: 38.56
Time difference in milliseconds: 0.9000000000000057 ms
Time difference in seconds: 0.0009000000000000057 seconds


In [48]:
import pandas as pd



# Parameters
window_size = 170  # Number of rows per window
step_size = 30     # Step size for 80% overlap (150 * 0.2 = 30)

# List to store the windows
windows = []

# Unique window ID counter
window_id = 0

# Group by user ID and activity label to avoid mixing data from different users or activities
for label, group_data in df.groupby(['label']):
    # Create windows of 150 rows with 80% overlap within each group
    for start in range(0, len(group_data) - window_size + 1, step_size):
        end = start + window_size
        window = group_data.iloc[start:end].copy()  # Create a copy of the window
        window['window_id'] = window_id  # Assign a unique window_id to each window
        windows.append(window)
        window_id += 1  # Increment the window_id for the next window

# Combine all windows into a single DataFrame
windowed_data = pd.concat(windows, ignore_index=True)

# Save the final DataFrame with window IDs to a new CSV file
windowed_data.to_csv('PAMAP_window.csv', index=False)

print("CSV file 'windowed_features.csv' created successfully.")


CSV file 'windowed_features.csv' created successfully.


In [50]:

import pandas as pd

# Load the generated windowed data
windowed_data = pd.read_csv('PAMAP_window.csv')

# Count the unique window IDs to determine the number of windows
num_windows = windowed_data['window_id'].nunique()

num_windows


61438

In [51]:
print(windowed_data.head())


   time_stamp           label  hand_3D_acceleration_6_x  \
0     2540.13  Nordic_walking                   1.75241   
1     2540.14  Nordic_walking                   1.75152   
2     2540.15  Nordic_walking                   1.88310   
3     2540.16  Nordic_walking                   1.95351   
4     2540.17  Nordic_walking                   1.93628   

   hand_3D_acceleration_6_y  hand_3D_acceleration_6_z  user_id  window_id  
0                   8.31466                   4.54929      101          0  
1                   8.20895                   4.54953      101          0  
2                   7.83034                   4.44452      101          0  
3                   7.34646                   4.33980      101          0  
4                   7.13511                   4.29498      101          0  


In [52]:
import pandas as pd
import numpy as np
from scipy.stats import skew, kurtosis, iqr, entropy
from numpy.fft import fft

def calculate_features(window):
    features = {}
    axes = {'hand_3D_acceleration_6_x': 'Ax', 'hand_3D_acceleration_6_y': 'Ay', 'hand_3D_acceleration_6_z': 'Az'}
 
    for axis in axes.keys():
        data = window[axis]

        # Handle NaN and infinite values
        data = data.replace([np.inf, -np.inf], np.nan)  # Replace infinities if any
        data = data.dropna()  # Drop NaN values
        
        if data.empty:
            continue  

        features[f'{axes[axis]}_mean'] = np.mean(data)
        features[f'{axes[axis]}_std'] = np.std(data)
        features[f'{axes[axis]}_mad'] = np.mean(np.abs(data - np.mean(data)))
        features[f'{axes[axis]}_max'] = np.max(data)
        features[f'{axes[axis]}_min'] = np.min(data)
        features[f'{axes[axis]}_sma'] = np.sum(np.abs(data)) / len(data)
        features[f'{axes[axis]}_energy'] = np.sum(data ** 2) / len(data)
        features[f'{axes[axis]}_iqr'] = iqr(data)

        # Adjust histogram calculation for better handling
        if len(data) > 0:
            hist, _ = np.histogram(data.dropna(), bins=10)  # Ensure no NaN values are included
            features[f'{axes[axis]}_entropy'] = entropy(hist + 1e-6) if np.sum(hist) > 0 else 0

        # Frequency-domain features
        freq_data = np.abs(fft(data))[:len(data) // 2]
        features[f'{axes[axis]}_meanFreq'] = np.sum(freq_data * np.arange(len(freq_data))) / np.sum(freq_data)
        features[f'{axes[axis]}_skewness'] = skew(data)
        features[f'{axes[axis]}_kurtosis'] = kurtosis(data)
        features[f'{axes[axis]}_maxInds'] = np.argmax(freq_data)

    return features

In [53]:
# Assuming 'df' is properly loaded and contains the correct columns
window = df.iloc[:100]  # Taking the first 100 rows as a sample window
features = calculate_features(window)
print(features)


{'Ax_mean': 2.4330838, 'Ax_std': 0.26002258752185353, 'Ax_mad': 0.20469862799999997, 'Ax_max': 3.19561, 'Ax_min': 1.73618, 'Ax_sma': 2.4330838, 'Ax_energy': 5.9875085238439985, 'Ax_iqr': 0.34789749999999975, 'Ax_entropy': 1.953732479947525, 'Ax_meanFreq': 3.415159872686383, 'Ax_skewness': 0.3902649421222907, 'Ax_kurtosis': 0.6722250930175475, 'Ax_maxInds': 0, 'Ay_mean': 7.249486899999999, 'Ay_std': 0.8100411180707988, 'Ay_mad': 0.59513181, 'Ay_max': 9.356, 'Ay_min': 4.797, 'Ay_sma': 7.249486899999999, 'Ay_energy': 53.211226926236996, 'Ay_iqr': 0.8751875, 'Ay_entropy': 1.9215460865365317, 'Ay_meanFreq': 4.260739702768555, 'Ay_skewness': -0.5407542811447026, 'Ay_kurtosis': 1.0637693775134318, 'Ay_maxInds': 0, 'Az_mean': 6.2490127, 'Az_std': 0.7793498932993512, 'Az_mad': 0.58268937, 'Az_max': 8.77914, 'Az_min': 4.54805, 'Az_sma': 6.2490127, 'Az_energy': 39.65754598094699, 'Az_iqr': 0.8350075000000006, 'Az_entropy': 1.9502250763805522, 'Az_meanFreq': 2.68427453768784, 'Az_skewness': 0.5690

In [65]:
# Define the axes
axes = ['X', 'Y', 'Z']

# Generate the full list of features
feature_list = []
for axis in axes:
    feature_list.extend([
        f'{axis}_mean', f'{axis}_std', f'{axis}_mad', f'{axis}_max', f'{axis}_min',
        f'{axis}_sma', f'{axis}_energy', f'{axis}_iqr', f'{axis}_entropy',
        f'{axis}_meanFreq', f'{axis}_skewness', f'{axis}_kurtosis', f'{axis}_maxInds'
    ])

# Print the generated list of features
print("Extracted Features:", feature_list)


Extracted Features: ['X_mean', 'X_std', 'X_mad', 'X_max', 'X_min', 'X_sma', 'X_energy', 'X_iqr', 'X_entropy', 'X_meanFreq', 'X_skewness', 'X_kurtosis', 'X_maxInds', 'Y_mean', 'Y_std', 'Y_mad', 'Y_max', 'Y_min', 'Y_sma', 'Y_energy', 'Y_iqr', 'Y_entropy', 'Y_meanFreq', 'Y_skewness', 'Y_kurtosis', 'Y_maxInds', 'Z_mean', 'Z_std', 'Z_mad', 'Z_max', 'Z_min', 'Z_sma', 'Z_energy', 'Z_iqr', 'Z_entropy', 'Z_meanFreq', 'Z_skewness', 'Z_kurtosis', 'Z_maxInds']


In [54]:
# List to store each window's features
features_list = []
df = df.rename(columns={'id': 'user_id'})

# Process each window to calculate features
for window in windows:
    if not window.empty:
        window_id = window['window_id'].iloc[0]  # Ensure 'window_id' is a column in df
        label = window['label'].iloc[0]  # Ensure 'label' is a column in df
        user_id = window['user_id'].iloc[0]  # Ensure 'user_id' is a column in df

        # Calculate features and add identifiers
        features = calculate_features(window)
        features['window_id'] = window_id
        features['label'] = label
        features['user_id'] = user_id

        features_list.append(features)

# Convert the list of feature dictionaries to a DataFrame
features_df = pd.DataFrame(features_list)

# Save the features to a new CSV file
features_df.to_csv('window_id.csv', index=False)

print("CSV file 'train_data_features.csv' created successfully.")

CSV file 'train_data_features.csv' created successfully.


In [55]:
import pandas as pd

# Load the features dataset
features_df = pd.read_csv('data_features.csv')

# Split the data into train and test based on user_id
train_data = features_df[features_df['user_id'] <= 106]
test_data = features_df[features_df['user_id'] >= 107]

# Save the train and test datasets to separate CSV files
train_data.to_csv('train.csv', index=False)
test_data.to_csv('test.csv', index=False)

# Print the shapes of the split datasets
print(f"Train data shape: {train_data.shape}")
print(f"Test data shape: {test_data.shape}")
print("Train and test datasets saved as 'train_split.csv' and 'test_split.csv'.")



Train data shape: (48059, 42)
Test data shape: (16651, 42)
Train and test datasets saved as 'train_split.csv' and 'test_split.csv'.


In [56]:
# Print all column names in the DataFrame
print(df.columns)


Index(['time_stamp', 'label', 'hand_3D_acceleration_6_x',
       'hand_3D_acceleration_6_y', 'hand_3D_acceleration_6_z', 'user_id'],
      dtype='object')
