In [24]:
import pandas as pd
df = pd.read_csv('combined_file.csv')

In [25]:
# Get unique activities from the "Activity" column
unique_activities = df['label'].unique()

# Print the number of unique activities
print('Number of Unique Activities: {}'.format(len(unique_activities)))

# print the unique activity names
print('labels:', unique_activities)


Number of Unique Activities: 9
labels: ['jumping' 'standing' 'lying' 'sitting' 'walkingStairsUp'
 'standing_hosehold_chores' 'vacuum_cleaning' 'walking' 'cycling']


In [26]:
print(df.columns)


Index(['Unnamed: 0', 'time', 'AccX', 'AccY', 'AccZ', 'label', 'user_id'], dtype='object')


In [51]:

# Print timestamps of row 1 and row 10
timestamp_row_1 = df['time'].iloc[0]
timestamp_row_10 = df['time'].iloc[150]

print(f"Timestamp in row 1: {timestamp_row_1}")
print(f"Timestamp in row 10: {timestamp_row_10}")

# Calculate the time difference in milliseconds
time_difference_ms = timestamp_row_10 - timestamp_row_1
print(f"Time difference in milliseconds: {time_difference_ms} ms")

# Convert to seconds if needed
time_difference_seconds = time_difference_ms / 1000
print(f"Time difference in seconds: {time_difference_seconds} seconds")


Timestamp in row 1: 1449044969996.0
Timestamp in row 10: 1449044971746.0
Time difference in milliseconds: 1750.0 ms
Time difference in seconds: 1.75 seconds


In [28]:
import pandas as pd



# Parameters
window_size = 150  # Number of rows per window
step_size = 30     # Step size for 80% overlap (150 * 0.2 = 30)

# List to store the windows
windows = []

# Unique window ID counter
window_id = 0

# Group by user ID and activity label to avoid mixing data from different users or activities
for label, group_data in df.groupby(['label']):
    # Create windows of 150 rows with 80% overlap within each group
    for start in range(0, len(group_data) - window_size + 1, step_size):
        end = start + window_size
        window = group_data.iloc[start:end].copy()  # Create a copy of the window
        window['window_id'] = window_id  # Assign a unique window_id to each window
        windows.append(window)
        window_id += 1  # Increment the window_id for the next window

# Combine all windows into a single DataFrame
windowed_data = pd.concat(windows, ignore_index=True)

# Save the final DataFrame with window IDs to a new CSV file
windowed_data.to_csv('train_data.csv', index=False)

print("CSV file 'windowed_features.csv' created successfully.")


CSV file 'windowed_features.csv' created successfully.


In [29]:

import pandas as pd

# Load the generated windowed data
windowed_data = pd.read_csv('train_data.csv')

# Count the unique window IDs to determine the number of windows
num_windows = windowed_data['window_id'].nunique()

num_windows


244811

In [30]:
print(windowed_data.head())


   Unnamed: 0          time    AccX    AccY    AccZ    label  user_id  \
0      390129  1.449049e+12 -0.3704  0.8633 -0.4630  cycling        1   
1      390130  1.449049e+12 -0.3974  0.5605 -0.7601  cycling        1   
2      390131  1.449049e+12 -0.3319  0.2849 -0.5568  cycling        1   
3      390132  1.449049e+12 -0.6365  0.6148 -0.7796  cycling        1   
4      390133  1.449049e+12 -0.5054  0.4130 -0.7210  cycling        1   

   window_id  
0          0  
1          0  
2          0  
3          0  
4          0  


In [31]:
import pandas as pd
import numpy as np
from scipy.stats import skew, kurtosis, iqr, entropy
from numpy.fft import fft

def calculate_features(window):
    features = {}
    axes = {'AccX': 'Ax', 'AccY': 'Ay', 'AccZ': 'Az'}
 
    for axis in axes.keys():
        data = window[axis]

        # Handle NaN and infinite values
        data = data.replace([np.inf, -np.inf], np.nan)  # Replace infinities if any
        data = data.dropna()  # Drop NaN values
        
        if data.empty:
            continue  

        features[f'{axes[axis]}_mean'] = np.mean(data)
        features[f'{axes[axis]}_std'] = np.std(data)
        features[f'{axes[axis]}_mad'] = np.mean(np.abs(data - np.mean(data)))
        features[f'{axes[axis]}_max'] = np.max(data)
        features[f'{axes[axis]}_min'] = np.min(data)
        features[f'{axes[axis]}_sma'] = np.sum(np.abs(data)) / len(data)
        features[f'{axes[axis]}_energy'] = np.sum(data ** 2) / len(data)
        features[f'{axes[axis]}_iqr'] = iqr(data)

        # Adjust histogram calculation for better handling
        if len(data) > 0:
            hist, _ = np.histogram(data.dropna(), bins=10)  # Ensure no NaN values are included
            features[f'{axes[axis]}_entropy'] = entropy(hist + 1e-6) if np.sum(hist) > 0 else 0

        # Frequency-domain features
        freq_data = np.abs(fft(data))[:len(data) // 2]
        features[f'{axes[axis]}_meanFreq'] = np.sum(freq_data * np.arange(len(freq_data))) / np.sum(freq_data)
        features[f'{axes[axis]}_skewness'] = skew(data)
        features[f'{axes[axis]}_kurtosis'] = kurtosis(data)
        features[f'{axes[axis]}_maxInds'] = np.argmax(freq_data)

    return features

In [32]:
# List to store each window's features
features_list = []

# Process each window to calculate features
for window in windows:
    if not window.empty:
        window_id = window['window_id'].iloc[0]  # Ensure 'window_id' is a column in df
        label = window['label'].iloc[0]  # Ensure 'label' is a column in df
        user_id = window['user_id'].iloc[0]  # Ensure 'user_id' is a column in df

        # Calculate features and add identifiers
        features = calculate_features(window)
        features['window_id'] = window_id
        features['label'] = label
        features['user_id'] = user_id

        features_list.append(features)

# Convert the list of feature dictionaries to a DataFrame
features_df = pd.DataFrame(features_list)

# Save the features to a new CSV file with features
features_df.to_csv('data_features.csv', index=False)



In [33]:
# Print all column names in the DataFrame
print(df.head)


<bound method NDFrame.head of          Unnamed: 0          time    AccX    AccY    AccZ            label  \
0             12129  1.449045e+12 -0.1969  0.9021 -0.4356          jumping   
1             12130  1.449045e+12 -0.2046  0.9060 -0.4317          jumping   
2             12131  1.449045e+12 -0.2008  0.9021 -0.4317          jumping   
3             12132  1.449045e+12 -0.1969  0.8982 -0.4356          jumping   
4             12133  1.449045e+12 -0.1969  0.9060 -0.4278          jumping   
...             ...           ...     ...     ...     ...              ...   
7345472      329997  1.464600e+12 -0.4128 -0.1925 -0.9829  vacuum_cleaning   
7345473      329998  1.464600e+12 -0.4206 -0.1964 -0.9829  vacuum_cleaning   
7345474      329999  1.464600e+12 -0.4090 -0.2120 -0.9829  vacuum_cleaning   
7345475      330000  1.464600e+12 -0.4167 -0.2624 -0.9751  vacuum_cleaning   
7345476      330001  1.464600e+12 -0.3242 -0.2158 -0.9399  vacuum_cleaning   

         user_id  
0             

In [34]:
# Load the CSV file

# Group the DataFrame by 'user_id'
grouped = features_df.groupby('user_id')

# Loop through the users and save each users to a separate CSV file
for user_id, group in grouped:
    filename = f'GOTOV_with_WINDOWS_{user_id}.csv'
    group.to_csv(filename, index=False)
    print(f'Saved {filename}')


Saved GOTOV_with_WINDOWS_1.csv
Saved GOTOV_with_WINDOWS_2.csv
Saved GOTOV_with_WINDOWS_3.csv
Saved GOTOV_with_WINDOWS_4.csv
Saved GOTOV_with_WINDOWS_5.csv
Saved GOTOV_with_WINDOWS_6.csv
Saved GOTOV_with_WINDOWS_7.csv
Saved GOTOV_with_WINDOWS_8.csv
Saved GOTOV_with_WINDOWS_9.csv
Saved GOTOV_with_WINDOWS_10.csv
Saved GOTOV_with_WINDOWS_11.csv
Saved GOTOV_with_WINDOWS_12.csv
Saved GOTOV_with_WINDOWS_13.csv
Saved GOTOV_with_WINDOWS_14.csv
Saved GOTOV_with_WINDOWS_15.csv
Saved GOTOV_with_WINDOWS_16.csv
Saved GOTOV_with_WINDOWS_17.csv
Saved GOTOV_with_WINDOWS_18.csv
Saved GOTOV_with_WINDOWS_19.csv
Saved GOTOV_with_WINDOWS_20.csv
Saved GOTOV_with_WINDOWS_21.csv
Saved GOTOV_with_WINDOWS_22.csv
Saved GOTOV_with_WINDOWS_23.csv
Saved GOTOV_with_WINDOWS_24.csv
Saved GOTOV_with_WINDOWS_25.csv
Saved GOTOV_with_WINDOWS_26.csv
Saved GOTOV_with_WINDOWS_27.csv
Saved GOTOV_with_WINDOWS_28.csv
Saved GOTOV_with_WINDOWS_29.csv
Saved GOTOV_with_WINDOWS_30.csv
Saved GOTOV_with_WINDOWS_31.csv
Saved GOTOV_with_