In [None]:
# Ashley Hunt - psyah10
import os

_GESTURES = ('circle', 'come', 'go', 'wave')


def get_gestures():
    """Return the gesture labels as a new list on every call.

    Each label is used elsewhere in this file as the name of a folder
    under ``data/`` that holds the recordings for that gesture.
    """
    return list(_GESTURES)

_COLUMNS = ('time', 'accel_x', 'accel_y', 'accel_z', 'accel_abs')


def get_columns():
    """Return the column names applied to every recording, as a new list."""
    return list(_COLUMNS)

def get_gesture_csvs(gesture):
    """List the CSV file names recorded for one gesture.

    Args:
        gesture: Name of the gesture; also the name of its folder under
            ``<current working directory>/data``.

    Returns:
        The names (not full paths) of the entries ending in ``.csv``,
        sorted alphabetically.

    Raises:
        FileNotFoundError: If the gesture folder does not exist.
    """
    gesture_dir = os.path.join(os.getcwd(), "data", gesture)
    # os.listdir returns entries in arbitrary order; sorting keeps any
    # numbering derived from the position in this list stable across runs
    # and machines.
    return sorted(name for name in os.listdir(gesture_dir) if name.endswith('.csv'))

############Use Scaler =None



In [None]:
############Use Scaler =None
# Ashley Hunt - psyah10
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
import pandas as pd

def get_df(path, scaler=None, trim=True):
    """Read one recording CSV into a DataFrame, optionally scaled and trimmed.

    Args:
        path: Path of the file to read. Paths not ending in ``.csv`` are
            skipped.
        scaler: Optional object exposing ``fit_transform``. When given it is
            fitted on this file alone and applied to every column,
            ``time`` included.
        trim: When true, the result is passed through ``trim_recording``.

    Returns:
        A DataFrame whose columns are ``get_columns()``, or an empty list
        when ``path`` is not a CSV (callers detect this with ``len(...) == 0``).

    Raises:
        ValueError: From pandas, if the file does not have as many columns
            as ``get_columns()`` returns.
    """
    if not path.endswith('.csv'):
        return []

    raw_data = pd.read_csv(path)
    raw_data.columns = get_columns()

    # Test against None explicitly: truthiness of an arbitrary scaler object
    # is not a reliable "was one supplied" check.
    if scaler is not None:
        df = pd.DataFrame(scaler.fit_transform(raw_data), columns=raw_data.columns)
    else:
        df = pd.DataFrame(raw_data, columns=raw_data.columns)

    # NOTE(review): trimming runs after scaling, so trim_recording's threshold
    # is compared against scaled values when a scaler is supplied - confirm
    # that is intended.
    return trim_recording(df) if trim else df

def trim_recording(df, padding=90, window_size=20, threshold=0.3):
    """Cut a recording down to the span between its first and last activity.

    Activity is measured as the trailing rolling mean of ``accel_abs`` over
    ``window_size`` rows; a row is "active" when that mean is at least
    ``threshold``.

    Args:
        df: Recording with an ``accel_abs`` column. It is not modified.
        padding: Number of rows kept before the first and after the last
            active row.
        window_size: Length of the rolling-mean window, in rows.
        threshold: Minimum rolling mean for a row to count as active.

    Returns:
        A new DataFrame holding the selected rows with their original index
        labels and columns. If no row is active the whole recording is
        returned (as a copy).
    """
    # Keep the activity measure in a local Series instead of a temporary
    # column, so the caller's frame is never altered.
    activity = df['accel_abs'].rolling(window=window_size, min_periods=1).mean()

    active_positions = (activity >= threshold).to_numpy().nonzero()[0]
    if active_positions.size == 0:
        return df.copy()

    # Work in row positions on the untrimmed frame. Clamping the end against
    # len(df) - 1 after the start had already been sliced off mixed positions
    # with index labels and could cut away the movement itself.
    first_row = max(int(active_positions[0]) - padding, 0)
    last_row = min(int(active_positions[-1]) + padding, len(df) - 1)

    # Copy so callers can add columns without writing into a slice of `df`.
    return df.iloc[first_row:last_row + 1].copy()

def get_data_from_files(scaler=None):
    """Load every gesture recording under ``data/`` into one DataFrame.

    For each gesture from ``get_gestures()`` and each file name from
    ``get_gesture_csvs(gesture)``, the recording is read with ``get_df`` and
    tagged with three extra columns: ``file_number`` (position of the file
    in that gesture's listing), ``file_name`` and ``gesture``.

    Args:
        scaler: Passed through to ``get_df`` for every file.

    Returns:
        All recordings concatenated, with a fresh 0..n-1 index.

    Raises:
        ValueError: If no non-empty recording was found.
    """
    frames = []
    for gesture in get_gestures():
        for file_index, file_name in enumerate(get_gesture_csvs(gesture)):
            recording = get_df(os.path.join('data', gesture, file_name), scaler)
            # get_df hands back an empty result for files it skips.
            if len(recording) == 0:
                continue

            recording['file_number'] = int(file_index)
            recording['file_name'] = str(file_name)
            recording['gesture'] = str(gesture)
            frames.append(recording)

    if not frames:
        raise ValueError("no gesture recordings found under 'data/'")

    # Always concatenate, even for a single file, so the index is reset
    # regardless of how many recordings were loaded.
    return pd.concat(frames, ignore_index=True)

# scaler = StandardScaler()
#scaler = MinMaxScaler()


# Training data: every recording under data/, left unscaled.
df = get_data_from_files(scaler=None)

# One extra recording kept apart from the training data, tagged with the same
# bookkeeping columns that get_data_from_files adds.
# NOTE(review): the folder name ends in "wave" but the label assigned here is
# "go" - confirm which one is right.
test_data = get_df(
    '/Users/amitkumar/Downloads/Acceleration without g 2024-04-24 18-21-44 wave/Raw Data.csv',
    scaler=None,
)
test_data = test_data.assign(file_number=0, file_name="Raw Data", gesture="go")
test_data

#df

In [None]:
####SVM
from sklearn.svm import SVC
from sklearn.model_selection import StratifiedShuffleSplit, GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import balanced_accuracy_score
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.metrics import balanced_accuracy_score

from sklearn.model_selection import train_test_split

from sklearn.preprocessing import LabelEncoder


# Features are whatever remains after dropping the label and the bookkeeping
# columns; the target is the gesture name.
X = df.drop(columns=['gesture', 'time','file_name','file_number'])  # Features
y = df['gesture']  # Target variable

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# No manual scaling here: the StandardScaler inside the pipeline is fitted on
# the training part of each CV fold and applied automatically at predict time,
# so the estimator is given the raw feature frames.
svm = SVC(random_state=42)
pipeline = Pipeline([
    ('scaler', StandardScaler()),
    ('svm', svm)
])

# Single-candidate grid (the configuration that was kept). The wider grid
# tried earlier was:
#   'svm__C': [0.1, 1, 10], 'svm__kernel': ['linear', 'rbf'],
#   'svm__gamma': ['scale', 'auto']
param_grid = {
    'svm__C': [0.1],
    'svm__kernel': ['rbf'],
    'svm__gamma': ['scale']
}

# Encode gesture names as integers; the encoder is fitted on the training
# labels and reused for the test labels.
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

# Select on balanced accuracy so model selection uses the same metric that is
# reported below.
grid_search_svm = GridSearchCV(estimator=pipeline, param_grid=param_grid, cv=3, scoring='balanced_accuracy')
grid_search_svm.fit(X_train, y_train_encoded)

best_params_svm = grid_search_svm.best_params_
best_estimator_svm = grid_search_svm.best_estimator_

# Held-out evaluation with the refitted best pipeline.
y_pred_svm = best_estimator_svm.predict(X_test)
balanced_accuracy_svm = balanced_accuracy_score(y_test_encoded, y_pred_svm)

print("Best Parameters (SVM):", best_params_svm)
print("Balanced Accuracy (SVM):", balanced_accuracy_svm)

# Recorded result for the configuration above:
#   Best Parameters (SVM): {'svm__C': 0.1, 'svm__gamma': 'scale', 'svm__kernel': 'rbf'}
#   Balanced Accuracy (SVM): 0.5483988039391403
#   Run time: about 5 minutes.
# Keep this configuration for the SVM. The linear-kernel grid below gave a
# lower accuracy (37.10):
#param_grid = {
    #'svm__C': [0.1],
    #'svm__kernel': ['linear'],
    #'svm__gamma': ['scale']
#}


In [None]:
#Random Forest
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import balanced_accuracy_score
from sklearn.model_selection import StratifiedShuffleSplit, GridSearchCV


# Features are whatever remains after dropping the label and the bookkeeping
# columns; the target is the gesture name.
X = df.drop(columns=['gesture', 'time','file_name','file_number'])  # Features
y = df['gesture']  # Target variable

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit the scaler on the training rows only, then apply it to the test rows.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

rfc = RandomForestClassifier(random_state=42)

# Single-candidate grid; the trailing comments list the values tried earlier.
param_grid = {
    'n_estimators': [250], #'n_estimators': [50, 100, 150]
    'max_depth': [None], #'max_depth': [None, 10, 20]
    'min_samples_split': [10], #'min_samples_split': [2, 5, 10]
    'min_samples_leaf': [2] #'min_samples_leaf': [1, 2, 4]
}

grid_search = GridSearchCV(estimator=rfc, param_grid=param_grid, cv=3,scoring='balanced_accuracy')
grid_search.fit(X_train_scaled, y_train)

best_params = grid_search.best_params_
best_estimator = grid_search.best_estimator_

y_pred = best_estimator.predict(X_test_scaled)
balanced_accuracy = balanced_accuracy_score(y_test, y_pred)
print("Best Parameters:", best_params)
print("Balanced Accuracy:", balanced_accuracy)


# Pair each test row's prediction with its actual gesture and its time stamp.
# The time values are looked up in `df` because the 'time' column was dropped
# from X; the row index itself is only a row label, not a time.
results = pd.DataFrame({
    'Time (s)': df.loc[X_test.index, 'time'],
    'Actual Gesture': y_test,
    'Predicted Gesture': y_pred,
})

print(results)

#59.333% accuracy

In [None]:
#for testing
#test_data---new data

# Evaluate the fitted model on the separately loaded recording `test_data`.
# NOTE(review): this cell reads the globals `scaler` and `best_estimator`.
# The XGB cell rebinds `best_estimator` and the LSTM cell rebinds `scaler`,
# so run this directly after the random-forest cell.

# Same feature columns as used for training, scaled with the already-fitted
# scaler (transform only, no refit).
test_data_features = test_data.drop(columns=['gesture', 'time','file_name','file_number'])  # Features
test_data_features_scaled = scaler.transform(test_data_features)

# Predict on the new test data
y_pred_new = best_estimator.predict(test_data_features_scaled)

# Calculate accuracy on the new test data
accuracy_new = accuracy_score(test_data['gesture'], y_pred_new)

# Pair each prediction with the actual gesture and the row's time stamp. The
# 'time' column is used because the index only holds row labels.
results_new = pd.DataFrame({
    'Time (s)': test_data['time'],
    'Actual Gesture': test_data['gesture'],
    'Predicted Gesture': y_pred_new,
})

# Print the results along with accuracy
print(results_new)
print("Accuracy on new test data:", accuracy_new)


In [None]:
#XGB 
# Amit Kumar
from xgboost import XGBClassifier
from sklearn.preprocessing import LabelEncoder

# Features are whatever remains after dropping the label and the bookkeeping
# columns; the target is the gesture name.
X = df.drop(columns=['gesture', 'time','file_name','file_number'])  # Features
y = df['gesture']  # Target variable

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

xgb_classifier = XGBClassifier(random_state=42)

# Single-candidate grid (the configuration that was kept). The wider grid
# tried earlier was:
#   'n_estimators': [50, 100, 150], 'max_depth': [3, 5, 7],
#   'learning_rate': [0.1, 0.01, 0.001], 'subsample': [0.8, 0.9, 1.0],
#   'colsample_bytree': [0.8, 0.9, 1.0]
param_grid = {
    'n_estimators': [300],
    'max_depth': [20],
    'learning_rate': [0.1],
    'subsample': [0.8],
    'colsample_bytree': [1]
}

# Encode gesture names as integers; the encoder is fitted on the training
# labels and reused for the test labels.
label_encoder = LabelEncoder()
y_train_encoded = label_encoder.fit_transform(y_train)
y_test_encoded = label_encoder.transform(y_test)

# Select on balanced accuracy so model selection uses the same metric that is
# reported below.
# NOTE(review): this rebinds the globals `grid_search`, `best_params`,
# `best_estimator`, `y_pred` and `balanced_accuracy`, which the random-forest
# cell also assigns.
grid_search = GridSearchCV(estimator=xgb_classifier, param_grid=param_grid, scoring='balanced_accuracy')
grid_search.fit(X_train, y_train_encoded)

best_params = grid_search.best_params_
best_estimator = grid_search.best_estimator_

y_pred = best_estimator.predict(X_test)

balanced_accuracy = balanced_accuracy_score(y_test_encoded, y_pred)
print("Best Parameters:", best_params)
print("Balanced Accuracy:", balanced_accuracy)

#Best Parameters: {'colsample_bytree': 1.0, 'learning_rate': 0.1, 'max_depth': 7, 'n_estimators': 150, 'subsample': 0.8}
#Balanced Accuracy: 0.6657678741988493

#Best Parameters: {'colsample_bytree': 1, 'learning_rate': 0.1, 'max_depth': 20, 'n_estimators': 300, 'subsample': 0.8}
#Balanced Accuracy: 0.6817467817973307

In [None]:
#LSTM
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import LSTM, Dense
import matplotlib.pyplot as plt

# Work on a copy: the encoded-label column added below and the in-place
# scaling would otherwise be written into the shared `df`, changing the
# features seen by every other cell that is (re)run afterwards.
data = df.copy()

# Encode the gesture labels as integers
label_encoder = LabelEncoder()
data['gesture_encoded'] = label_encoder.fit_transform(data['gesture'])

# Standardise the four acceleration channels
feature_columns = ['accel_x', 'accel_y', 'accel_z', 'accel_abs']
scaler = StandardScaler()
data[feature_columns] = scaler.fit_transform(data[feature_columns])

# Number of consecutive rows in each LSTM input window
time_steps = 10

def prepare_data_for_lstm(data, time_steps):
    """Turn a table of rows into sliding windows and next-row labels.

    Window ``i`` holds rows ``i .. i + time_steps - 1`` and is labelled with
    the ``gesture_encoded`` value of row ``i + time_steps``.

    Args:
        data: DataFrame with a ``gesture_encoded`` column; every other
            column is treated as an input feature.
        time_steps: Number of consecutive rows per window.

    Returns:
        ``(X, y)`` where ``X`` has shape
        ``(len(data) - time_steps, time_steps, n_features)`` and ``y`` has
        one label per window. Both are empty when ``data`` has no more than
        ``time_steps`` rows.
    """
    # The label column must not be part of the inputs: with it included,
    # every window already contains the value it is asked to predict.
    features = data.drop(columns=['gesture_encoded']).to_numpy()
    labels = data['gesture_encoded'].to_numpy()

    n_windows = max(len(data) - time_steps, 0)
    # Row positions of every window, shape (n_windows, time_steps); one fancy
    # index then replaces the per-row Python loop.
    window_rows = np.arange(n_windows)[:, None] + np.arange(time_steps)
    X = features[window_rows]
    y = labels[time_steps:]
    return X, y

# Build the windowed inputs and their labels.
X, y = prepare_data_for_lstm(
    data[['accel_x', 'accel_y', 'accel_z', 'accel_abs', 'gesture_encoded']],
    time_steps,
)

# 80 % for training; the remaining 20 % is halved into validation and test.
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.2, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

# Two stacked LSTM layers followed by a softmax over the gesture classes.
model = Sequential([
    LSTM(units=50, return_sequences=True, input_shape=X_train.shape[1:]),
    LSTM(units=50),
    Dense(len(label_encoder.classes_), activation='softmax'),
])
# Integer class labels, hence the sparse categorical loss.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Fit, monitoring the validation split after every epoch.
history = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_data=(X_val, y_val))

# Score on the held-out test split; evaluate returns (loss, accuracy).
_, test_accuracy = model.evaluate(X_test, y_test)
print('Test Accuracy:', test_accuracy)

# Accuracy curves per epoch for the training and validation splits.
for history_key, curve_label in (
    ('accuracy', 'Training Accuracy'),
    ('val_accuracy', 'Validation Accuracy'),
):
    plt.plot(history.history[history_key], label=curve_label)
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.title('Training and Validation Accuracy')
plt.legend()
plt.show()
