In [45]:
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix, classification_report
from sklearn.metrics import confusion_matrix
from sklearn.decomposition import PCA
import seaborn as sns
import matplotlib.pyplot as plt
import tsfel
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
import numpy as np
import os
from langchain_groq.chat_models import ChatGroq
from apikey import api_key

In [46]:
# Groq API and Models 
Groq_Token = api_key  # Do not share this key with anyone

groq_models = {"llama3-70b": "llama3-70b-8192", "mixtral": "mixtral-8x7b-32768", "gemma-7b": "gemma-7b-it","llama3.1-70b":"llama-3.1-70b-versatile","llama3-8b":"llama3-8b-8192","llama3.1-8b":"llama-3.1-8b-instant","gemma-9b":"gemma2-9b-it"}

In [47]:
X_train_featurised = pd.read_csv("./UCI HAR Dataset/train/X_train.txt", sep='\s+',header=None)
y_train_featurised = pd.read_csv("./UCI HAR Dataset/train/y_train.txt", sep='\s+',header=None)

print(X_train_featurised.shape)

(7352, 561)


In [48]:
time = 10
offset = 100
folders = ["LAYING","SITTING","STANDING","WALKING","WALKING_DOWNSTAIRS","WALKING_UPSTAIRS"]
classes = {"WALKING":1,"WALKING_UPSTAIRS":2,"WALKING_DOWNSTAIRS":3,"SITTING":4,"STANDING":5,"LAYING":6}

combined_dir = os.path.join("Real Data")

X_test=[]
y_test=[]
dataset_dir = os.path.join(combined_dir,"Test")

for folder in folders:
    files = os.listdir(os.path.join(dataset_dir,folder))
    for file in files:

        df = pd.read_csv(os.path.join(dataset_dir,folder,file),sep=",",header=0)
        df = df[offset:offset+time*50]
        X_test.append(df.values)
        y_test.append(classes[folder])

X_real_test = np.array(X_test)
y_real_test = np.array(y_test)

In [49]:
cfg = tsfel.get_features_by_domain() # retrieves all features
X_real_test_featurised = tsfel.time_series_features_extractor(cfg, X_real_test, fs=50)

In [50]:
print(X_real_test_featurised.shape)
print(X_train_featurised.shape)

(24, 825)
(7352, 561)


In [51]:
from sklearn.decomposition import PCA
pca = PCA(n_components=7)
X_train_reduced_tsefl = pca.fit_transform(X_train_featurised)
X_real_test_reduced = pca.fit_transform(X_real_test_featurised)
print(X_train_reduced_tsefl.shape)
print(X_real_test_reduced.shape)

(7352, 7)
(24, 7)


In [65]:
few_shot_prompt = """ 
You are tasked with classifying human activities based on featurized accelerometer data. The activities include:
- WALKING
- WALKING_UPSTAIRS
- WALKING_DOWNSTAIRS
- SITTING
- STANDING
- LAYING

Here are a few labeled examples of the feature vectors and their corresponding activities:

Example 1:
Feature Vector: {example_1_vector}
Activity: {example_1_label}

Example 2:
Feature Vector: {example_2_vector}
Activity: {example_2_label}

Example 3:
Feature Vector: {example_3_vector}
Activity: {example_3_label}

Now, given the following 495-feature vector representing an activity window, predict the most likely activity label:
Feature Vector: {feature_vector}


do not give code.
do not give extra information.
return predicted activity only.
"""
model_name = "llama3-70b" # We can choose any model from the groq_models dictionary
llm = ChatGroq(model=groq_models[model_name], api_key=Groq_Token, temperature=0)
# Few-shot learning function
def few_shot_classification(feature_vector, labeled_examples):
    # Prepare the prompt using a few labeled examples
    prompt = few_shot_prompt.format(
        example_1_vector=labeled_examples[0]["feature_vector"],
        example_1_label=labeled_examples[0]["label"],
        example_2_vector=labeled_examples[1]["feature_vector"],
        example_2_label=labeled_examples[1]["label"],
        example_3_vector=labeled_examples[2]["feature_vector"],
        example_3_label=labeled_examples[2]["label"],
        feature_vector=feature_vector
    )
    
    # Invoke the Groq LLM
    predicted_label = llm.invoke(prompt)
    return predicted_label

# Example labeled data for few-shot learning
labeled_examples = [
    
    {"feature_vector": X_train_reduced_tsefl[1].tolist(), "label": "LAYING"},
    {"feature_vector": X_train_reduced_tsefl[2].tolist(), "label": "STANDING"},
    {"feature_vector": X_train_reduced_tsefl[6].tolist(), "label": "WALKING"},
    {"feature_vector": X_train_reduced_tsefl[16].tolist(), "label": "WALKING_DOWNSTAIRS"},
    {"feature_vector": X_train_reduced_tsefl[0].tolist(), "label": "WALKING_UPSTAIRS"},
    {"feature_vector": X_train_reduced_tsefl[9].tolist(), "label": "SITTING"},
]
predicted_activities = []
for i in range(20):  # Predict for 30 examples
    example_vector = X_real_test_reduced[i+3].tolist()  # Using different test examples
    predicted_activity = few_shot_classification(example_vector, labeled_examples)
    predicted_activities.append(classes[predicted_activity.content])
    print(f"Predicted Activity for example {i+1}: {classes[predicted_activity.content]}")

Predicted Activity for example 1: 2
Predicted Activity for example 2: 2
Predicted Activity for example 3: 1
Predicted Activity for example 4: 2
Predicted Activity for example 5: 2
Predicted Activity for example 6: 1
Predicted Activity for example 7: 1
Predicted Activity for example 8: 2
Predicted Activity for example 9: 2
Predicted Activity for example 10: 2
Predicted Activity for example 11: 1
Predicted Activity for example 12: 1
Predicted Activity for example 13: 1
Predicted Activity for example 14: 1
Predicted Activity for example 15: 1
Predicted Activity for example 16: 1
Predicted Activity for example 17: 1
Predicted Activity for example 18: 1
Predicted Activity for example 19: 1
Predicted Activity for example 20: 1


In [67]:
y_pred = np.array(predicted_activities)

y_pred.shape
accuracy = accuracy_score(y_real_test[:20], y_pred)
print(f"Accuracy: {accuracy:.4f}")

Accuracy: 0.2000


In [68]:
seed = 4
X_train,X_test,y_train,y_test = train_test_split(X_real_test,y_real_test,test_size=0.3,random_state=seed,stratify=y_real_test)

print("Training data shape: ",X_train.shape)
print("Testing data shape: ",X_test.shape)

Training data shape:  (16, 500, 5)
Testing data shape:  (8, 500, 5)


In [69]:
cfg = tsfel.get_features_by_domain() # retrieves all features
X_real_train_featurised = tsfel.time_series_features_extractor(cfg, X_train, fs=50)
X_real_test_featurised = tsfel.time_series_features_extractor(cfg, X_test, fs=50)
X_train_reduced= pca.fit_transform(X_real_train_featurised)
X_test_reduced = pca.fit_transform(X_real_test_featurised)

In [80]:
few_shot_prompt = """ 
You are tasked with classifying human activities based on featurized accelerometer data. The activities include:
- WALKING
- WALKING_UPSTAIRS
- WALKING_DOWNSTAIRS
- SITTING
- STANDING
- LAYING

Here are a few labeled examples of the feature vectors and their corresponding activities:

Example 1:
Feature Vector: {example_1_vector}
Activity: {example_1_label}

Example 2:
Feature Vector: {example_2_vector}
Activity: {example_2_label}

Example 3:
Feature Vector: {example_3_vector}
Activity: {example_3_label}

Now, given the following 495-feature vector representing an activity window, predict the most likely activity label:
Feature Vector: {feature_vector}


do not give code.
do not give extra information.
return predicted activity only.
"""
model_name = "llama3-70b" # We can choose any model from the groq_models dictionary
llm = ChatGroq(model=groq_models[model_name], api_key=Groq_Token, temperature=0)
# Few-shot learning function
def few_shot_classification(feature_vector, labeled_examples):
    # Prepare the prompt using a few labeled examples
    prompt = few_shot_prompt.format(
        example_1_vector=labeled_examples[0]["feature_vector"],
        example_1_label=labeled_examples[0]["label"],
        example_2_vector=labeled_examples[1]["feature_vector"],
        example_2_label=labeled_examples[1]["label"],
        example_3_vector=labeled_examples[2]["feature_vector"],
        example_3_label=labeled_examples[2]["label"],
        feature_vector=feature_vector
    )
    
    # Invoke the Groq LLM
    predicted_label = llm.invoke(prompt)
    return predicted_label

# Example labeled data for few-shot learning
labeled_examples = [
    
    {"feature_vector": X_train_reduced[1].tolist(), "label": "WALKING_UPSTAIRS"},
    {"feature_vector": X_train_reduced[2].tolist(), "label": "SITTING"},
    {"feature_vector": X_train_reduced[6].tolist(), "label": "WALKING"},
    {"feature_vector": X_train_reduced[14].tolist(), "label": "LAYING"},
    {"feature_vector": X_train_reduced[0].tolist(), "label": "STANDING"},
    {"feature_vector": X_train_reduced[9].tolist(), "label": "WALKING"},
]
predicted_activities = []
for i in range(8):  # Predict for 30 examples
    example_vector = X_test_reduced[i].tolist()  # Using different test examples
    predicted_activity = few_shot_classification(example_vector, labeled_examples)
    predicted_activities.append(classes[predicted_activity.content])
    print(f"Predicted Activity for example {i+1}: {classes[predicted_activity.content]}")

Predicted Activity for example 1: 2
Predicted Activity for example 2: 1
Predicted Activity for example 3: 1
Predicted Activity for example 4: 3
Predicted Activity for example 5: 1
Predicted Activity for example 6: 2
Predicted Activity for example 7: 1
Predicted Activity for example 8: 1


In [78]:
print(y_train)

[5 2 4 2 6 1 1 5 6 1 4 5 3 4 6 3]


In [81]:
y_pred = np.array(predicted_activities)
y_pred.shape
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.4f}")

Accuracy: 0.0000
