In [1]:
import pandas as pd
import os

# Define the path to the UCI HAR dataset
uci_base_path = r'C:\Users\arpit\OneDrive\Desktop\es335-24-fall-assignment-1\UCI HAR Dataset\Combined'

# Define the activities
activities = ['WALKING', 'WALKING_UPSTAIRS', 'WALKING_DOWNSTAIRS', 'SITTING', 'STANDING', 'LAYING']

# Initialize lists to store the data and labels
X_train = []
y_train = []

# Process each activity
for activity in activities:
    activity_path = os.path.join(uci_base_path, 'train', activity)
    for file_name in os.listdir(activity_path):
        if file_name.endswith('.csv'):
            file_path = os.path.join(activity_path, file_name)
            data = pd.read_csv(file_path)

            # Select the relevant columns
            data = data[['accx', 'accy', 'accz']]

            # Append DataFrame to X_train without converting to NumPy
            X_train.append(data)
            y_train.extend([activity] * data.shape[0])  # Append the label for each sample

# Concatenate all DataFrames in X_train to form a single DataFrame
X_train = pd.concat(X_train, axis=0)
y_train = pd.Series(y_train)

print("X_train shape:", X_train.shape)
print("y_train shape:", y_train.shape)

# Display the first few rows to confirm column names
print(X_train.head())
print(y_train.head())


X_train shape: (470528, 3)
y_train shape: (470528,)
       accx      accy      accz
0  1.426164 -0.362485  0.278914
1  1.496596 -0.591127  0.120137
2  1.305815 -0.645547  0.012587
3  0.973824 -0.543838 -0.001186
4  0.691378 -0.424250 -0.015278
0    WALKING
1    WALKING
2    WALKING
3    WALKING
4    WALKING
dtype: object


In [43]:
import numpy as np
import pandas as pd
from langchain_groq import ChatGroq
import os
from sklearn.tree import DecisionTreeClassifier
from dotenv import load_dotenv

load_dotenv()

Groq_Token = os.getenv('GROQ_API_KEY')
groq_models = {"llama3-70b": "llama3-70b-8192", "mixtral": "mixtral-8x7b-32768", "gemma-7b": "gemma-7b-it","llama3.1-70b":"llama-3.1-70b-versatile","llama3-8b":"llama3-8b-8192","llama3.1-8b":"llama-3.1-8b-instant","gemma-9b":"gemma2-9b-it"}
model_name = "llama3.1-70b"
llm = ChatGroq(model=groq_models[model_name], api_key=Groq_Token, temperature=0)
# Testing data 

file1_laying =  pd.read_csv("Combined/Test/LAYING/Subject_2.csv")
file2_walking = pd.read_csv("Combined/Test/WALKING/Subject_2.csv")
file3_sitting=  pd.read_csv("Combined/Test/SITTING/Subject_2.csv")
file4_standing=  pd.read_csv("Combined/Test/STANDING/Subject_2.csv")
file5_upstairs=  pd.read_csv("Combined/Test/WALKING_UPSTAIRS/Subject_2.csv")
file6_downstairs=  pd.read_csv("Combined/Test/WALKING_DOWNSTAIRS/Subject_2.csv")


df1 = pd.DataFrame(file1_laying).head(100)
df2 = pd.DataFrame(file2_walking).head(100)
df3 = pd.DataFrame(file3_sitting).head(100)
df4 = pd.DataFrame(file4_standing).head(100)
df5 = pd.DataFrame(file5_upstairs).head(100)
df6 = pd.DataFrame(file6_downstairs).head(100)


# Training Data for few shot prompt examples

laying_test = pd.read_csv("Combined/Train/LAYING/Subject_1.csv")
sitting_test = pd.read_csv("Combined/Train/SITTING/Subject_1.csv")
standing_test = pd.read_csv("Combined/Train/STANDING/Subject_1.csv")
walking_test = pd.read_csv("Combined/Train/WALKING/Subject_1.csv")
downstairs_test = pd.read_csv("Combined/Train/WALKING_DOWNSTAIRS/Subject_1.csv")
upstairs_test = pd.read_csv("Combined/Train/WALKING_UPSTAIRS/Subject_1.csv")

laying_df = pd.DataFrame(laying_test).head(100)
sitting_df = pd.DataFrame(sitting_test).head(100)
standing_df = pd.DataFrame(standing_test).head(100)
walking_df = pd.DataFrame(walking_test).head(100)
downstairs_df = pd.DataFrame(downstairs_test).head(100)
upstairs_df = pd.DataFrame(upstairs_test).head(100)




In [44]:
# Zero shot demonstration

zero_shot_prompt = f"""
* You are a human activity recognition model.
* Your task is to classify the following accelerometer data into one of the six activities: Walking, Standing, Sittting, Laying, Walking Upstairs, Walking Downstairs. 
* Provide the sentiment label and, if necessary, a brief explanation of your reasoning.
Here is the accelerometer data:
{df1}, {df2}, {df3}

Please classify the activity for these three accelerometer data.
"""

zero_shot_answer = llm.invoke(zero_shot_prompt)
print(zero_shot_answer.content)

Based on the provided accelerometer data, I will classify the activities as follows:

**Data 1:**
The accelerometer data shows a relatively stable pattern with small variations in the x, y, and z axes. The values are mostly within a small range, indicating a low level of movement. This pattern is consistent with the activity of **Standing**.

**Data 2:**
The accelerometer data shows a significant variation in the x-axis, with values ranging from approximately 0.7 to 1.1. This suggests a high level of movement in the x-axis, which is consistent with the activity of **Walking**. The y and z axes show relatively smaller variations, which further supports this classification.

**Data 3:**
The accelerometer data shows a relatively stable pattern with small variations in the x, y, and z axes. The values are mostly within a small range, indicating a low level of movement. However, the x-axis values are slightly higher than those in Data 1, which suggests a slightly more upright posture. This 

In [22]:
# Few Shot demonstration
few_shot_prompt = f""" 
* You are a human activity recognition model.
* Your task is to classify the following accelerometer data into one of the six activities: Walking, Standing, Sittting, Laying, Walking Upstairs, Walking Downstairs. 
* Provide only labels for the dataset. 

Here are some examples:
1.Dataset of laying: {laying_df}
2.Dataset of sitting: {sitting_df}
3.Dataset of standing: {standing_df}
4.Dataset of walking: {walking_df}
5.Dataset of walking downstairs: {downstairs_df}
6.Dataset of walking upstairs: {upstairs_df}

Here is the accelerometer data:
{df1}, 
{df2},
{df3},
{df4},
{df5},
{df6}

Please classify the activity for these six accelerometer data using the dataset of sample activites.
"""
few_shot_answer = llm.invoke(few_shot_prompt)
print(few_shot_answer.content)


Based on the provided accelerometer data, I will classify the activities as follows:

1. Laying
2. Walking
3. Standing
4. Walking
5. Walking Downstairs
6. Walking Upstairs


In [28]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

true_labels = ["Laying", "Sitting","Standing", "Walking" , "Walking Downstairs", "Walking Upstairs"]
model_predictions = ["Laying", "Walking", "Standing", "Walking", "Walking Downstairs", "Walking Upstairs"]

accuracy = accuracy_score(true_labels, model_predictions)
print(f"Accuracy: {accuracy * 100:.2f}%")

Accuracy: 83.33%


In [45]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

true_labels = ["Laying", "Walking","Sitting"]
model_predictions = ["Standing", "Walking","Sitting"]

accuracy = accuracy_score(true_labels, model_predictions)
print(f"Accuracy: {accuracy * 100:.2f}%")

Accuracy: 66.67%
