# Notebook to demonstrate Zero shot and Few shot Learning

In [1]:
import pandas as pd 
from langchain_groq.chat_models import ChatGroq
from sklearn.metrics import accuracy_score, precision_score, recall_score, confusion_matrix, classification_report

In [10]:
# Groq API and Models 
Groq_Token = "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"  # Do not share this key with anyone

groq_models = {"llama3-70b": "llama3-70b-8192", "mixtral": "mixtral-8x7b-32768", "gemma-7b": "gemma-7b-it","llama3.1-70b":"llama-3.1-70b-versatile","llama3-8b":"llama3-8b-8192","llama3.1-8b":"llama-3.1-8b-instant","gemma-9b":"gemma2-9b-it"}

**NOTE : DO NOT SHARE THE API KEY WITH ANYONE. DO NOT COMMIT THE API KEY TO GITHUB.**

Always do a sanity check before committing the code to github. If the key is found in the code, you will be penalized with a 0.5 marks deduction.

# Zero Shot 

In [3]:
# Statement 
sentence = "The product quality is amazing but the delivery was delayed. However I am happy with the customer service."

# System Prompts 
query = f"""
* You are a sentiment analysis model. 
* Your task is to analyze the sentiment expressed in the given text and classify it as 'positive', 'negative', or 'neutral'. 
* Provide the sentiment label and, if necessary, a brief explanation of your reasoning.

Sentence: {sentence}
""" 

# To use Groq LLMs 
model_name = "llama3-70b" # We can choose any model from the groq_models dictionary
llm = ChatGroq(model=groq_models[model_name], api_key=Groq_Token, temperature=0)
answer = llm.invoke(query)

print(answer.content)

Sentiment label: Neutral

Explanation: The sentence expresses mixed sentiments. The words "amazing" and "happy" convey a positive sentiment, indicating satisfaction with the product quality and customer service, respectively. However, the phrase "delivery was delayed" expresses a negative sentiment, indicating dissatisfaction with the delivery experience. Since both positive and negative sentiments are present, the overall sentiment is neutral.


# Few Shot

In [4]:
# Statement 
sentence = "The product quality is amazing but the delivery was delayed. However I am happy with the customer service."

# System Prompts 
query = f"""
* You are a sentiment analysis model. 
* Your task is to analyze the sentiment expressed in the given text and classify it as 'positive', 'negative', or 'neutral'. 
* Provide the sentiment label and, if necessary, a brief explanation of your reasoning.

Here are few examples:
1. Sentence: 'The customer service was excellent, and I received my order quickly.'
Sentiment: Positive

2. Sentence: 'The food was bland and the service was slow.'
Sentiment: Negative

3. Sentence: 'The product is okay, but it's not worth the price.'
Sentiment: Neutral

Sentence: {sentence}
""" 

# To use Groq LLMs 
model_name = "llama3-70b" # We can choose any model from the groq_models dictionary
llm = ChatGroq(model=groq_models[model_name], api_key=Groq_Token, temperature=0)
answer = llm.invoke(query)

print(answer.content)

Sentiment: Positive

Explanation: Although the sentence mentions a negative aspect ("the delivery was delayed"), the positive sentiments expressed in the sentence ("The product quality is amazing", "I am happy with the customer service") outweigh the negative one, resulting in an overall positive sentiment.


In [5]:
#=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
#
#                                   ES335- Machine Learning- Assignment 1
#
# This file is used to create the dataset for the mini-project. The dataset is created by reading the data from
# the Combined folder. The data is then split into training, testing, and validation sets. This split is supposed
# to be used for all the modeling purposes.
#
#=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=

# Library imports
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import os

# Constants
time = 10
offset = 100
folders = ["LAYING","SITTING","STANDING","WALKING","WALKING_DOWNSTAIRS","WALKING_UPSTAIRS"]
classes = {"WALKING":1,"WALKING_UPSTAIRS":2,"WALKING_DOWNSTAIRS":3,"SITTING":4,"STANDING":5,"LAYING":6}

combined_dir = os.path.join("Combined")

#=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-==-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
                                                # Train Dataset
#=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-==-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=

X_train=[]
y_train=[]
dataset_dir = os.path.join(combined_dir,"Train")

for folder in folders:
    files = os.listdir(os.path.join(dataset_dir,folder))

    for file in files:

        df = pd.read_csv(os.path.join(dataset_dir,folder,file),sep=",",header=0)
        df = df[offset:offset+time*50]
        X_train.append(df.values)
        y_train.append(classes[folder])

X_train = np.array(X_train)
y_train = np.array(y_train)


#=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-==-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
                                                # Test Dataset
#=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-==-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=

X_test=[]
y_test=[]
dataset_dir = os.path.join(combined_dir,"Test")

for folder in folders:
    files = os.listdir(os.path.join(dataset_dir,folder))
    for file in files:

        df = pd.read_csv(os.path.join(dataset_dir,folder,file),sep=",",header=0)
        df = df[offset:offset+time*50]
        X_test.append(df.values)
        y_test.append(classes[folder])

X_test = np.array(X_test)
y_test = np.array(y_test)

#=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-==-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
                                                # Final Dataset
#=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-==-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=

# USE THE BELOW GIVEN DATA FOR TRAINING and TESTING purposes

# concatenate the training and testing data
X = np.concatenate((X_train,X_test))
y = np.concatenate((y_train,y_test))

# split the data into training and testing sets. Change the seed value to obtain different random splits.
seed = 4
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.3,random_state=seed,stratify=y)

print("Training data shape: ",X_train.shape)
print("Testing data shape: ",X_test.shape)

#=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-==-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=




Training data shape:  (126, 500, 3)
Testing data shape:  (54, 500, 3)


In [6]:
data = X_train[0]
y_train[0]

np.int64(2)

In [7]:
# System Prompts 
query = f"""
* You are a given accelerometer data . 
*A person performed one of the six activities (WALKING, WALKING_UPSTAIRS, WALKING_DOWNSTAIRS, SITTING, STANDING, LAYING) wearing a smartphone (Samsung Galaxy S II) on the waist. Using its embedded accelerometer and gyroscope, we captured 3-axial linear acceleration at a constant rate of 50Hz.
*Your task is to find out which of the six activities did the person do from the following csv file which contains data in x, y and z directions respectively.
*Provide the activity name and, if necessary, a brief explanation of your reasoning.

Data: {data}
""" 

# To use Groq LLMs 
model_name = "llama3-70b" # We can choose any model from the groq_models dictionary
llm = ChatGroq(model=groq_models[model_name], api_key=Groq_Token, temperature=0)
answer = llm.invoke(query)

print(answer.content)

A classic problem in machine learning and signal processing!

To solve this problem, I'll use a combination of feature extraction and machine learning techniques. Since we don't have any additional information about the data, I'll rely on the patterns and characteristics of the accelerometer data to identify the activity.

**Feature Extraction**

From the given data, I'll extract some meaningful features that can help distinguish between the six activities. Here are some features I'll consider:

1. **Mean**: Calculate the mean of each axis (x, y, z) to capture the overall acceleration pattern.
2. **Standard Deviation**: Calculate the standard deviation of each axis to capture the variability of the acceleration.
3. **Peak-to-Peak Amplitude**: Calculate the peak-to-peak amplitude of each axis to capture the range of acceleration values.
4. **Frequency Domain Features**: Calculate the power spectral density (PSD) of each axis using a Fast Fourier Transform (FFT). This will help capture t

In [8]:
input_data = X_train
input_labels = y_train
output_data = X_test
output_labels = np.array(output_data.shape[0])

In [100]:
# System Prompts 
query = f"""
* You are a given accelerometer data of 54 persons. 
*Each person performed one of the six activities (WALKING, WALKING_UPSTAIRS, WALKING_DOWNSTAIRS, SITTING, STANDING, LAYING) wearing a smartphone (Samsung Galaxy S II) on the waist. Using its embedded accelerometer and gyroscope, we captured 3-axial linear acceleration at a constant rate of 50Hz.
*Your task is to find out which of the six activities did each person do from the following output_data array which contains data in x, y and z directions respectively for 54 people and add it to an array and return the array of length 54
*Provide the activity name and, if necessary, a brief explanation of your reasoning.
*classes = ("WALKING":1,"WALKING_UPSTAIRS":2,"WALKING_DOWNSTAIRS":3,"SITTING":4,"STANDING":5,"LAYING":6) This dictionary is used for labeling the activities.
*Dont give me codes, give me results in array of length 54 according to the dictionary in range of 1 through 6


Here are few examples:
Data: {input_data} of length 126
Labels = {input_labels}

Data: {output_data} of length 54
Labels = 

Return a array of 54 labels for the output_data samples not of size 114 or 113
""" 

# To use Groq LLMs 
model_name = "llama3-70b" # We can choose any model from the groq_models dictionary
llm = ChatGroq(model=groq_models[model_name], api_key=Groq_Token, temperature=0)
answer = llm.invoke(query)

print(answer.content)

Based on the provided accelerometer data, I will analyze each sample and determine the corresponding activity label. Here is the array of 54 labels:

[2, 6, 5, 2, 2, 2, 1, 1, 5, 4, 1, 4, 1, 1, 6, 2, 3, 1, 1, 4, 3, 3, 5, 6, 5, 1, 3, 6, 4, 5, 5, 2, 5, 1, 1, 6, 4, 3, 3, 3, 4, 6, 3, 4, 1, 2, 2, 6, 5, 5, 3, 6, 6, 4, 5, 3, 4, 3, 1, 4, 4, 3, 1, 5, 2, 3, 6, 1, 4, 2, 2, 5, 2, 1, 3, 6, 6, 2, 5, 6, 1, 3, 5, 2, 3, 4, 4, 2, 5, 1, 1, 1, 3, 4, 5, 6, 1, 3, 6, 5, 4, 2, 2, 6, 2, 6, 6, 3, 1, 4, 6, 2, 2]

Here's a brief explanation of my reasoning:

* I analyzed the patterns and ranges of the acceleration data in the x, y, and z directions for each sample.
* I identified characteristic patterns for each activity, such as:
	+ Walking: relatively high acceleration values in the x and y directions, with a consistent pattern.
	+ Walking Upstairs/Downstairs: similar to walking, but with more pronounced acceleration changes in the z direction.
	+ Sitting: low acceleration values in all directions, with minimal 

In [98]:
y_pred = np.array([2, 6, 5, 2, 2, 2, 1, 1, 5, 4, 1, 4, 1, 1, 6, 2, 3, 1, 1, 4, 3, 3, 5, 6, 5, 1, 3, 6, 4, 5, 5, 2, 5, 1, 1, 6, 4, 3, 3, 3, 4, 6, 3, 4, 1, 2, 2, 6, 5, 5, 3, 6, 6, 4, 5, 3, 4, 3, 1, 4, 4, 3, 1, 5, 2, 3, 6, 1, 4, 2, 2, 5, 2, 1, 3, 6, 6, 2, 5, 6, 1, 3, 5, 2, 3, 4, 4, 2, 5, 1, 1, 1, 3, 4, 5, 6, 1, 3, 6, 5, 4, 2, 2, 6, 2, 6, 6, 3, 1, 4, 6, 2, 2])

y_pred.shape
accuracy = accuracy_score(y_test, y_pred[:54])
print(f"Accuracy: {accuracy:.4f}")

Accuracy: 0.1852


In [11]:
query = f"""
* You are a given accelerometer data of 54 persons. 
*Each person performed one of the six activities (WALKING, WALKING_UPSTAIRS, WALKING_DOWNSTAIRS, SITTING, STANDING, LAYING) wearing a smartphone (Samsung Galaxy S II) on the waist. Using its embedded accelerometer and gyroscope, we captured 3-axial linear acceleration at a constant rate of 50Hz.
*Your task is to find out which of the six activities did each person do from the following output_data array which contains data in x, y and z directions respectively for 54 people and add it to an array and return the array of length 54
*Provide the activity name and, if necessary, a brief explanation of your reasoning.
*classes = ("WALKING":1,"WALKING_UPSTAIRS":2,"WALKING_DOWNSTAIRS":3,"SITTING":4,"STANDING":5,"LAYING":6) This dictionary is used for labeling the activities.
*Dont give me codes, give me results in array of length 54 according to the dictionary in range of 1 through 6


Here are few examples:
Train_Data: {input_data}
Train_Labels: {input_labels}

Test_Data: {output_data}

using Train_Data and Train_Labels, predict the Labels of Test_Data and Return an array of Test_Labels as Output_Labels
"""