# Llama Model Analytics on Yelp Dataset

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from datasets import load_dataset


  from .autonotebook import tqdm as notebook_tqdm


## Create the Ground Truth CSV file

In [2]:
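# One-time setup, kept commented out for provenance: export the test split of
# the KaiLv/UDR_Yelp dataset to CSV to serve as the ground truth.
# NOTE: this writes to ICL_Research/, while the loading cell below reads
# UDR-yelp-llama.csv from ICL_Research/UDR_yelp_results/ -- move the file (or
# adjust the path) if this export is ever re-run.
# dataset = load_dataset("KaiLv/UDR_Yelp")
# data = dataset["test"]

# data = pd.DataFrame(data).drop(columns=["idx"])
# data.to_csv("/home/grads/hassledw/ICL_Research/UDR-yelp-llama.csv")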

### Load Yelp Data

In [3]:
# Directory holding the ground-truth CSV and the raw Llama prediction CSVs.
# Kept in a single constant so the notebook can be pointed at another
# checkout by editing one line instead of seven.
YELP_RESULTS_DIR = "/home/grads/hassledw/ICL_Research/UDR_yelp_results"

# Ground-truth labels.
yelp_truth_df = pd.read_csv(f"{YELP_RESULTS_DIR}/UDR-yelp-llama.csv")

# Raw model outputs: one zero-shot and one few-shot file per prompt variant (1-3).
yelp_zero_df = pd.read_csv(f"{YELP_RESULTS_DIR}/UDR-yelp-zeroshot-llama-1.csv")
yelp_few_df = pd.read_csv(f"{YELP_RESULTS_DIR}/UDR-yelp-fewshot-llama-1.csv")
yelp_zero_2_df = pd.read_csv(f"{YELP_RESULTS_DIR}/UDR-yelp-zeroshot-llama-2.csv")
yelp_few_2_df = pd.read_csv(f"{YELP_RESULTS_DIR}/UDR-yelp-fewshot-llama-2.csv")
yelp_zero_3_df = pd.read_csv(f"{YELP_RESULTS_DIR}/UDR-yelp-zeroshot-llama-3.csv")
yelp_few_3_df = pd.read_csv(f"{YELP_RESULTS_DIR}/UDR-yelp-fewshot-llama-3.csv")

### Load ComE Data

In [4]:
# come_truth_df = pd.read_csv()

## Clean the Label Data in Zero-Shot and Run Metric

In [5]:
def get_response(text):
    '''
    Extracts the sentiment label from a raw model output and maps it to an int.

    Only the text after the last "Response:" marker is inspected. The first
    label (in the priority order below) that occurs anywhere in that text wins.

    Parameters:
        text: raw model output; anything that is not a str (e.g. NaN produced
              by an empty CSV cell) is treated as "no answer".

    Returns:
        0 (very negative), 1 (negative), 2 (neutral), 3 (positive),
        4 (very positive), or None when no label is found.
    '''
    # Order matters: the "very ..." labels must be tried before the plain
    # ones, because "negative" is a substring of "very negative" (and likewise
    # for "positive").
    label_scores = (
        ("very negative", 0),
        ("very positive", 4),
        ("negative", 1),
        ("positive", 3),
        ("neutral", 2),
    )

    # Missing cells arrive as float NaN / None; there is nothing to parse.
    if not isinstance(text, str):
        return None

    answer = text.split("Response:")[-1].strip("#").strip(" ").lower()

    # An exact match is just a special case of containment, so one ordered
    # scan covers both clean answers and answers embedded in longer text.
    for label, score in label_scores:
        if label in answer:
            return score
    return None
    

def clean_yelp_df(df):
    '''
    Returns a cleaned copy of a prediction frame with integer labels.

    The "label" column is parsed with get_response, rows containing any
    missing value (which includes labels get_response could not parse) are
    dropped, the number of dropped rows is printed, and "label" is cast to
    int.

    The input frame is left untouched: every step builds a new frame via
    assign(), which also avoids pandas' SettingWithCopyWarning that assigning
    into the result of dropna() triggers.
    '''
    parsed = df.assign(label=df["label"].apply(get_response))
    kept = parsed.dropna()
    print(f"Dropped {parsed.shape[0] - kept.shape[0]} \"None\" entries")
    return kept.assign(label=kept["label"].astype(int))

# Run the same cleaning pass over every zero-/few-shot result frame (in the
# order prompt 1, 2, 3) and rebind each name to its cleaned version.
(
    yelp_zero_df,
    yelp_few_df,
    yelp_zero_2_df,
    yelp_few_2_df,
    yelp_zero_3_df,
    yelp_few_3_df,
) = map(
    clean_yelp_df,
    (
        yelp_zero_df,
        yelp_few_df,
        yelp_zero_2_df,
        yelp_few_2_df,
        yelp_zero_3_df,
        yelp_few_3_df,
    ),
)



Dropped 15 "None" entries
Dropped 42 "None" entries
Dropped 95 "None" entries
Dropped 96 "None" entries
Dropped 293 "None" entries
Dropped 786 "None" entries


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["label"] = df["label"].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["label"] = df["label"].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df["label"] = df["label"].astype(int)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_index

In [6]:
# Persist every cleaned prediction frame as "<name>-cleaned.csv".
# NOTE(review): the first file name has no "-1" suffix although its few-shot
# sibling does -- confirm what downstream readers expect before renaming.
cleaned_outputs = (
    (yelp_zero_df, "/home/grads/hassledw/ICL_Research/UDR_yelp_results/UDR-yelp-zeroshot-llama-cleaned.csv"),
    (yelp_few_df, "/home/grads/hassledw/ICL_Research/UDR_yelp_results/UDR-yelp-fewshot-llama-1-cleaned.csv"),
    (yelp_zero_2_df, "/home/grads/hassledw/ICL_Research/UDR_yelp_results/UDR-yelp-zeroshot-llama-2-cleaned.csv"),
    (yelp_few_2_df, "/home/grads/hassledw/ICL_Research/UDR_yelp_results/UDR-yelp-fewshot-llama-2-cleaned.csv"),
    (yelp_zero_3_df, "/home/grads/hassledw/ICL_Research/UDR_yelp_results/UDR-yelp-zeroshot-llama-3-cleaned.csv"),
    (yelp_few_3_df, "/home/grads/hassledw/ICL_Research/UDR_yelp_results/UDR-yelp-fewshot-llama-3-cleaned.csv"),
)
for cleaned_frame, csv_path in cleaned_outputs:
    cleaned_frame.to_csv(csv_path)


## Compare Ground Truth and *-Shot
Here are the prompts used for the baseline tasks.

**Zero Shot**:
```
Please rate the sentiment of the following text # "very negative", "negative", "neutral", "positive", or "very positive"#:
"### \"{query}\""
"### Response:"""
```

**Few Shot Prompt 1**:

```
"Here are some demonstration examples for the sentiment classification task:
            1. ""It's been 5 years, I guess I should update my BOP review!\n\nNice range of english & irish beers on tap. They usually have some kind of shot special going on when I'm there. The drinks are a tad pricy..."" = ""neutral""
            2. ""Love the concept of this place. My mom and I have been going for a few years now and always have a blast. We start with fun cocktails at Salt. Then we go find our seats (its worth it to pay the extra ..."" = ""very positive""
            Please rate the sentiment of the following text # ""very negative"", ""negative"", ""neutral"", ""positive"", or ""very positive""#.
### ""Excellent sushi, great service, and good happy hour specials.  It has a decent sports bar theme.  I had my bachelor party here and even my friends who don't care for fish liked the place.  I also go here for business dinners.\n\nTry the Ladybug Roll just for the presentation.""""
### Response: very positive"
```

**Few Shot Prompt 2**:
```
"### #Review#: ""We came inside waited for ten minutes to be acknowledge then we had to wait another five to ten minutes  to be seated for a booth as. As we are waiting I seen a booth and the hostess  told me \""that the server was full, so I said okay then once the manager came up she told the hostess to sit us at the same exact booth..... So as we were seated a SPIDER!!!!! crawls across the table.....I'm so done with these MOFOS!!!!!""

    #Here are some examples for the task:
    1. ""Meh!! This is a typical AZ hyped up joint. I have eaten here twice. Both times have left much to desire. First off the bread was toooo freakin thick. The meats are not very high quality in flavor or t..."" = ""negative""
    2. ""I stopped in here on my way to Cali hoping for something gluten free before my flight. It was pretty busy and only a couple overly stressed servers. It has a long bar, naturally as its Vegas, and then..."" = ""negative""

    Please rate the sentiment of the Review: ""very negative"", ""negative"", ""neutral"", ""positive"", or ""very positive""#.
    ### Response:  Very Negative"
```   

In [7]:
def gather_accuracy(df, truth_df=None):
    '''
    Computes the percentage of predictions that match the ground truth.

    Predictions and ground truth are inner-joined on the "sentence" column,
    so only sentences present in both frames are scored; rows that were
    removed from `df` beforehand do not count against the accuracy.

    Parameters:
        df: prediction frame with "sentence" and "label" columns.
        truth_df: ground-truth frame with the same two columns; defaults to
                  the notebook-level `yelp_truth_df`.

    Returns:
        Accuracy as a float in [0, 100], or NaN when the two frames share no
        sentences (instead of raising ZeroDivisionError).
    '''
    if truth_df is None:
        truth_df = yelp_truth_df

    # After the merge, label_x is the ground-truth label and label_y the
    # predicted one (pandas' default suffixes for the left/right frame).
    merged = pd.merge(truth_df, df, on=["sentence"], how="inner")
    if merged.empty:
        return float("nan")

    matches = merged["label_x"] == merged["label_y"]
    return int(matches.sum()) / len(merged) * 100

# One entry per prompt variant: heading, zero-shot frame, few-shot frame.
prompt_runs = (
    ("PROMPT 1", yelp_zero_df, yelp_few_df),
    ("PROMPT 2", yelp_zero_2_df, yelp_few_2_df),
    ("PROMPT 3", yelp_zero_3_df, yelp_few_3_df),
)

for run_index, (heading, zero_shot_df, few_shot_df) in enumerate(prompt_runs):
    # Blank line between sections, but not before the first one.
    if run_index > 0:
        print()
    print(heading)
    print(f"Llama-7b Prediction Accuracy (Zero-shot): {gather_accuracy(zero_shot_df):.2f}%")
    print(f"Llama-7b Prediction Accuracy (Few-shot): {gather_accuracy(few_shot_df):.2f}%")

PROMPT 1
Llama-7b Prediction Accuracy (Zero-shot): 43.55%
Llama-7b Prediction Accuracy (Few-shot): 42.60%

PROMPT 2
Llama-7b Prediction Accuracy (Zero-shot): 41.00%
Llama-7b Prediction Accuracy (Few-shot): 30.37%

PROMPT 3
Llama-7b Prediction Accuracy (Zero-shot): 47.69%
Llama-7b Prediction Accuracy (Few-shot): 47.24%
