# Analyze Jarvis Natural Language Classifications

## Harrison Myers

This notebook analyzes the output from the Jarvis chatbot natural language classifier deployed in Slack.

In [None]:
import os
from zipfile import ZipFile
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

### Helper functions

In [None]:
def create_dirs():
    """
    Prepare the working directory and the "data" folder used by the script.

    Changes the working directory to the folder containing this file. When
    ``__file__`` is not defined (e.g. in an interactive notebook session),
    the current working directory is kept instead. If no "data" folder
    exists there yet, it is created and "external-data.zip" is extracted.

    Setup is best-effort: a missing or unreadable archive (or a folder that
    cannot be created) produces a warning instead of raising.

    Returns:
        tuple: ``(dir_path, data_dir)`` -- the working directory path and
        the path of the "data" folder inside it, both as strings.
    """
    # Local imports keep this block self-contained without touching the
    # file-level import list.
    import warnings
    from zipfile import BadZipFile

    # Resolve the directory of this file; fall back to the current working
    # directory when __file__ does not exist (interactive sessions).
    try:
        dir_path = os.path.dirname(os.path.realpath(__file__))
    except NameError:
        dir_path = os.getcwd()
    os.chdir(dir_path)

    # os.path.join picks the correct separator on every platform.
    data_dir = os.path.join(dir_path, "data")
    zip_path = os.path.join(dir_path, "external-data.zip")

    # Only create and unzip on the first run; an existing folder is left alone.
    if not os.path.isdir(data_dir):
        try:
            os.mkdir(data_dir)
            with ZipFile(zip_path) as archive:
                # NOTE(review): extraction targets dir_path, not data_dir --
                # presumably the archive holds a top-level "data" folder;
                # confirm against the actual zip layout.
                archive.extractall(dir_path)
        except (OSError, BadZipFile) as err:
            warnings.warn("Could not prepare data folder: {}".format(err))

    return dir_path, data_dir

# Run the directory setup once and keep both returned paths at module level:
# the working directory and the "data" folder path.
dir_path, data_dir = create_dirs()

def load_model_results(filename):
    """
    Read model results from a comma-separated text file into a dataframe.

    Each usable line holds at least four comma-separated fields: the text,
    its label, and two integer fields stored as NB_class and SGD_class.
    Lines with too few fields or with non-integer class fields are skipped
    as a whole, so the four columns always have the same length.

    NOTE: fields are split on every comma, so a comma inside the text shifts
    the remaining fields and such a line is misparsed or skipped.

    Args:
        filename: path of the results file to read.

    Returns:
        pandas.DataFrame with the columns "Text", "Label", "NB_class" and
        "SGD_class", one row per successfully parsed line.
    """
    texts     = []
    labels    = []
    NB_class  = []
    SGD_class = []
    with open(filename, 'r') as file:
        for line in file:
            fields = line.split(",")
            try:
                # Parse the complete row BEFORE storing any part of it, so a
                # malformed line can never leave the lists at unequal lengths.
                text = fields[0]
                label = fields[1].strip(" ")
                nb_flag = int(fields[2])
                sgd_flag = int(fields[3])
            except (IndexError, ValueError):
                # Too few fields or a non-integer class value: skip the line.
                continue

            texts.append(text)
            labels.append(label)
            NB_class.append(nb_flag)
            SGD_class.append(sgd_flag)

    # turn into dataframe
    data = {"Text": texts,
            "Label": labels,
            "NB_class": NB_class,
            "SGD_class": SGD_class}
    return pd.DataFrame(data)

### Result Exploration

In [None]:
# Load the classifier output. Accuracy below is computed as sum / row count,
# which assumes the *_class columns hold 1 for a correct prediction and 0
# otherwise -- TODO confirm against the file that produces the results.
res = load_model_results("outputs/model_results.txt")

# Overall accuracy of each model across every category
NB_accuracy = np.sum(res["NB_class"]) / len(res["NB_class"])
SGD_accuracy = np.sum(res["SGD_class"]) / len(res["SGD_class"])

# One sub-dataframe per category label
labels = ["TIME", "PIZZA", "WEATHER", "GREET", "JOKE"]
label_df_dict = {name: res[res["Label"] == name] for name in labels}

# Per-category sums of the class columns, in the same order as `labels`
label_num_accurate_NB = [
    np.sum(frame["NB_class"]) for frame in label_df_dict.values()
]
label_num_accurate_SGD = [
    np.sum(frame["SGD_class"]) for frame in label_df_dict.values()
]

# Per-category accuracy (sum / rows in that category), rounded to 3 places
label_accuracy_NB = [
    round(hits / len(frame["NB_class"]), 3)
    for hits, frame in zip(label_num_accurate_NB, label_df_dict.values())
]
label_accuracy_SGD = [
    round(hits / len(frame["SGD_class"]), 3)
    for hits, frame in zip(label_num_accurate_SGD, label_df_dict.values())
]

# The overall figures become one extra group at the end of the chart
labels.append("All Categories")
label_accuracy_NB.append(NB_accuracy)
label_accuracy_SGD.append(SGD_accuracy)

# Grouped bar chart: two bars per label, centred on the tick position
fig, ax = plt.subplots(figsize=(10, 6))
bar_width = 0.35  # width of a single bar
x = np.arange(len(labels))  # one tick position per label

for shift, accuracies, legend_name, colour in (
    (-bar_width/2, label_accuracy_NB, 'Naive Bayes Accuracy', 'skyblue'),
    (bar_width/2, label_accuracy_SGD, 'SGD Accuracy', 'orange'),
):
    ax.bar(x + shift, accuracies, bar_width, label=legend_name, color=colour)

ax.set(
    xlabel='Labels',
    ylabel='Accuracy',
    title='NB vs.SGD model accuracy by Category',
)
ax.set_xticks(x)
ax.set_xticklabels(labels)
ax.legend()

plt.tight_layout()
plt.show()