In [14]:
import pandas as pd
import pickle
import numpy as np
import matplotlib.pyplot as plt
from sklearn import metrics
from collections import Counter


In [2]:
# Path of the pickled results frame for the MultinomialNB section
# (alpha=0.01 per the file name); passed to pd.read_pickle.
filename = "MultinomialNB_alpha_0.01"

In [5]:
# Integer class code -> human-readable label. The position of each name in
# the tuple is its code, so the order below must not change.
label_encoding = dict(
    enumerate(
        (
            "Deliberation",
            "Imaginative entry",
            "Other",
            "Procedure",
            "Seminar",
            "Social",
            "UX",
        )
    )
)

In [3]:
# Load the pickled DataFrame named by `filename`.
# NOTE(review): unpickling can execute arbitrary code; only load result
# files that come from a trusted source.
df = pd.read_pickle(filename)

In [4]:
# Preview the loaded frame (columns: ground_truth, predicted, text).
df

Unnamed: 0,ground_truth,predicted,text
0,3,0,I just asked Samantha if this activity is time...
1,4,0,That's what I thought to or how I interpreted ...
2,4,4,Hey Ashley Peterson! I also wanted to mention ...
3,0,5,Yes I found this by luck...LOL!
4,4,4,"Ana, I agree with you that the king is so adam..."
...,...,...,...
179,4,4,The Lady was behind the door.
180,1,4,"Not what do you think she would do, but what w..."
181,0,0,ok
182,0,4,Live without was the first one I put. Then Tr...


### MultinomialNB

In [11]:
# Load the MultinomialNB results inside this cell, exactly as the other model
# sections do. The mapping below overwrites the integer codes with label
# strings, so running it on a `df` that was already mapped (a second run of
# this cell, or a run after any later section) would look the strings up in
# an int-keyed dict, yield NaN for every label, and flag every row as a
# mistake because NaN != NaN.
filename = "MultinomialNB_alpha_0.01"

df = pd.read_pickle(filename)

# Decode integer class codes into readable label names
df["ground_truth"] = df["ground_truth"].map(label_encoding)
df["predicted"] = df["predicted"].map(label_encoding)

# Identify misclassifications
misclassifications = df[df["ground_truth"] != df["predicted"]]

# Analyze the most common mistakes as (ground_truth, predicted) pairs
mistakes = list(zip(misclassifications["ground_truth"], misclassifications["predicted"]))
common_mistakes = Counter(mistakes)

# Get a summary of all misclassifications, one row per (ground_truth, predicted) pair
summary = misclassifications.groupby(["ground_truth", "predicted"]).size().reset_index(name='count')

# Display results: pairs ranked by frequency, then the summary table
print("Most common mistakes:")
for mistake, count in common_mistakes.most_common():
    print(f"{mistake}: {count} times")

print("\nSummary of misclassifications:")
print(summary)



Most common mistakes:
('Deliberation', 'Seminar'): 10 times
('Imaginative entry', 'Seminar'): 7 times
('Social', 'Seminar'): 4 times
('UX', 'Deliberation'): 4 times
('Procedure', 'Deliberation'): 3 times
('Deliberation', 'Procedure'): 3 times
('Social', 'UX'): 3 times
('UX', 'Seminar'): 3 times
('Seminar', 'Deliberation'): 2 times
('Procedure', 'Seminar'): 2 times
('Seminar', 'Social'): 2 times
('UX', 'Procedure'): 2 times
('Deliberation', 'Social'): 1 times
('Other', 'Seminar'): 1 times
('Social', 'Deliberation'): 1 times
('UX', 'Social'): 1 times
('Seminar', 'UX'): 1 times
('Deliberation', 'UX'): 1 times

Summary of misclassifications:
         ground_truth     predicted  count
0        Deliberation     Procedure      3
1        Deliberation       Seminar     10
2        Deliberation        Social      1
3        Deliberation            UX      1
4   Imaginative entry       Seminar      7
5               Other       Seminar      1
6           Procedure  Deliberation      3
7         

In [12]:
# Show the MultinomialNB rows whose predicted label differs from the ground truth.
misclassifications

Unnamed: 0,ground_truth,predicted,text
0,Procedure,Deliberation,I just asked Samantha if this activity is time...
1,Seminar,Deliberation,That's what I thought to or how I interpreted ...
3,Deliberation,Social,Yes I found this by luck...LOL!
8,Imaginative entry,Seminar,I'd rather be sad for a little while that the ...
15,Deliberation,Seminar,I know live without him
28,Deliberation,Seminar,I like it
30,Deliberation,Procedure,Submitted
35,Other,Seminar,w
38,Procedure,Seminar,"Ok, that's how I understand it as well. Let's ..."
39,Deliberation,Seminar,So how do you guys think we should end it?


In [13]:
# Show the MultinomialNB count of each (ground_truth, predicted) mistake pair.
summary

Unnamed: 0,ground_truth,predicted,count
0,Deliberation,Procedure,3
1,Deliberation,Seminar,10
2,Deliberation,Social,1
3,Deliberation,UX,1
4,Imaginative entry,Seminar,7
5,Other,Seminar,1
6,Procedure,Deliberation,3
7,Procedure,Seminar,2
8,Seminar,Deliberation,2
9,Seminar,Social,2


### Random Forest

In [15]:
# Random Forest results: load the pickled predictions and decode both label
# columns from integer codes to class names.
filename = "RandomForest_150_estimators_30_max_depth"

df = pd.read_pickle(filename).assign(
    ground_truth=lambda frame: frame["ground_truth"].map(label_encoding),
    predicted=lambda frame: frame["predicted"].map(label_encoding),
)

# Rows whose decoded prediction differs from the decoded ground truth
misclassifications = df.loc[df["ground_truth"].ne(df["predicted"])]

# (ground_truth, predicted) pairs, tallied so they can be ranked by frequency
mistakes = list(
    misclassifications[["ground_truth", "predicted"]].itertuples(index=False, name=None)
)
common_mistakes = Counter(mistakes)

# Pair counts as a table, one row per (ground_truth, predicted) combination
summary = (
    misclassifications
    .groupby(["ground_truth", "predicted"])
    .size()
    .rename("count")
    .reset_index()
)

# Report: ranked pairs first, then the table
print("Most common mistakes:")
for mistake, count in common_mistakes.most_common():
    print(f"{mistake}: {count} times")

print("\nSummary of misclassifications:", summary, sep="\n")


Most common mistakes:
('Deliberation', 'Seminar'): 12 times
('Social', 'Seminar'): 10 times
('UX', 'Seminar'): 9 times
('Procedure', 'Seminar'): 7 times
('Imaginative entry', 'Seminar'): 6 times
('Seminar', 'Deliberation'): 4 times
('UX', 'Deliberation'): 3 times
('Procedure', 'Deliberation'): 2 times
('Social', 'Deliberation'): 2 times
('Seminar', 'Social'): 2 times
('Deliberation', 'Procedure'): 1 times
('Other', 'Seminar'): 1 times
('Social', 'Procedure'): 1 times
('Imaginative entry', 'Deliberation'): 1 times
('Procedure', 'Social'): 1 times

Summary of misclassifications:
         ground_truth     predicted  count
0        Deliberation     Procedure      1
1        Deliberation       Seminar     12
2   Imaginative entry  Deliberation      1
3   Imaginative entry       Seminar      6
4               Other       Seminar      1
5           Procedure  Deliberation      2
6           Procedure       Seminar      7
7           Procedure        Social      1
8             Seminar  Delibe

In [16]:
# Show the Random Forest rows whose predicted label differs from the ground truth.
misclassifications

Unnamed: 0,ground_truth,predicted,text
0,Procedure,Seminar,I just asked Samantha if this activity is time...
3,Deliberation,Seminar,Yes I found this by luck...LOL!
8,Imaginative entry,Seminar,I'd rather be sad for a little while that the ...
15,Deliberation,Seminar,I know live without him
20,Social,Seminar,"Okay, cool"
...,...,...,...
175,Procedure,Seminar,Do we know how many people are part of this gr...
176,Seminar,Deliberation,it's wonderful!
180,Imaginative entry,Seminar,"Not what do you think she would do, but what w..."
182,Deliberation,Seminar,Live without was the first one I put. Then Tr...


In [17]:
# Show the Random Forest count of each (ground_truth, predicted) mistake pair.
summary

Unnamed: 0,ground_truth,predicted,count
0,Deliberation,Procedure,1
1,Deliberation,Seminar,12
2,Imaginative entry,Deliberation,1
3,Imaginative entry,Seminar,6
4,Other,Seminar,1
5,Procedure,Deliberation,2
6,Procedure,Seminar,7
7,Procedure,Social,1
8,Seminar,Deliberation,4
9,Seminar,Social,2


### SVM

In [18]:
# SVM results: load the pickled predictions and decode both label columns
# from integer codes to class names.
filename = "SVM_SVC_sigmoid_kernel"

df = pd.read_pickle(filename).assign(
    ground_truth=lambda frame: frame["ground_truth"].map(label_encoding),
    predicted=lambda frame: frame["predicted"].map(label_encoding),
)

# Rows whose decoded prediction differs from the decoded ground truth
misclassifications = df.loc[df["ground_truth"].ne(df["predicted"])]

# (ground_truth, predicted) pairs, tallied so they can be ranked by frequency
mistakes = list(
    misclassifications[["ground_truth", "predicted"]].itertuples(index=False, name=None)
)
common_mistakes = Counter(mistakes)

# Pair counts as a table, one row per (ground_truth, predicted) combination
summary = (
    misclassifications
    .groupby(["ground_truth", "predicted"])
    .size()
    .rename("count")
    .reset_index()
)

# Report: ranked pairs first, then the table
print("Most common mistakes:")
for mistake, count in common_mistakes.most_common():
    print(f"{mistake}: {count} times")

print("\nSummary of misclassifications:", summary, sep="\n")


Most common mistakes:
('Deliberation', 'Seminar'): 11 times
('UX', 'Seminar'): 9 times
('Social', 'Seminar'): 8 times
('Imaginative entry', 'Seminar'): 7 times
('Procedure', 'Seminar'): 4 times
('Procedure', 'Deliberation'): 3 times
('UX', 'Deliberation'): 3 times
('Social', 'Deliberation'): 2 times
('Seminar', 'Deliberation'): 2 times
('Deliberation', 'Social'): 1 times
('Deliberation', 'Procedure'): 1 times
('Other', 'Seminar'): 1 times
('UX', 'Social'): 1 times
('Seminar', 'UX'): 1 times
('Imaginative entry', 'Deliberation'): 1 times
('Procedure', 'Social'): 1 times
('Deliberation', 'UX'): 1 times

Summary of misclassifications:
         ground_truth     predicted  count
0        Deliberation     Procedure      1
1        Deliberation       Seminar     11
2        Deliberation        Social      1
3        Deliberation            UX      1
4   Imaginative entry  Deliberation      1
5   Imaginative entry       Seminar      7
6               Other       Seminar      1
7           Proc

In [19]:
# Show the SVM rows whose predicted label differs from the ground truth.
misclassifications

Unnamed: 0,ground_truth,predicted,text
0,Procedure,Deliberation,I just asked Samantha if this activity is time...
3,Deliberation,Social,Yes I found this by luck...LOL!
8,Imaginative entry,Seminar,I'd rather be sad for a little while that the ...
15,Deliberation,Seminar,I know live without him
22,Social,Seminar,(I've been playing dnd too much and it has inf...
25,Procedure,Deliberation,I am late to the discussion
28,Deliberation,Seminar,I like it
29,Deliberation,Seminar,So the top is for chatting and the bottom is f...
30,Deliberation,Procedure,Submitted
35,Other,Seminar,w


In [20]:
# Show the SVM count of each (ground_truth, predicted) mistake pair.
summary

Unnamed: 0,ground_truth,predicted,count
0,Deliberation,Procedure,1
1,Deliberation,Seminar,11
2,Deliberation,Social,1
3,Deliberation,UX,1
4,Imaginative entry,Deliberation,1
5,Imaginative entry,Seminar,7
6,Other,Seminar,1
7,Procedure,Deliberation,3
8,Procedure,Seminar,4
9,Procedure,Social,1


### XGBoost

In [21]:
# XGBoost results: load the pickled predictions and decode both label columns
# from integer codes to class names.
filename = "XGBoost"

df = pd.read_pickle(filename).assign(
    ground_truth=lambda frame: frame["ground_truth"].map(label_encoding),
    predicted=lambda frame: frame["predicted"].map(label_encoding),
)

# Rows whose decoded prediction differs from the decoded ground truth
misclassifications = df.loc[df["ground_truth"].ne(df["predicted"])]

# (ground_truth, predicted) pairs, tallied so they can be ranked by frequency
mistakes = list(
    misclassifications[["ground_truth", "predicted"]].itertuples(index=False, name=None)
)
common_mistakes = Counter(mistakes)

# Pair counts as a table, one row per (ground_truth, predicted) combination
summary = (
    misclassifications
    .groupby(["ground_truth", "predicted"])
    .size()
    .rename("count")
    .reset_index()
)

# Report: ranked pairs first, then the table
print("Most common mistakes:")
for mistake, count in common_mistakes.most_common():
    print(f"{mistake}: {count} times")

print("\nSummary of misclassifications:", summary, sep="\n")


Most common mistakes:
('Deliberation', 'Seminar'): 14 times
('Social', 'Seminar'): 10 times
('UX', 'Seminar'): 9 times
('Imaginative entry', 'Seminar'): 7 times
('Procedure', 'Seminar'): 6 times
('UX', 'Deliberation'): 4 times
('Social', 'Deliberation'): 3 times
('Procedure', 'Deliberation'): 2 times
('Seminar', 'Deliberation'): 2 times
('Other', 'Seminar'): 1 times
('Social', 'UX'): 1 times
('Seminar', 'Imaginative entry'): 1 times
('Seminar', 'Social'): 1 times
('Social', 'Procedure'): 1 times
('Seminar', 'UX'): 1 times
('Imaginative entry', 'Deliberation'): 1 times
('Procedure', 'Social'): 1 times
('Deliberation', 'UX'): 1 times

Summary of misclassifications:
         ground_truth          predicted  count
0        Deliberation            Seminar     14
1        Deliberation                 UX      1
2   Imaginative entry       Deliberation      1
3   Imaginative entry            Seminar      7
4               Other            Seminar      1
5           Procedure       Deliberation

In [22]:
# Show the XGBoost rows whose predicted label differs from the ground truth.
misclassifications

Unnamed: 0,ground_truth,predicted,text
0,Procedure,Deliberation,I just asked Samantha if this activity is time...
8,Imaginative entry,Seminar,I'd rather be sad for a little while that the ...
9,Deliberation,Seminar,I tried them all at once and it told me wrong lol
14,Social,Seminar,Hello.
15,Deliberation,Seminar,I know live without him
...,...,...,...
171,Deliberation,Seminar,Which questions? like the prompt?
175,Procedure,Seminar,Do we know how many people are part of this gr...
180,Imaginative entry,Seminar,"Not what do you think she would do, but what w..."
182,Deliberation,Seminar,Live without was the first one I put. Then Tr...


In [23]:
# Show the XGBoost count of each (ground_truth, predicted) mistake pair.
summary

Unnamed: 0,ground_truth,predicted,count
0,Deliberation,Seminar,14
1,Deliberation,UX,1
2,Imaginative entry,Deliberation,1
3,Imaginative entry,Seminar,7
4,Other,Seminar,1
5,Procedure,Deliberation,2
6,Procedure,Seminar,6
7,Procedure,Social,1
8,Seminar,Deliberation,2
9,Seminar,Imaginative entry,1
