In [1]:
from classifier import *
import matplotlib.pyplot as plt
%matplotlib inline
import cv2
import webvtt
from collections import defaultdict
# Name of the instructor as it appears in the transcript and chat; phrases
# attributed to this speaker are left out of the per-student phrase lists.
instructor = 'Sara M Ichinaga' 
# Path of the WebVTT transcript that is read with webvtt.read().
transcript = 'transcript.vtt'
# Path of the chat log, read line by line and split on tab characters.
chat = 'chat.txt'

In [2]:
# Read the original training data. Its first two dimensions and its overall
# value range are kept so that new phrases can be encoded and scaled with them.
train_data = np.load('train_data.npy')
max_num, max_len = train_data.shape[0], train_data.shape[1]
min_val, max_val = train_data.min(), train_data.max()

# Rebuild the text classifier with the same two dimensions and restore its
# saved weights onto the CPU.
# NOTE(review): torch.load unpickles the checkpoint file, so 'auto.pth' should
# only ever come from a trusted source.
PATH = 'auto.pth'
model = text_classifier(max_num, max_len)
model.load_state_dict(
    torch.load(PATH, map_location=torch.device('cpu'))
)
# Switch to evaluation mode; eval() returns the model, so as the last
# expression of the cell it also displays the layer summary.
model.eval()

text_classifier(
  (conv): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (encoder): Sequential(
    (0): Linear(in_features=270, out_features=4, bias=True)
    (1): Sigmoid()
    (2): Linear(in_features=4, out_features=2, bias=True)
  )
)

In [3]:
def remove_punc(s):
    """Return ``s`` with every '.', ',', '?' and '!' character removed.

    All other characters, including apostrophes, colons and whitespace, are
    kept in their original order.
    """
    unwanted = ('.', ',', '?', '!')
    return ''.join(ch for ch in s if ch not in unwanted)

def classify(phrase):
    """Classify one phrase as relevant or irrelevant and report the result.

    The phrase is encoded with ``process_string`` using the module-level
    ``max_num`` and ``max_len``, min-max scaled with ``min_val`` / ``max_val``
    and passed through the module-level ``model``.  The phrase, the raw model
    output and the verdict are printed, followed by a blank line.

    Parameters
    ----------
    phrase : str
        Text to classify.

    Returns
    -------
    int
        1 when the first model output is greater than the second
        ('Relevant'), otherwise 0 ('Irrelevant').
    """
    print('Phrase:')
    print(phrase)

    # Encode the phrase and scale it with the training-data value range.
    encoded = process_string(phrase, max_num, max_len)
    scaled = (encoded - min_val) / (max_val - min_val)

    # Two leading singleton dimensions are added before the forward pass
    # (presumably batch and channel for the model's Conv2d layer -- TODO confirm).
    batch = torch.FloatTensor(scaled).unsqueeze(0).unsqueeze(0)

    # This is pure inference, so gradient tracking is switched off: no autograd
    # graph is built and the output converts to NumPy without detach().
    with torch.no_grad():
        result = model(batch).numpy()

    print('Classification:')
    print(result)
    is_relevant = result[0][0] > result[0][1]
    print('Relevant' if is_relevant else 'Irrelevant')
    print()
    return 1 if is_relevant else 0

# Example classifier run: classify() prints the phrase, the raw model output and
# the verdict, and returns 1 for 'Relevant' or 0 for 'Irrelevant'.  Because the
# call is the last expression in the cell, that return value is displayed too.
phrase = 'Where can we find the homework assignment'
classify(phrase)

Phrase:
Where can we find the homework assignment
Classification:
[[ 1.0174953  -0.01749544]]
Relevant



1

In [4]:
# Build dictionary of students and their phrases: speaker name -> list of
# punctuation-free phrases.  The instructor's own lines are left out.
phrases = defaultdict(list)

# Examine the video transcript for phrases.  Only captions that contain a
# colon ("<speaker>: <text>") can be attributed to a speaker; the rest are
# skipped.
for caption in webvtt.read(transcript):
    if (':' in caption.text):
        # Split on the first colon only, so a colon inside the spoken text
        # (e.g. "3:30") no longer cuts the phrase short.
        speaker, phrase = caption.text.split(':', 1)
        phrase = phrase[1:]  # drop the single character right after the colon
        if (speaker != instructor):
            phrases[speaker].append(remove_punc(phrase))

# Examine chat for phrases.  Each usable line has at least three tab-separated
# fields; the second holds the speaker and the third the message.
with open(chat, 'r') as f:  # the context manager closes the file even on error
    for chat_line in f:
        # Strip only the line terminator, so a final line without a newline
        # keeps its last character.  Limiting the split to two tabs keeps any
        # tab inside the message as part of the message.
        parts = chat_line.rstrip('\n').split('\t', 2)
        if len(parts) < 3:
            # Blank or otherwise malformed line: nothing to attribute.
            continue
        # The last character of the speaker field is dropped
        # (presumably a trailing colon -- TODO confirm against the chat export).
        speaker = parts[1][:-1]
        phrase = parts[2]
        if (speaker != instructor):
            phrases[speaker].append(remove_punc(phrase))

In [5]:
def _score_phrase(phrase):
    """Return how many classified pieces of ``phrase`` are relevant.

    A phrase with at most ``max_num`` words and ``max_len`` characters is
    passed to ``classify`` directly.  A longer phrase is cut in half by word
    count and each half is scored the same way, first half first, so pieces
    keep shrinking until they fit.  A piece with fewer than two words cannot
    be split any further and is classified as it is.
    """
    words = phrase.split()
    fits = len(words) <= max_num and len(phrase) <= max_len
    if fits or len(words) < 2:
        return classify(phrase)

    # Halves are rebuilt with single spaces and carry no trailing space.
    middle = len(words) // 2
    first_half = ' '.join(words[:middle])
    second_half = ' '.join(words[middle:])
    return _score_phrase(first_half) + _score_phrase(second_half)


# One relevance counter per student, in the iteration order of `phrases`.
num_students = len(phrases)
relevance_scores = np.zeros(num_students)
for i, student in enumerate(phrases.keys()):
    print('Student:')
    print(student)
    print()

    for phrase in phrases[student]:
        relevance_scores[i] += _score_phrase(phrase)

Student:
Isabella Yuyun Heppe

Phrase:
No problems as you can tell
Classification:
[[ 1.0174953  -0.01749544]]
Relevant

Phrase:
Thank you
Classification:
[[0.25871164 0.74128824]]
Irrelevant

Phrase:
Yeah
Classification:
[[-0.0318222  1.0318218]]
Irrelevant

Phrase:
You What's that
Classification:
[[0.25871164 0.74128824]]
Irrelevant

Phrase:
Can you explain what that is
Classification:
[[0.25871164 0.74128824]]
Irrelevant

Phrase:
Yes
Classification:
[[-0.0318222  1.0318218]]
Irrelevant

Phrase:
Can you give me an 
Classification:
[[0.25871164 0.74128824]]
Irrelevant

Phrase:
idea of a cats personality 
Classification:
[[-0.0318222  1.0318218]]
Irrelevant

Phrase:
Yes sir They like claws
Classification:
[[0.25871164 0.74128824]]
Irrelevant

Phrase:
Does the English have 
Classification:
[[0.25871164 0.74128824]]
Irrelevant

Phrase:
any other word for claw 
Classification:
[[0.25871164 0.74128824]]
Irrelevant

Phrase:
Good to know
Classification:
[[0.25871164 0.74128824]]
Irrelevant

