# Essay Grader

The goal of this notebook is to build a model that grades the quality of an essay on a scale from 1 to 10.

In [4]:
import tensorflow as tf
import keras
import keras.layers as layers
import numpy as np
import os
import pickle

## Load Glove

In [2]:
# Location of the raw GloVe text file (one word followed by its coefficients per line).
PATH_TO_GLOVE_FILE = "./glove.6B.100d.txt"
# Length of each GloVe vector; used as the embedding width further down.
GLOVE_OUTPUT_DIM = 100

In [5]:
# Word -> coefficient-vector lookup. Parsing the raw GloVe text file is slow,
# so the parsed dictionary is cached as a pickle and reused on later runs.
embeddings = {}
glove_cache_path = "./processed_glove.b"

if not os.path.exists(glove_cache_path):
    print("loading glove from scratch")
    # Pin the encoding: the GloVe text file is UTF-8, and the platform default
    # encoding (e.g. cp1252 on Windows) can fail or mis-decode non-ASCII words.
    with open(PATH_TO_GLOVE_FILE, encoding="utf-8") as f:
        for line in f:
            # Each line is "<word> <c1> <c2> ...": split off the word, parse the rest.
            word, coefs = line.split(maxsplit=1)
            coefs = np.fromstring(coefs, "f", sep=" ")
            embeddings[word] = coefs

    # The `with` block closes the file; no explicit close() is needed.
    with open(glove_cache_path, "wb") as file:
        pickle.dump(embeddings, file)
else:
    print("loading preprocessed glove")
    # NOTE: pickle.load can execute arbitrary code. Only load a cache file that
    # this notebook wrote itself; delete it and re-parse if its origin is unknown.
    with open(glove_cache_path, "rb") as file:
        embeddings = pickle.load(file)

print("Found %s word vectors." % len(embeddings))


loading glove from scratch
Found 400000 word vectors.


## Get the Text Vectorizer

In [6]:
# Cap the vocabulary size at the number of GloVe entries.
# NOTE(review): no `standardize` argument is given, so the layer's default text
# standardization applies -- confirm it matches how the GloVe tokens were produced.
vectorizer = layers.TextVectorization(max_tokens=len(embeddings))

2022-10-12 21:31:45.627088: E tensorflow/stream_executor/cuda/cuda_driver.cc:265] failed call to cuInit: CUDA_ERROR_NO_DEVICE: no CUDA-capable device is detected
2022-10-12 21:31:45.627388: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (DESKTOP-17QUFAA): /proc/driver/nvidia/version does not exist
2022-10-12 21:31:45.636577: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [7]:
# Quickly learn the words (should take about 40 seconds): the vectorizer's
# vocabulary is adapted on the GloVe words themselves, fed in small batches.
vectorizer_batch_size = 10
quick_dataset = tf.data.Dataset.from_tensor_slices(
    np.array(list(embeddings.keys()))
).batch(vectorizer_batch_size)
# No `steps` argument: the dataset is finite, so adapt() runs until it is
# exhausted. (`steps` must be an integer or None; a true-division result is a float.)
vectorizer.adapt(quick_dataset)

In [8]:
# Smoke test: turn one sample sentence into its sequence of vocabulary indices.
sample_sentences = ["I saw it, and it was cool."]
vectorizer(sample_sentences)

<tf.Tensor: shape=(1, 7), dtype=int64, numpy=array([[  8103,  91227,   2926, 346554,   2926,  29418, 294322]])>

In [9]:
# Freeze the vectorizer so it is excluded from training.
vectorizer.trainable = False

# Build a token -> integer index lookup from the adapted vocabulary,
# where each token's index is its position in the vocabulary list.
voc = vectorizer.get_vocabulary()
word_index = {token: position for position, token in enumerate(voc)}

## Make Embedding Layer

In [10]:
num_tokens = len(voc) + 2
hits, misses = 0, 0

# Prepare the embedding matrix: row i holds the GloVe vector of the word with index i.
# The matrix starts as all zeros, so any word without a GloVe vector
# (including the "padding" and "OOV" entries) keeps an all-zero row.
embedding_matrix = np.zeros((num_tokens, GLOVE_OUTPUT_DIM))
for word, row in word_index.items():
    embedding_vector = embeddings.get(word)
    if embedding_vector is None:
        misses += 1
        continue
    embedding_matrix[row] = embedding_vector
    hits += 1
print("Converted %d words (%d misses)" % (hits, misses))

Converted 336326 words (38711 misses)


In [12]:
# Embedding layer whose weights are initialised from the GloVe matrix and
# kept frozen (trainable=False), so training never updates them.
glove_initializer = keras.initializers.Constant(embedding_matrix)
glove = keras.layers.Embedding(
    input_dim=num_tokens,
    output_dim=GLOVE_OUTPUT_DIM,
    embeddings_initializer=glove_initializer,
    trainable=False,
)

## Compare Glove Embeddings Layer and Raw Glove

In [13]:
# Run "the" through the vectorizer and then the embedding layer.
the_token_ids = vectorizer(["the"])
glove(the_token_ids)

<tf.Tensor: shape=(1, 1, 100), dtype=float32, numpy=
array([[[-0.038194, -0.24487 ,  0.72812 , -0.39961 ,  0.083172,
          0.043953, -0.39141 ,  0.3344  , -0.57545 ,  0.087459,
          0.28787 , -0.06731 ,  0.30906 , -0.26384 , -0.13231 ,
         -0.20757 ,  0.33395 , -0.33848 , -0.31743 , -0.48336 ,
          0.1464  , -0.37304 ,  0.34577 ,  0.052041,  0.44946 ,
         -0.46971 ,  0.02628 , -0.54155 , -0.15518 , -0.14107 ,
         -0.039722,  0.28277 ,  0.14393 ,  0.23464 , -0.31021 ,
          0.086173,  0.20397 ,  0.52624 ,  0.17164 , -0.082378,
         -0.71787 , -0.41531 ,  0.20335 , -0.12763 ,  0.41367 ,
          0.55187 ,  0.57908 , -0.33477 , -0.36559 , -0.54857 ,
         -0.062892,  0.26584 ,  0.30205 ,  0.99775 , -0.80481 ,
         -3.0243  ,  0.01254 , -0.36942 ,  2.2167  ,  0.72201 ,
         -0.24978 ,  0.92136 ,  0.034514,  0.46745 ,  1.1079  ,
         -0.19358 , -0.074575,  0.23353 , -0.052062, -0.22044 ,
          0.057162, -0.15806 , -0.30798 , -0.41625 

In [14]:
# Raw GloVe vector for "the", read directly from the loaded dictionary.
embeddings["the"]

array([-0.038194, -0.24487 ,  0.72812 , -0.39961 ,  0.083172,  0.043953,
       -0.39141 ,  0.3344  , -0.57545 ,  0.087459,  0.28787 , -0.06731 ,
        0.30906 , -0.26384 , -0.13231 , -0.20757 ,  0.33395 , -0.33848 ,
       -0.31743 , -0.48336 ,  0.1464  , -0.37304 ,  0.34577 ,  0.052041,
        0.44946 , -0.46971 ,  0.02628 , -0.54155 , -0.15518 , -0.14107 ,
       -0.039722,  0.28277 ,  0.14393 ,  0.23464 , -0.31021 ,  0.086173,
        0.20397 ,  0.52624 ,  0.17164 , -0.082378, -0.71787 , -0.41531 ,
        0.20335 , -0.12763 ,  0.41367 ,  0.55187 ,  0.57908 , -0.33477 ,
       -0.36559 , -0.54857 , -0.062892,  0.26584 ,  0.30205 ,  0.99775 ,
       -0.80481 , -3.0243  ,  0.01254 , -0.36942 ,  2.2167  ,  0.72201 ,
       -0.24978 ,  0.92136 ,  0.034514,  0.46745 ,  1.1079  , -0.19358 ,
       -0.074575,  0.23353 , -0.052062, -0.22044 ,  0.057162, -0.15806 ,
       -0.30798 , -0.41625 ,  0.37972 ,  0.15006 , -0.53212 , -0.2055  ,
       -1.2526  ,  0.071624,  0.70565 ,  0.49744 , 

## Make Model

In [15]:
def make_text_preprocessor():
    """Build the text front-end of the grader.

    Chains the notebook-level `vectorizer` and `glove` layers, in that order,
    into a single Sequential model.

    Returns:
        A `keras.models.Sequential` containing the vectorizer followed by the
        embedding layer.
    """
    return keras.models.Sequential([vectorizer, glove])

def make_model(preprocessor):
    """Build the (uncompiled) essay-grading network.

    The network consumes the output of `preprocessor`: its input shape is
    `preprocessor.output_shape` without the batch dimension.

    Args:
        preprocessor: A model exposing `output_shape`; only that attribute is read.

    Returns:
        A `keras.models.Sequential` ending in a single sigmoid unit.
        NOTE(review): a sigmoid output lies in [0, 1]; presumably the 1-10
        grade is rescaled elsewhere -- confirm when the data is wired in.
    """
    model = keras.models.Sequential()
    model.add(layers.Input(preprocessor.output_shape[1:]))

    # Convolution + pooling stages that shrink the channel count per position
    # step by step (75 -> 50 -> 25), each with a kernel size of 5.
    for filters in (75, 50, 25):
        model.add(layers.Conv1D(filters, 5, activation='relu'))
        model.add(layers.MaxPooling1D())

    # Recurrent layer that folds the remaining sequence into one vector.
    model.add(layers.LSTM(512))

    # Dense funnel down towards the single output, with dropout after each layer.
    for units, drop_rate in ((256, 0.5), (128, 0.5), (64, 0.3)):
        model.add(layers.Dense(units, activation='relu'))
        model.add(layers.Dropout(drop_rate))

    model.add(layers.Dense(1, activation='sigmoid'))

    return model

In [16]:
# Build the vectorizer + embedding front-end and print its layer summary.
text_preprocessor = make_text_preprocessor()
text_preprocessor.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 text_vectorization (TextVec  (None, None)             0         
 torization)                                                     
                                                                 
 embedding_1 (Embedding)     (None, None, 100)         37503900  
                                                                 
Total params: 37,503,900
Trainable params: 0
Non-trainable params: 37,503,900
_________________________________________________________________


In [17]:
# Build the grading network on top of the preprocessor's output shape and print its layer summary.
grader = make_model(text_preprocessor)
grader.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d (Conv1D)             (None, None, 75)          37575     
                                                                 
 max_pooling1d (MaxPooling1D  (None, None, 75)         0         
 )                                                               
                                                                 
 conv1d_1 (Conv1D)           (None, None, 50)          18800     
                                                                 
 max_pooling1d_1 (MaxPooling  (None, None, 50)         0         
 1D)                                                             
                                                                 
 conv1d_2 (Conv1D)           (None, None, 25)          6275      
                                                                 
 max_pooling1d_2 (MaxPooling  (None, None, 25)        

## Load Data

In [None]:
# TODO: obtain and load the essay dataset (essay texts with their grades)