In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import multiprocessing as mp

import torch

import os
os.sys.path.append('./training')
from pytorch_model import Classifier

In [2]:
# Pick the compute device: CUDA when torch can see a GPU, plain CPU otherwise.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Report the chosen device and the number of logical CPUs on this machine.
print('Device:', device)
print(mp.cpu_count(),' CPUs available')

Device: cuda
16  CPUs available


## Saving/Loading a PyTorch model

In [3]:
# Rebuild the network with the exact same architecture that was trained:
# load_state_dict() only restores parameter values, it does not create layers.
model  = Classifier(use_LSTM=True,N_metrics=5)

# The checkpoint file name encodes the training run it belongs to.
seed = 6287 # for train test splitting
epoch = 12
batch_size = 10
model_weights_path = 'training/model_weights_all_epoch%i_seed%s_batch%i.pt'%(epoch,seed,batch_size)

# map_location="cpu": without it torch.load tries to restore every tensor onto
# the device it was saved from, which fails on a machine without CUDA when the
# checkpoint was written from a GPU. The model is never moved to `device` in
# this notebook and all inputs are built on the CPU, so CPU is the right target.
model.load_state_dict(torch.load(model_weights_path, map_location="cpu"))

# Switch to evaluation mode; eval() returns the module, so this cell also
# displays the architecture.
model.eval()

Classifier(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (text_read): LSTM(300, 128)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=133, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=2, bias=True)
  )
  (softmax): Softmax(dim=1)
)

## Inputting text to the model

In [28]:
# Example 1 (AI-generated): an essay copied directly from the ChatGPT web interface.
input_text_test ="""Computers, the marvels of modern technology, have revolutionized the way we live, work, and interact with the world. These complex machines have evolved from room-sized calculators to sleek, multifunctional devices that permeate nearly every aspect of our daily lives.

At their core, computers are electronic devices designed to process data and perform various tasks with remarkable speed and precision. They operate on the principles of binary code, using a series of ones and zeroes to represent and manipulate information. This binary system forms the basis of all computer functions, enabling the execution of intricate operations through logical sequences and algorithms.

The history of computers spans several decades, marked by significant milestones and innovations. From the invention of the first mechanical computers by pioneers like Charles Babbage and Ada Lovelace to the development of transistors and integrated circuits, each advancement has contributed to the remarkable capabilities of modern computing devices.

Today, computers come in various forms, ranging from powerful desktops and laptops to compact smartphones and tablets. They serve a multitude of purposes, facilitating communication, storing vast amounts of information, conducting research, and powering industries across the globe.

The internet, a network of interconnected computers, has further expanded the capabilities of these machines. It serves as a gateway to an immeasurable wealth of information, enabling instant communication, online commerce, and global connectivity. The advent of cloud computing has revolutionized storage and accessibility, allowing users to access data and applications remotely with unparalleled convenience.

Computers have also transformed industries, enhancing productivity and efficiency in fields such as healthcare, finance, education, and entertainment. They enable complex simulations, aid in scientific discoveries, automate tasks, and facilitate collaboration on a global scale.

Moreover, the evolution of artificial intelligence (AI) has opened new frontiers in computing. AI-powered systems can learn, adapt, and make decisions, leading to advancements in areas like machine learning, natural language processing, and robotics.

However, along with their myriad benefits, computers also pose challenges such as cybersecurity threats, ethical concerns regarding AI, and the digital divide that limits access to technology for some communities.

In conclusion, computers stand as one of humanity's most transformative inventions, reshaping society and propelling progress in unprecedented ways. Their continued evolution and integration into various facets of our lives promise a future where innovation and technological advancement will continue to redefine what is possible. Embracing the potential of computers while addressing their challenges will be key to harnessing their power for the betterment of humanity."""

In [27]:
# Example 2 (human-written): a short text put together by the notebook author.
input_text_test2 ="""Please help me! I don't know what I'm doing with my life! Who knows what the meaning of life is? Certainly not me."""

### We need to do a little preprocessing 

(it might take a bit for some things to load)

In [32]:
%%time
from preprocessing import preprocess,  metrics, w2v
# Word substitutions applied after preprocessing and before the embedding lookup.
# NOTE(review): presumably these stop words are absent from the embedding
# vocabulary and are swapped for close stand-ins -- confirm against training.
remap = {'a':'an', 'and':'also', 'of':'in', 'to':'at'}

def process_for_model(text, embedding_dim=300):
    """Turn raw text into the two tensors passed to the classifier.

    The text metrics are computed on the raw text via ``metrics``. The text is
    then cleaned with ``preprocess``, split on whitespace, passed through
    ``remap`` and looked up word by word in ``w2v``. Words for which
    ``w2v.get_vector`` raises ``KeyError`` are skipped and reported on stdout.

    Parameters
    ----------
    text : str
        Raw input text.
    embedding_dim : int, optional
        Width of one word vector (default 300, the value this notebook uses).

    Returns
    -------
    torch_tensor : torch.Tensor
        Word vectors of shape ``(n_known_words, embedding_dim)``. When no word
        has an embedding this is an empty ``(0, embedding_dim)`` float32 tensor
        instead of an error.
    torch_met : torch.Tensor
        The output of ``metrics(text)`` converted to a float32 tensor.
    """
    # Metrics are extracted from the raw text, before any preprocessing.
    met = metrics(text)

    # Clean the text, split on whitespace, then apply the word substitutions.
    proc = preprocess(text)
    essay_words = [remap.get(word, word) for word in proc.split()]

    # Embedding lookup; out-of-vocabulary words are collected, not fatal.
    vecs = []
    missing_words = []
    for word in essay_words:
        try:
            vecs.append(w2v.get_vector(word))
        except KeyError:
            # this means that the word isn't in the w2v
            missing_words.append(word)

    # Report every missing word once, in sorted order.
    unique_missing = " ".join(sorted(set(missing_words)))
    print('missing these words in model: ',unique_missing)

    if vecs:
        torch_tensor = torch.from_numpy(np.array(vecs)).reshape(-1, embedding_dim)
    else:
        # reshape(-1, dim) is ambiguous on a 0-element tensor and raises in
        # torch, so build the empty result with an explicit shape instead.
        torch_tensor = torch.zeros((0, embedding_dim), dtype=torch.float32)
    torch_met = torch.from_numpy(met).float()
    return torch_tensor, torch_met

CPU times: user 6 µs, sys: 1 µs, total: 7 µs
Wall time: 10 µs


In [33]:
%%time
# Embed the ChatGPT essay and compute its metric vector.
tensor, metric = process_for_model(input_text_test)

# Rescale the first metric in place: take log10, then shift and divide by 2.3.
# NOTE(review): 2.3 is a hard-coded constant (10**2.3 is roughly 200); it
# presumably mirrors the normalisation used during training -- confirm.
log_first_metric = np.log10(metric[0])
metric[0] = (log_first_metric - 2.3) / 2.3

missing these words in model:  lovelass
CPU times: user 162 ms, sys: 10.2 ms, total: 172 ms
Wall time: 171 ms


In [34]:
# Show the word-vector tensor together with its (n_words, 300) size.
(tensor, tensor.size())

(tensor([[ 0.3242, -0.2432,  0.1152,  ..., -0.0060, -0.0286, -0.0728],
         [ 0.0801,  0.1050,  0.0498,  ...,  0.0037,  0.0476, -0.0688],
         [ 0.2305,  0.2275, -0.1475,  ..., -0.2812,  0.1816,  0.1494],
         ...,
         [-0.2207,  0.0554,  0.1846,  ..., -0.1777,  0.0713, -0.1309],
         [ 0.0703,  0.0869,  0.0879,  ..., -0.0476,  0.0145, -0.0625],
         [ 0.1934,  0.2373,  0.4668,  ..., -0.1914,  0.1836,  0.1396]]),
 torch.Size([409, 300]))

In [35]:
# Show the metric vector (after the first entry was rescaled) and its size.
(metric, metric.size())

(tensor([-0.1660,  0.1190,  0.0060,  0.8750,  0.9917]), torch.Size([5]))

### Get a prediction

In [36]:
n_inputs = 1  # number of texts fed to the model in this call

# Inference only: run the forward pass under no_grad so torch does not record
# an autograd graph (the printed tensor therefore carries no grad_fn).
# The word vectors are reshaped to (n_words, n_inputs, 300) and the metrics to
# (n_inputs, n_metrics) before being handed to the model.
with torch.no_grad():
    predictions = model(tensor.reshape(-1,n_inputs,300), metric.reshape(n_inputs,-1))

print(predictions)

tensor([[1.0000e+00, 1.1807e-13]], grad_fn=<SoftmaxBackward0>)


In [37]:
# Row 0 is the single input text. Its first entry is read as the ChatGPT
# probability and its second entry as the human probability.
pred_GPT, pred_human = (prob.item() for prob in predictions[0])

print('The probability the text was Chat GPT created is ', pred_GPT)
print('The probability the text was human created is ', pred_human)

The probability the text was Chat GPT created is  1.0
The probability the text was human created is  1.180696301361986e-13


What about my human-written text?

In [38]:
# Embed the human-written text and compute its metric vector.
tensor, metric = process_for_model(input_text_test2)
# Rescale the first metric in place with the same hard-coded log10 transform
# used for the other examples in this notebook.
metric[0] = (np.log10(metric[0])-2.3)/2.3

n_inputs = 1  # number of texts fed to the model in this call
# Inference only: no_grad stops torch from recording an autograd graph for a
# forward pass whose gradients are never used.
with torch.no_grad():
    predictions = model(tensor.reshape(-1,n_inputs,300), metric.reshape(n_inputs,-1))

# Column 0 is read as the ChatGPT probability, column 1 as the human one.
pred_GPT = predictions[0][0].item()
pred_human = predictions[0][1].item()

print('The probability the text was Chat GPT created is ', pred_GPT)
print('The probability the text was human created is ', pred_human)

missing these words in model:  
The probability the text was Chat GPT created is  0.00015260688087437302
The probability the text was human created is  0.999847412109375


## All together
Trying a mix of text: everything is human-written except the last line, which was generated by ChatGPT.

In [64]:
# Mixed sample: human-written text about spear shafts whose final sentence
# ("They encapsulate the essence ...") was generated by ChatGPT.
# NOTE(review): the next cell reassigns `input_text`, so on a top-to-bottom run
# this value is replaced before the prediction cell reads it.
input_text = """I'm pretty sure I've seen a few that were, but they're fairly rare. Weight is a serious consideration, especially for bronze weapons, but that wasn't the real problem.

Ironically, the wooden shaft is sturdier than an equivalently sized iron or bronze shaft would have been. A major problem for longswords in the early iron age was that they tended to bend from use. There's even a Roman account from combat with the Celts, where the Celts were using bronze longswords, and had to re-straighten their weapons on the battlefield because they were becoming bent and breaking.

In fact the Romans specifically designed a non-reusable javelin (the pilum), which exploited this issue. The pilum had an abnormally long head (roughly 60cm) When the pilum struck its target, the head would bury into the target normally, but the weight of the grip would cause the iron shaft to deform, rendering the weapon unusable.

The other side of this is that, as mentioned, wood is a lot tougher than people seem to think. I'm not sure where this idea that you could just casually chop through someone's spear with a sword came from, but as anyone who's ever tried to chop down a tree can tell you, yes in a fight between wood and metal, metal will win, but it takes a lot of effort to get there. Cutting through someone's spear with a single slice just isn't happening. That's not how wood works, and that's not how swords work. Though, attempting to do so is a good way to mangle your blade.

Wooden spear shafts were reasonably durable, and easy to replace. Bronze or iron shafts were neither.

Eventually, I think this was the 19th century, hollow steel shafts were used by cavalry. At that point in time you were looking at shafts that were significantly more durable than the wooden ones, and were cheap enough to produce that they could be easily fielded. Unfortunately, this only lasted a few decades until the proliferation of fully automatic firearms ended cavalry charges definitively. In another bit of irony, the technological advancement that allowed for effective metal spear shafts, is the same advancement that rendered them obsolete.

I think there were some iron reinforced shafts used by some cavalry forces before that, but I can't remember (nor can I quickly find) who might have been using those.

These days, hollow aluminum shafts are pretty common in javelins, and arrows, and they are generally superior to wooden shafts, however, the method to produce metallic aluminum wasn't discovered until the 19th century.

So, the short answer, wooden shafts were better. They were sturdier and easier to replace.

They encapsulate the essence of martial arts, cultural heritage, and recreational pursuits, embodying a timeless elegance that continues to intrigue and inspire enthusiasts around the world.
"""

In [70]:
# Mixed sample: a human-written question about fear in combat, with one
# ChatGPT-generated sentence appended as the final line.
# This assignment replaces any earlier value of `input_text`.
input_text="""So, uh, I know that this place is mostly for writing, but making your own TTRPG system kinda qualifies as writing, doesn't it?

Either way - here's a fighting-related question that came up during my process making it. Is fear an important aspect of combat? Small-scale combat, to be specific, not the kind where you see a thousand of knights fight another thousand of knights. 

Would wounds (or even hits that are strong enough to be felt through armour) inflict noticeable stress to a well-trained soldier? Would it be bad enough to, potentially, make them panic, even if they aren't in any actual danger yet? Or would that mostly be a problem with inexperienced fighters, and training/combat experience could make someone relatively desensitized to that sort of thing? 

It's probably worded weirdly, I know, but, in general, what I'm trying to ask here is - should one consider stress/fear as a thing that might change the tides mid-combat, even if cowardice (or anything similar) isn't a major character trait for neither of the combatants?

They encapsulate the essence of martial arts, cultural heritage, and recreational pursuits, embodying a timeless elegance that continues to intrigue and inspire enthusiasts around the world
"""

In [71]:
# Embed the mixed human/ChatGPT text and compute its metric vector.
tensor, metric = process_for_model(input_text)
# Rescale the first metric in place with the same hard-coded log10 transform
# used for the other examples in this notebook.
metric[0] = (np.log10(metric[0])-2.3)/2.3
print(metric)

n_inputs = 1  # number of texts fed to the model in this call
# Inference only: no_grad stops torch from recording an autograd graph for a
# forward pass whose gradients are never used.
with torch.no_grad():
    predictions = model(tensor.reshape(-1,n_inputs,300), metric.reshape(n_inputs,-1))

# Column 0 is read as the ChatGPT probability, column 1 as the human one.
pred_GPT = predictions[0][0].item()
pred_human = predictions[0][1].item()

print('The probability the text was Chat GPT created is ', pred_GPT)
print('The probability the text was human created is ', pred_human)

missing these words in model:  trutta
tensor([-0.0163,  0.0810,  0.1860,  0.7330, -0.9769])
The probability the text was Chat GPT created is  0.9999986886978149
The probability the text was human created is  1.3369302678256645e-06
