### Perform transfer learning to distinguish human-written from AI-generated texts

In [1]:
import pickle

# Read the pickled payload from disk.
# NOTE(review): pickle.load can execute arbitrary code -- only open files from a trusted source.
with open('../data/texts/gpt_encoded.pkl', 'rb') as pickle_file:
    inp = pickle.load(pickle_file)

# Entry 0 is used as the model inputs, entry 1 as the targets.
X, y = inp[0], inp[1]

In [2]:
from sklearn.model_selection import train_test_split

# Build the train / validation / test sets in two steps.

# Step 1: hold out 20% of the examples; the remaining 80% becomes the training set.
X_train, X_o, y_train, y_o = train_test_split(
    X,
    y,
    test_size=0.2,
    shuffle=True,
    random_state=111,
)

# Step 2: cut the held-out part in half, giving the test and validation sets.
X_test, X_val, y_test, y_val = train_test_split(
    X_o,
    y_o,
    test_size=0.5,
    shuffle=True,
    random_state=112,
)


In [3]:
from torch.utils.data import Dataset

class TextClassifierDataset(Dataset):
    """Map-style dataset that pairs each example with the label at the same position."""

    def __init__(self, text_examples, text_labels):
        """Keep references to the examples and their labels."""
        self.X, self.y = text_examples, text_labels

    def __len__(self):
        """Return the number of examples (the length of the example container)."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(example, label)`` pair stored at position ``idx``."""
        return self.X[idx], self.y[idx]

In [4]:
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

#Define model

class ClassifierModel(nn.Module):
    """Single bias-free linear layer that maps input features to class logits.

    Args:
        embedding_dim: Size of the last dimension of the input features.
        out_dim: Number of logits produced per example (defaults to 2).
    """

    def __init__(self, embedding_dim, out_dim=2):
        super().__init__()

        # Classification layer. It outputs raw logits; the softmax is applied
        # inside F.cross_entropy in forward().
        self.linear = nn.Linear(in_features=embedding_dim, out_features=out_dim, bias=False)

    def forward(self, X, targets=None):
        """Compute the logits and, when targets are supplied, the loss.

        Args:
            X: Input features whose last dimension equals ``embedding_dim``.
            targets: Targets passed to ``F.cross_entropy``. Optional: leave as
                ``None`` to run inference without labels.

        Returns:
            A ``(logits, loss)`` tuple. ``loss`` is the cross-entropy summed
            over the batch (``reduction='sum'``), or ``None`` when ``targets``
            is ``None``.
        """
        logits = self.linear(X)

        # Label-free inference: there is nothing to compare the logits against.
        if targets is None:
            return logits, None

        loss = F.cross_entropy(logits, targets, reduction='sum')

        return logits, loss


In [5]:
from torch.utils.data import DataLoader
import numpy as np
import torch

# Hyperparameters
batch_size = 4096 * 8
iterations = 1000
print_cadence = 50      # report train/val loss every this many iterations
learning_rate = 0.001

# Train on the GPU when one is available; otherwise fall back to the CPU so the
# cell still runs end to end.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

#Set random seed
np.random.seed(0)
torch.manual_seed(1)

#Instantiate model
# The feature width is read from X_train so this cell does not depend on
# whatever the notebook-level name `X` currently holds.
classifier = ClassifierModel(X_train.shape[1], 2)
classifier.to(device)

#Define optimizer
optimizer = torch.optim.Adam(classifier.parameters(), lr=learning_rate)

#Set up datasets and dataloaders
train_dataset = TextClassifierDataset(X_train, y_train)
val_dataset = TextClassifierDataset(X_val, y_val)

train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Fixed-order loaders used only for the periodic loss report; built once
# instead of at every report.
train_scoring = DataLoader(train_dataset, batch_size=batch_size, shuffle=False)
val_scoring = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)


def summed_loss(model, loader, device):
    """Return the model's loss accumulated over every batch of ``loader``.

    Gradients are disabled while scoring. The caller is responsible for
    switching the model between eval and train mode.
    """
    total = 0.0
    with torch.no_grad():
        for X_batch, y_batch in loader:
            _, batch_loss = model(X_batch.to(device), y_batch.to(device))
            total += batch_loss
    return total


generator = iter(train_dataloader)

#Perform training
for iteri in range(iterations):

    #Output as desired
    if iteri % print_cadence == 0:

        #Set to eval mode
        classifier.eval()

        #Compute train/val losses
        train_loss = summed_loss(classifier, train_scoring, device)
        val_loss = summed_loss(classifier, val_scoring, device)

        print(f'iter: {iteri} train_loss: {train_loss} val_loss: {val_loss}')

        #Set back to train mode
        classifier.train()

    #Zero gradients
    classifier.zero_grad()

    #Get random batch
    # Batch-specific names keep the dataset-level X / y from being overwritten.
    try:
        X_batch, y_batch = next(generator)
    except StopIteration:
        # Restart the generator once the previous pass over the data is exhausted.
        generator = iter(train_dataloader)
        X_batch, y_batch = next(generator)

    #Pass model forward
    _, loss = classifier(X_batch.to(device), y_batch.to(device))

    #Call Backward to get gradients
    loss.backward()

    #Perform optimization
    optimizer.step()

iter: 0 train_loss: 911051.375 val_loss: 113506.5625
iter: 50 train_loss: 455204.78125 val_loss: 56760.3359375
iter: 100 train_loss: 399202.03125 val_loss: 49764.48046875
iter: 150 train_loss: 373082.4375 val_loss: 46491.29296875
iter: 200 train_loss: 356354.3125 val_loss: 44443.37109375
iter: 250 train_loss: 344450.96875 val_loss: 42948.56640625
iter: 300 train_loss: 335571.96875 val_loss: 41881.5859375
iter: 350 train_loss: 329228.34375 val_loss: 41125.015625
iter: 400 train_loss: 323440.46875 val_loss: 40408.7265625
iter: 450 train_loss: 319172.96875 val_loss: 39889.94140625
iter: 500 train_loss: 315468.6875 val_loss: 39452.73828125
iter: 550 train_loss: 312475.875 val_loss: 39098.30078125
iter: 600 train_loss: 310083.53125 val_loss: 38822.02734375
iter: 650 train_loss: 308640.5 val_loss: 38660.51953125
iter: 700 train_loss: 306146.625 val_loss: 38342.96484375
iter: 750 train_loss: 304377.875 val_loss: 38143.94140625
iter: 800 train_loss: 302818.6875 val_loss: 37949.09375
iter: 850 

### Evaluate test set

In [44]:
test_dataset = TextClassifierDataset(X_test, y_test)
scoring = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Score on whichever device the model's weights currently live on, rather than
# assuming a GPU.
eval_device = next(classifier.parameters()).device

test_loss = 0.0
logits_store = []
y_store = []

classifier.eval()

with torch.no_grad():

    # Batch-specific names keep the dataset-level X / y from being overwritten.
    for X_batch, y_batch in scoring:
        batch_logits, loss_temp = classifier(X_batch.to(eval_device), y_batch.to(eval_device))
        test_loss += loss_temp

        logits_store.append(batch_logits)
        y_store.append(y_batch)

# Stitch the per-batch results back together in dataset order (shuffle=False above).
test_logits = torch.cat(logits_store)
y_true = torch.cat(y_store).to('cpu')

print(test_loss)

# Restore training mode, as the original cell did.
classifier.train()

tensor(37321.1016, device='cuda:0')


In [45]:
# Decision threshold applied to the predicted probability of class 1.
cut_off = 0.5

# Softmax across the class dimension, then keep only the class-1 column as a NumPy array.
normed_class_1 = F.softmax(test_logits, dim=1).to('cpu').numpy()[:, 1]

# Hard labels: 1 when the class-1 probability is strictly above the cut-off, otherwise 0.
labels = [1 if prob > cut_off else 0 for prob in normed_class_1]

In [46]:
from sklearn.metrics import confusion_matrix

# Pin the class order to [0, 1] so the matrix is always 2x2 and unpacks into
# exactly four counts, even if one class is missing from both the truth and
# the predictions.
tn, fp, fn, tp = confusion_matrix(y_true.numpy(), labels, labels=[0, 1]).ravel()
tn, fp, fn, tp

(28694, 7811, 6767, 95980)

In [47]:
# All three metrics are derived from the confusion-matrix counts only.
precision = tp / (tp + fp)
accuracy = (tp + tn) / (tp + fp + tn + fn)
# Recall = TP / (TP + FN). Using the counts instead of y_true.sum() keeps this
# correct even if the positive class is not encoded as exactly 1.
recall = tp / (tp + fn)

In [48]:
# Display the accuracy, precision and recall computed in the previous cell.
accuracy, precision, recall

(0.895312096056071, 0.9247429931304255, 0.9341391962782368)

### Compare to a baseline classifier that always predicts the most frequent class

In [49]:
from sklearn.dummy import DummyClassifier

# Later cells call X_test.numpy(), which needs the tensor on the CPU.
X_test = X_test.to('cpu')

# Fit the baseline on the training split only. Fitting on the test split would
# let the baseline see the labels it is later scored on.
# torch.as_tensor(...).cpu() accepts tensors on any device as well as arrays.
clf = DummyClassifier(strategy='most_frequent')
clf.fit(torch.as_tensor(X_train).cpu().numpy(), torch.as_tensor(y_train).cpu().numpy())

In [50]:
# Baseline predictions for every test example (requires X_test to be a CPU tensor).
y_naive = clf.predict(X_test.numpy())

In [51]:
# NOTE: this overwrites the tn/fp/fn/tp counts computed earlier for the model.
# The class order is pinned to [0, 1] so the matrix is always 2x2 and unpacks
# into four counts, even though the baseline only ever predicts one class.
tn, fp, fn, tp = confusion_matrix(y_true, y_naive, labels=[0, 1]).ravel()

In [52]:
# Baseline metrics from the dummy classifier's confusion-matrix counts.
# NOTE: these reuse (and overwrite) the `accuracy` / `precision` names from the model's metrics.
accuracy = (tn + tp) / (tn + fp + fn + tp)
precision = tp / (tp + fp)
accuracy, precision

(0.7378493666159194, 0.7378493666159194)

In [56]:
# Sum of the test labels divided by their count, i.e. the share of class-1
# examples assuming labels are 0/1 -- TODO confirm the label encoding.
y_true.numpy().sum() / len(y_true.numpy())

0.7378493666159194