In [81]:
##########################################################################
##### machine learning script for constructivity ######
## based upon the chatGPT_test_script2 script #########
##########################################################################
### Step 1: install packages - 
##### packages are necessary to install and load, given that they have the built in functions necessary to run complex tasks. 
## They effectively act as one of the most crucial time saving activities that would otherwise lead to overly long and 
## duplicative scripts. 
!{sys.executable} -m pip install tiktoken
!{sys.executable} -m pip install openai

### note: you are not expected to remember all of these; just for the best to copy and paste these sections 

## read in pkgs 
import sys
import os
# !{sys.executable} -m pip install xgboost==1.7.5 # note: needed since it looks like anaconda installs an earlier version 
# of the package, which is not helpful. 1.7.5 allows for the categorical data of interest to be used. 

# !{sys.executable} -m pip install requests #; this code here can be used to install packages on anaconda/jupyter notebook 
### I believe the below should be installed by default 
import requests # web scraping 
from bs4 import BeautifulSoup # for web scraping 
import itertools # for efficient operation of loops 
import pandas as pd # necessary for reading in, creating, and manipulating data frames 
import csv ## for importing/exporting csvs 
import glob ## for finding files in path
import re
import numpy as np
import tiktoken
import openai



In [82]:
## install the pkgs unlikely to be pre-installed 
!{sys.executable} -m pip install torch
!{sys.executable} -m pip install transformers
### import torch packages and such 

import torch
from torch.utils.data import Dataset, DataLoader
from transformers import GPT2Tokenizer, GPT2LMHeadModel, GPT2Config
### these were loaded in successfully 



In [83]:
# Define your labeled dataset class
class LabeledDataset(Dataset):
    def __init__(self, tokenizer, comments, labels):
        self.tokenizer = tokenizer
        self.comments = comments
        self.labels = labels

    def __len__(self):
        return len(self.comments)

    def __getitem__(self, index):
        comment = self.comments[index]
        label = self.labels[index]
        inputs = self.tokenizer.encode_plus(comment, add_special_tokens=True, padding='max_length', max_length=128, truncation=True)
        input_ids = inputs['input_ids']
        attention_mask = inputs['attention_mask']
        return {
            'input_ids': torch.tensor(input_ids, dtype=torch.long),
            'attention_mask': torch.tensor(attention_mask, dtype=torch.long),
            'labels': torch.tensor(label, dtype=torch.long)
        }

In [84]:
# Tokenizer and model configuration
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
#config = GPT2Config.from_pretrained('gpt2', num_labels=2)  # 2 classes: toxic and non-toxic; can expand as needed 
config = GPT2Config.from_pretrained('gpt2')

In [85]:
#### import the data from OSU comments 
rmp_df = pd.read_csv('coding/text_cleaning_data/scored_rmp_data_osu_final.csv')
rmp_df

Unnamed: 0,row,quality_of_class,difficulty_of_class,class_code,college,prof_firstname,prof_lastname,comment,out_misrep,out_emo_lang,...,pb_origin,pb_nuero_div,pb_phys_able,pb_pol_affil,complex,constructive,reflective,outrage_agg,personal_attack_agg,prejudice_agg
0,1,2.0,4,POLITSC3500,OHIO STATE UNIVERSITY,ALEX,ACS,"Only graded on 4 assignments (30% Midterm, 30%...",0,0,...,0.0,0.0,0,0.0,0,0,0,0,0,0.0
1,2,3.0,4,POLITSC3500,OHIO STATE UNIVERSITY,ALEX,ACS,"Final grade is only based on two exams, readin...",0,0,...,0.0,0.0,0,0.0,1,0,0,0,0,0.0
2,3,4.0,1,POLITSC1100,OHIO STATE UNIVERSITY,ALEX,ACS,Class was super easy. One reading quiz a week ...,0,0,...,0.0,0.0,0,0.0,0,0,0,0,0,0.0
3,4,4.0,2,POLITSC3115,OHIO STATE UNIVERSITY,ALEX,ACS,"Lecture could be dry at times, but I still lik...",0,0,...,0.0,0.0,0,0.0,0,0,1,0,0,0.0
4,5,5.0,3,POLITSC3115,OHIO STATE UNIVERSITY,ALEX,ACS,"was an excellent lecturer. Insightful, even h...",0,0,...,0.0,0.0,0,0.0,0,0,0,0,0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2795,16,5.0,3,PSYCH1100H,OHIO STATE UNIVERSITY,ANNA,YOCOM,"I had Dr. for PSYCH 1100H last semester, and ...",0,0,...,0.0,0.0,0,0.0,0,0,0,0,0,0.0
2796,17,5.0,3,PSYCH2200,OHIO STATE UNIVERSITY,ANNA,YOCOM,A great professor for this class. You only nee...,0,0,...,0.0,0.0,0,0.0,0,0,0,0,0,0.0
2797,18,5.0,3,PSY1100H,OHIO STATE UNIVERSITY,ANNA,YOCOM,LOVED this class! Prof made me want to attend...,0,0,...,0.0,0.0,0,0.0,0,0,0,0,0,0.0
2798,19,5.0,3,PSY2220,OHIO STATE UNIVERSITY,ANNA,YOCOM,Dr. made this class much better than I expect...,0,0,...,0.0,0.0,0,0.0,0,0,0,0,0,0.0


In [86]:
### winnow down in order to create more balanced data 
constructive_df = rmp_df[rmp_df['constructive']>0]
unconstructive_df = rmp_df[rmp_df['constructive']==0]
constructive_df

Unnamed: 0,row,quality_of_class,difficulty_of_class,class_code,college,prof_firstname,prof_lastname,comment,out_misrep,out_emo_lang,...,pb_origin,pb_nuero_div,pb_phys_able,pb_pol_affil,complex,constructive,reflective,outrage_agg,personal_attack_agg,prejudice_agg
7,8,2.0,5,POLITSC3115,OHIO STATE UNIVERSITY,ALEX,ACS,Many questions are completely ambiguous or hav...,1,0,...,0.0,0.0,0,0.0,2,2,0,1,0,0.0
38,1,3.5,5,H508,OHIO STATE UNIVERSITY,HAL,ARKES,Dr. was a very good professor. I really had n...,0,0,...,0.0,0.0,0,0.0,0,2,0,0,0,0.0
64,16,3.0,3,POLI502,OHIO STATE UNIVERSITY,HERB,ASHER,"decent professor and a funny guy, although you...",0,0,...,0.0,0.0,0,0.0,1,2,0,0,0,0.0
94,18,2.0,4,POLITSC4138,OHIO STATE UNIVERSITY,LAWRENCE,BAUM,He does not use any kind of slides or another ...,0,0,...,0.0,0.0,0,0.0,1,2,1,0,0,0.0
137,1,5.0,3,SOC5925,OHIO STATE UNIVERSITY,PAUL,BELLAIR,Dr. is an excellent professor. He really care...,0,0,...,0.0,0.0,0,0.0,0,1,0,0,0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2634,1,3.0,2,PS3280,OHIO STATE UNIVERSITY,SARA,WATSON,"Very kind, took 4285 with her last semester, d...",0,0,...,0.0,0.0,0,0.0,0,1,1,0,0,0.0
2635,2,3.0,3,POLITSC4285,OHIO STATE UNIVERSITY,SARA,WATSON,Extremely nice & knowledgeable yet unorganized...,0,0,...,0.0,0.0,0,0.0,1,1,0,0,0,0.0
2710,2,1.0,5,PHYS367,OHIO STATE UNIVERSITY,JOHN,WILKINS,His class was the most infuriating class I've ...,0,0,...,0.0,0.0,0,0.0,0,1,0,0,0,0.0
2712,1,1.0,4,SOCIOL3549,OHIO STATE UNIVERSITY,KRISTI,WILLIAMS,If you need to take this course and struggle w...,1,0,...,0.0,0.0,0,0.0,1,1,0,1,0,0.0


In [87]:
### lets test replace vals in column 
constructive_df['constructive'].values[constructive_df['constructive']>1] = 1
#constructive_df['constructive'].unique()

In [88]:
### now we will want to get 77 randomly sampled unconstructive comments 
import random
random.seed(1337)
unconstructive_df = unconstructive_df.sample(n=77)
unconstructive_df

Unnamed: 0,row,quality_of_class,difficulty_of_class,class_code,college,prof_firstname,prof_lastname,comment,out_misrep,out_emo_lang,...,pb_origin,pb_nuero_div,pb_phys_able,pb_pol_affil,complex,constructive,reflective,outrage_agg,personal_attack_agg,prejudice_agg
2076,7,5.0,3,SOCIOL3410,OHIO STATE UNIVERSITY,MARY LIA,REITER,She is so fun and you can tell she genuinely c...,0,0,...,0.0,0.0,0,0.0,0,0,0,0,0,0.0
1651,2,4.0,5,POLITSC1300,OHIO STATE UNIVERSITY,JENNIFER,MITZEN,This professor as a person was amazing. Very i...,0,0,...,0.0,0.0,0,0.0,0,0,0,0,0,0.0
951,12,5.0,3,PSYCHH100,OHIO STATE UNIVERSITY,DAVID,HOTHERSALL,Dr. may be the best professor I've had in col...,0,0,...,0.0,0.0,0,0.0,0,0,0,0,0,0.0
2774,4,1.0,4,PSYCH1100,OHIO STATE UNIVERSITY,JAMES,WIRTH,Great person in general and professor in gener...,0,0,...,0.0,0.0,0,0.0,0,0,0,0,0,0.0
890,9,2.0,2,ML4201,OHIO STATE UNIVERSITY,CURTIS,HAUGTVEDT,Class is almost entirely based off of Ted talk...,0,0,...,0.0,0.0,0,0.0,0,0,0,0,0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1796,15,5.0,1,PSYCH508,OHIO STATE UNIVERSITY,THOMAS,NYGREN,Dr. was the best professor I ever had! So nic...,0,0,...,0.0,0.0,0,0.0,0,0,0,0,0,0.0
293,2,2.0,4,POLITSC3780,OHIO STATE UNIVERSITY,BEAR,BRAUMOELLER,"Do not take 3780! They ""teach"" you with YouTub...",1,2,...,0.0,0.0,0,0.0,0,0,0,5,0,0.0
1460,12,4.0,4,BUS400,OHIO STATE UNIVERSITY,ROBERT,"LOUNT, JR","Prof Lount himself is not a bad guy, but the c...",0,0,...,0.0,0.0,0,0.0,0,0,0,0,0,0.0
2141,5,1.0,2,PSYCH4555,OHIO STATE UNIVERSITY,LESLIE,RUDY,"She was not helpful at all, and was often rude...",0,0,...,0.0,0.0,0,0.0,0,0,0,2,2,0.0


In [89]:
### good, no let's rebind 
rmp_df_test = constructive_df.append(unconstructive_df)
rmp_df_test.reset_index()
rmp_df_test ## works 

Unnamed: 0,row,quality_of_class,difficulty_of_class,class_code,college,prof_firstname,prof_lastname,comment,out_misrep,out_emo_lang,...,pb_origin,pb_nuero_div,pb_phys_able,pb_pol_affil,complex,constructive,reflective,outrage_agg,personal_attack_agg,prejudice_agg
7,8,2.0,5,POLITSC3115,OHIO STATE UNIVERSITY,ALEX,ACS,Many questions are completely ambiguous or hav...,1,0,...,0.0,0.0,0,0.0,2,1,0,1,0,0.0
38,1,3.5,5,H508,OHIO STATE UNIVERSITY,HAL,ARKES,Dr. was a very good professor. I really had n...,0,0,...,0.0,0.0,0,0.0,0,1,0,0,0,0.0
64,16,3.0,3,POLI502,OHIO STATE UNIVERSITY,HERB,ASHER,"decent professor and a funny guy, although you...",0,0,...,0.0,0.0,0,0.0,1,1,0,0,0,0.0
94,18,2.0,4,POLITSC4138,OHIO STATE UNIVERSITY,LAWRENCE,BAUM,He does not use any kind of slides or another ...,0,0,...,0.0,0.0,0,0.0,1,1,1,0,0,0.0
137,1,5.0,3,SOC5925,OHIO STATE UNIVERSITY,PAUL,BELLAIR,Dr. is an excellent professor. He really care...,0,0,...,0.0,0.0,0,0.0,0,1,0,0,0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1796,15,5.0,1,PSYCH508,OHIO STATE UNIVERSITY,THOMAS,NYGREN,Dr. was the best professor I ever had! So nic...,0,0,...,0.0,0.0,0,0.0,0,0,0,0,0,0.0
293,2,2.0,4,POLITSC3780,OHIO STATE UNIVERSITY,BEAR,BRAUMOELLER,"Do not take 3780! They ""teach"" you with YouTub...",1,2,...,0.0,0.0,0,0.0,0,0,0,5,0,0.0
1460,12,4.0,4,BUS400,OHIO STATE UNIVERSITY,ROBERT,"LOUNT, JR","Prof Lount himself is not a bad guy, but the c...",0,0,...,0.0,0.0,0,0.0,0,0,0,0,0,0.0
2141,5,1.0,2,PSYCH4555,OHIO STATE UNIVERSITY,LESLIE,RUDY,"She was not helpful at all, and was often rude...",0,0,...,0.0,0.0,0,0.0,0,0,0,2,2,0.0


In [90]:
## with that read in, let's get the comments and labels specified 
comments = rmp_df_test['comment']
labels_const = rmp_df_test['constructive']
comments = comments.tolist()
labels_const = labels_const.tolist()
type(labels_const) ## want to make just a list. 

list

In [91]:
### let's see if it is th emutliple cats causing issues; let's make into binary
labels_const

## when I changed a value to 2 in the chatGPT_test_script2, everything still worked. This implies that there is some other 
# error going on. 

[1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0]

In [92]:
# Create the labeled dataset object
tokenizer.add_special_tokens({'pad_token': '0'})
dataset = LabeledDataset(tokenizer, comments, labels_const) # tokenizer above (from gpt2), 
#with comments and labels from the RMP data set 
# and the labels the 0s and 1s 

# Data loader
batch_size = 2
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
len(dataloader) # lrngth now mismatch; diff than dataset 

77

In [93]:
# Initialize the model
model = GPT2LMHeadModel.from_pretrained('gpt2', config=config) #  GPT2 Model transformer with a language modeling
# head on top (linear layer with weights tied to the input embeddings).
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# Fine-tuning parameters
num_epochs = 5
learning_rate = 2e-5
dataloader

<torch.utils.data.dataloader.DataLoader at 0x1ea03769490>

In [39]:
### chek the labels just to be safe 
# labels_const.unique() #good
# the script below taking way too long; lets winnow down the data set
#labels_const

#dataloader[[0]]

In [94]:
# The model itself. Let's see if we can't get this working 
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
loss_fn = torch.nn.BCEWithLogitsLoss()
### problem remains even if we change data solely to 0 or 1 
### managed to get the same error. Therefore, we should be able to work here. 
## we reset the index, then switched to 0 and 1, and it got to the model part. Let's see what happens.
#IF we just leave it like so 
for epoch in range(num_epochs):
    model.train()
    total_loss = 0.0
        ## debug print here 
    print("got here")
    for batch in dataloader:

        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        print(labels)
        optimizer.zero_grad()
        outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=input_ids)
        # print(outputs)
        loss = outputs.loss
        logits = outputs.logits
        total_loss += loss.item()

        loss.backward()
        optimizer.step()

    avg_loss = total_loss / len(dataloader)
    print(f"Epoch {epoch+1}/{num_epochs} - Average Loss: {avg_loss:.4f}")

# Save the fine-tuned model
save_path = 'coding/models'

model.save_pretrained(save_path)
tokenizer.save_pretrained(save_path)

got here
tensor([1, 0])
tensor([0, 0])
tensor([0, 0])
tensor([1, 1])
tensor([1, 1])
tensor([1, 0])
tensor([0, 0])
tensor([0, 1])
tensor([0, 1])
tensor([1, 1])
tensor([1, 1])
tensor([1, 0])
tensor([1, 0])
tensor([0, 1])
tensor([0, 0])
tensor([0, 0])
tensor([1, 1])
tensor([1, 0])
tensor([0, 0])
tensor([0, 0])
tensor([0, 0])
tensor([1, 1])
tensor([1, 1])
tensor([1, 0])
tensor([0, 0])
tensor([1, 0])
tensor([0, 1])
tensor([0, 0])
tensor([0, 0])
tensor([1, 1])
tensor([1, 1])
tensor([1, 1])
tensor([0, 0])
tensor([1, 1])
tensor([1, 0])
tensor([0, 0])
tensor([1, 1])
tensor([0, 0])
tensor([1, 0])
tensor([0, 1])
tensor([0, 1])
tensor([0, 1])
tensor([0, 1])
tensor([0, 1])
tensor([1, 0])
tensor([1, 0])
tensor([1, 1])
tensor([1, 1])
tensor([1, 0])
tensor([1, 1])
tensor([1, 1])
tensor([0, 0])
tensor([0, 0])
tensor([0, 1])
tensor([0, 0])
tensor([1, 1])
tensor([1, 0])
tensor([1, 1])
tensor([0, 0])
tensor([1, 0])
tensor([0, 1])
tensor([1, 0])
tensor([0, 1])
tensor([0, 1])
tensor([0, 1])
tensor([0, 1])
t

('coding/models\\tokenizer_config.json',
 'coding/models\\special_tokens_map.json',
 'coding/models\\vocab.json',
 'coding/models\\merges.txt',
 'coding/models\\added_tokens.json')

In [96]:
### read in the coding model 
config1 = model.config
config1 # GPT2LMHeadModel

GPT2Config {
  "_name_or_path": "gpt2",
  "activation_function": "gelu_new",
  "architectures": [
    "GPT2LMHeadModel"
  ],
  "attn_pdrop": 0.1,
  "bos_token_id": 50256,
  "embd_pdrop": 0.1,
  "eos_token_id": 50256,
  "initializer_range": 0.02,
  "layer_norm_epsilon": 1e-05,
  "model_type": "gpt2",
  "n_ctx": 1024,
  "n_embd": 768,
  "n_head": 12,
  "n_inner": null,
  "n_layer": 12,
  "n_positions": 1024,
  "reorder_and_upcast_attn": false,
  "resid_pdrop": 0.1,
  "scale_attn_by_inverse_layer_idx": false,
  "scale_attn_weights": true,
  "summary_activation": null,
  "summary_first_dropout": 0.1,
  "summary_proj_to_labels": true,
  "summary_type": "cls_index",
  "summary_use_proj": true,
  "task_specific_params": {
    "text-generation": {
      "do_sample": true,
      "max_length": 50
    }
  },
  "torch_dtype": "float32",
  "transformers_version": "4.30.2",
  "use_cache": true,
  "vocab_size": 50257
}

In [161]:
### test apply a model 
#tokenizer = AutoTokenizer.from_pretrained("gpt2")
inputs = tokenizer(comments[0], return_tensors="pt")
inputs ## works in tokenizing 

{'input_ids': tensor([[ 7085,  2683,   389,  3190, 27102,   393,   423, 24916,  1321,   543,
          1838,   262, 44066,  8458,    12,  3106,    13, 11075,  4966,   503,
           286,   640,    11,  2811,   373,  7317,   281,   807,    15,  7225,
          2735,   340,   338,   257,  8699,  7441,   290,   356,   821, 10416,
          1771,   339, 26929,   340,   866,    30,  1400, 19143,   664,    82,
            11, 13677, 14858,    11,  3095, 38707,  1280,   379,   860,  2390,
           329,   352,  1711,    11,  1222,   645, 19392,  2716,  1566,  2745,
           706,   465, 25917,    13]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
         1, 1]])}

In [162]:
outputs1 = model(**inputs)
outputs1

CausalLMOutputWithCrossAttentions(loss=None, logits=tensor([[[-120.2931, -121.4085, -125.3487,  ..., -124.3350, -125.6643,
          -121.5608],
         [-148.2143, -151.4218, -155.6602,  ..., -162.2290, -160.5134,
          -152.6230],
         [-146.3144, -148.5823, -152.2780,  ..., -153.3136, -153.0187,
          -150.8448],
         ...,
         [-141.9021, -144.8324, -147.3861,  ..., -152.7504, -153.6962,
          -144.9907],
         [-107.2739, -114.3130, -113.4910,  ..., -125.5334, -125.4026,
          -113.3217],
         [-145.2318, -147.3520, -144.9373,  ..., -153.7805, -156.0593,
          -143.3172]]], grad_fn=<UnsafeViewBackward0>), past_key_values=((tensor([[[[-0.9450,  1.3250,  0.4200,  ..., -0.9733, -0.1052,  1.0266],
          [-2.7381,  1.8381,  1.4737,  ..., -0.5747, -0.9686,  2.3472],
          [-2.0625,  2.3397,  2.2165,  ..., -1.1776, -2.5529,  2.7262],
          ...,
          [-1.6546,  2.6567,  2.4040,  ..., -0.5772, -1.1193,  1.1970],
          [-1.4731,  

In [151]:
### create inv logit fxn 
def inv_logit(p):
    return np.exp(p) / (1+np.exp(p))
def logit(p):
    return np.log(p) - np.log(1 - p)

In [164]:
## try and predict 
pred_logits = outputs1.logits
#print(pred_logits)
probs = pred_logits.softmax(dim=-1).detach().cpu().flatten().numpy().tolist()
#print(probs)
test_val =sum(probs)/len(probs) ### gets us the max val
#inv_logit(sum(probs))
test_val

1.9898105396173964e-05

In [143]:
### see what happens if we try the inv logit on max 
inv_logit(-100)

3.720075976020836e-44

In [159]:
test_val

1.989803192823793e-05

In [168]:
#!{sys.executable} -m pip install TensorFlow
#import TensorFlow as tf
#tf.math.reduce.sum(logits1)
outputs1.logits.softmax(-1)

tensor([[[8.5900e-05, 2.8157e-05, 5.4748e-07,  ..., 1.5087e-06,
          3.9932e-07, 2.4180e-05],
         [6.6203e-03, 2.6783e-04, 3.8649e-06,  ..., 5.4244e-09,
          3.0159e-08, 8.0576e-05],
         [3.9390e-05, 4.0780e-06, 1.0126e-07,  ..., 3.5946e-08,
          4.8276e-08, 4.2448e-07],
         ...,
         [1.2152e-04, 6.4865e-06, 5.0464e-07,  ..., 2.3620e-09,
          9.1734e-10, 5.5368e-06],
         [2.6155e-02, 2.2935e-05, 5.2176e-05,  ..., 3.0729e-10,
          3.5021e-10, 6.1802e-05],
         [8.3408e-05, 1.0010e-05, 1.1197e-04,  ..., 1.6165e-08,
          1.6554e-09, 5.6588e-04]]], grad_fn=<SoftmaxBackward0>)

In [174]:
test_softmax=outputs1.logits.softmax(1).detach() # the different number pull up different vals at least 

In [175]:
# len(probs) # 3719018
max(test_softmax)

tensor([[6.2566e-21, 2.1409e-19, 7.9503e-20,  ..., 5.9807e-18, 1.8039e-17,
         6.8534e-19],
        [4.6810e-33, 1.9770e-32, 5.4484e-33,  ..., 2.0874e-34, 1.3225e-32,
         2.2170e-32],
        [3.1293e-32, 3.3821e-31, 1.6039e-31,  ..., 1.5542e-30, 2.3787e-29,
         1.3123e-31],
        ...,
        [2.5804e-30, 1.4379e-29, 2.1364e-29,  ..., 2.7297e-30, 1.2081e-29,
         4.5750e-29],
        [2.8218e-15, 2.5832e-16, 1.1223e-14,  ..., 1.8044e-18, 2.3434e-17,
         2.5946e-15],
        [9.2383e-32, 1.1574e-30, 2.4726e-28,  ..., 9.7444e-31, 1.1371e-30,
         2.4390e-28]])

In [None]:
# Initialize the model
# GPT2DoubleHeadsModel for the multiclass 
model = GPT2LMHeadModel.from_pretrained('gpt2', config=config) #  GPT2 Model transformer with a language modeling
# head on top (linear layer with weights tied to the input embeddings).
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

# Fine-tuning parameters
num_epochs = 5
learning_rate = 2e-5
dataloader
### let's try the model, though this time with a diff chatgpt 
# The model itself. Let's see if we can't get this working 
optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
loss_fn = torch.nn.BCEWithLogitsLoss()
### problem remains even if we change data solely to 0 or 1 
### managed to get the same error. Therefore, we should be able to work here. 
## we reset the index, then switched to 0 and 1, and it got to the model part. Let's see what happens.
#IF we just leave it like so 
for epoch in range(num_epochs):
    model.train()
    total_loss = 0.0
        ## debug print here 
    print("got here")
    for batch in dataloader:

        input_ids = batch['input_ids'].to(device)
        attention_mask = batch['attention_mask'].to(device)
        labels = batch['labels'].to(device)
        print(labels)
        optimizer.zero_grad()
        outputs = model(input_ids=input_ids, attention_mask=attention_mask, labels=input_ids)
        # print(outputs)
        loss = outputs.loss
        logits = outputs.logits
        total_loss += loss.item()

        loss.backward()
        optimizer.step()

    avg_loss = total_loss / len(dataloader)
    print(f"Epoch {epoch+1}/{num_epochs} - Average Loss: {avg_loss:.4f}")

# Save the fine-tuned model
save_path = 'coding/models'

model.save_pretrained(save_path)
tokenizer.save_pretrained(save_path)