In [1]:
pip install transformers datasets scikit-learn torch evaluate

Collecting datasets
  Downloading datasets-3.3.2-py3-none-any.whl.metadata (19 kB)
Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Co

In [4]:
import os

def load_annotations(annotations_file):
    annotations = []
    position = []
    protagonist_annotations = []
    antagonist_annotations = []
    innocent_annotations = []
    protagonist_positions = []
    antagonist_positions = []
    innocent_positions = []

    with open(annotations_file, 'r') as file:
        for line in file:
            parts = line.strip().split('\t')
            pos = [parts[2], parts[3]]
            position.append(pos)

            # Find the index where "Protagonist", "Antagonist", or "Innocent" appears
            for i, part in enumerate(parts):
                if part in ["Protagonist", "Antagonist", "Innocent"]:
                    main_info = parts[:i+1]  # Everything up to and including the found role
                    subcategories = parts[i+1:]  # Everything after the role
                    annotation_entry = main_info + [subcategories]
                    annotations.append(annotation_entry)

                    # Add to respective category list
                    if part == "Protagonist":
                        protagonist_annotations.append(annotation_entry)
                        protagonist_positions.append(pos)
                    elif part == "Antagonist":
                        antagonist_annotations.append(annotation_entry)
                        antagonist_positions.append(pos)
                    elif part == "Innocent":
                        innocent_annotations.append(annotation_entry)
                        innocent_positions.append(pos)

                    break

    return annotations, position, protagonist_annotations, antagonist_annotations, innocent_annotations, protagonist_positions, antagonist_positions, innocent_positions

def load_documents(raw_documents_folder):
    documents = {}
    for filename in os.listdir(raw_documents_folder):
        file_path = os.path.join(raw_documents_folder, filename)
        with open(file_path, 'r', encoding='utf-8') as file:
            documents[filename] = file.read()
    return documents

def process_documents(annotations_file, raw_documents_folder):
    annotations, position_array, protagonist_annotations, antagonist_annotations, innocent_annotations, protagonist_positions, antagonist_positions, innocent_positions = load_annotations(annotations_file)
    documents = load_documents(raw_documents_folder)

    result_protagonist = []
    result_antagonist = []
    result_innocent = []

    for annotation in protagonist_annotations:
        filename = annotation[0]
        if filename in documents:
            document_text = documents[filename]
            result_protagonist.append([document_text] + annotation)

    for annotation in antagonist_annotations:
        filename = annotation[0]
        if filename in documents:
            document_text = documents[filename]
            result_antagonist.append([document_text] + annotation)

    for annotation in innocent_annotations:
        filename = annotation[0]
        if filename in documents:
            document_text = documents[filename]
            result_innocent.append([document_text] + annotation)

    return result_protagonist, result_antagonist, result_innocent, protagonist_positions, antagonist_positions, innocent_positions

annotations_file = 'subtask-1-annotations.txt'
raw_documents_folder = 'raw-documents'
result_protagonist, result_antagonist, result_innocent, protagonist_positions, antagonist_positions, innocent_positions = process_documents(annotations_file, raw_documents_folder)


In [None]:
from huggingface_hub import notebook_login

notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [5]:
import os
from datasets import DatasetDict, Dataset
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
import numpy as np
import torch

os.environ["WANDB_DISABLED"] = "true"

# Define label mappings for each category
protagonist_labels = {"Guardian": 0, "Martyr": 1, "Peacemaker": 2, "Rebel": 3, "Underdog": 4, "Virtuous": 5}
antagonist_labels = {"Instigator": 0, "Conspirator": 1, "Tyrant": 2, "Foreign Adversary": 3, "Traitor": 4, "Spy": 5,
                     "Saboteur": 6, "Corrupt": 7, "Incompetent": 8, "Terrorist": 9, "Deceiver": 10, "Bigot": 11}
innocent_labels = {"Forgotten": 0, "Exploited": 1, "Victim": 2, "Scapegoat": 3}

# Function to map labels to integers (handling multiple labels)
def map_labels(label_list, label_mapping):
    return [label_mapping[label] for label in label_list if label in label_mapping]

# Processed data for each category
protagonist_processed_data = [
    {"text": entry[0], "label": map_labels(entry[-1], protagonist_labels)}
    for entry in result_protagonist
]

antagonist_processed_data = [
    {"text": entry[0], "label": map_labels(entry[-1], antagonist_labels)}
    for entry in result_antagonist
]

innocent_processed_data = [
    {"text": entry[0], "label": map_labels(entry[-1], innocent_labels)}
    for entry in result_innocent
]

# Print sample processed data to verify
print("Protagonist Example:", protagonist_processed_data[:2])
print("Antagonist Example:", antagonist_processed_data[:2])
print("Innocent Example:", innocent_processed_data[:2])


Protagonist Example: [{'text': 'Russia: Clashes erupt in Bashkortostan as rights activist sentenced \n\n Russian riot police clashed with protesters in Bashkortostan following the sentencing of rights activist Fail Alsynov to four years in a penal colony. Social media footage captured the confrontations near the court in Baymak, with supporters engaging in clashes with police, including throwing snowballs.\n\nViolent clashes in Baymak— NEXTA (@nexta_tv) January 17, 2024\n\nLaw enforcers used stun grenades in Baymak, Bashkortostan, Russia. The demonstrators responded by throwing snow and ice at them and forced them to retreat.\n\nIt is reported that negotiations are underway between the protesters and special forces: the law… pic.twitter.com/AVHf2gBi7w\n\nAlsynov’s conviction for inciting ethnic hatred sparked rare large-scale protests in Russia, where the risk of arrest typically stifles such demonstrations. Reports suggest thousands participated in the multi-day protest in -20°C tempe

In [None]:
#Protagonist
from sklearn.model_selection import train_test_split
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from torch.utils.data import Dataset, DataLoader
import torch
import evaluate
from transformers import AutoTokenizer
import torch.nn.functional as F

# Initialize tokenizer
tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-cased")

special_tokens = {"additional_special_tokens": ["[TARGET]", "[/TARGET]"]}
tokenizer.add_special_tokens(special_tokens)

# Update model to use multi-label classification (sigmoid activation instead of softmax)
num_labels = 12  # Adjust according to the total number of subcategories
model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=num_labels, problem_type="multi_label_classification")
model.resize_token_embeddings(len(tokenizer))

def mark_target_word(text, position_range, start_token="[TARGET]", end_token="[/TARGET]", max_tokens=512):
    start_char, end_char = position_range
    marked_text = text[:int(start_char)] + start_token + text[int(start_char):int(end_char)] + end_token + text[int(end_char):]

    tokens = tokenizer.tokenize(marked_text)

    if len(tokens) > max_tokens:
        target_start_index = len(tokenizer.tokenize(text[:int(start_char)]))
        target_end_index = target_start_index + len(tokenizer.tokenize(text[int(start_char):int(end_char)]))

        context_size = (max_tokens - (target_end_index - target_start_index)) // 2

        start_index = max(0, target_start_index - context_size)
        end_index = min(len(tokens), target_end_index + context_size)

        tokens = tokens[start_index:end_index]
        marked_text = tokenizer.convert_tokens_to_string(tokens)

    return marked_text

def tokenize_function(text):
    return tokenizer(text, padding="max_length", truncation=True, max_length=128)

tokenized_data = []

for x in range(len(protagonist_processed_data)):
    text = protagonist_processed_data[x]['text']
    labels = protagonist_processed_data[x]['label']  # Expecting a list of labels
    position_range = protagonist_positions[x]

    marked_text = mark_target_word(text, position_range)
    tokenized_text = tokenize_function(marked_text)

    # Convert labels to a multi-hot encoding format
    multi_hot_labels = torch.zeros(num_labels, dtype=torch.float)
    for label in labels:
        multi_hot_labels[label] = 1.0

    tokenized_data.append({"input_ids": tokenized_text["input_ids"], "attention_mask": tokenized_text["attention_mask"], "labels": multi_hot_labels})

class TokenizedDataset(Dataset):
    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return {
            "input_ids": torch.tensor(self.data[idx]["input_ids"], dtype=torch.long),
            "attention_mask": torch.tensor(self.data[idx]["attention_mask"], dtype=torch.long),
            "labels": self.data[idx]["labels"]
        }

data_train, data_val = train_test_split(tokenized_data, test_size=0.2, random_state=42)

train_dataset = TokenizedDataset(data_train)
val_dataset = TokenizedDataset(data_val)

training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=10,
    load_best_model_at_end=True
)

clf_metrics = evaluate.combine(["accuracy", "f1", "precision", "recall"])

def sigmoid(x):
   return 1/(1 + np.exp(-x))

def compute_metrics(eval_pred):

   predictions, labels = eval_pred
   predictions = sigmoid(predictions)
   predictions = (predictions > 0.5).astype(int).reshape(-1)
   return clf_metrics.compute(predictions=predictions, references=labels.astype(int).reshape(-1))

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

trainer.train()

results = trainer.evaluate()
print(f"F1 Score: {results['eval_f1']:.4f}")

model.save_pretrained("./text_classification_model")
tokenizer.save_pretrained("./text_classification_model")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/49.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/213k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/436k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

In [None]:
from huggingface_hub import HfApi, HfFolder
from transformers import AutoTokenizer, AutoModelForSequenceClassification

repo_name = "ProtagonistClassificationModel"

model.save_pretrained("./text_classification_model")
tokenizer.save_pretrained("./text_classification_model")

model.push_to_hub(repo_name)
tokenizer.push_to_hub(repo_name)

print(f"Model successfully pushed to: https://huggingface.co/{repo_name}")


model.safetensors:   0%|          | 0.00/433M [00:00<?, ?B/s]

README.md:   0%|          | 0.00/5.17k [00:00<?, ?B/s]

Model successfully pushed to: https://huggingface.co/ProtagonistClassificationModel


In [None]:
#Antagonist
from sklearn.model_selection import train_test_split
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from torch.utils.data import Dataset, DataLoader
import torch
import evaluate
from transformers import AutoTokenizer
import torch.nn.functional as F

# Initialize tokenizer
tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-cased")

special_tokens = {"additional_special_tokens": ["[TARGET]", "[/TARGET]"]}
tokenizer.add_special_tokens(special_tokens)

num_labels = 12  # Adjust according to the total number of subcategories
model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=num_labels, problem_type="multi_label_classification")
model.resize_token_embeddings(len(tokenizer))

def mark_target_word(text, position_range, start_token="[TARGET]", end_token="[/TARGET]", max_tokens=512):
    start_char, end_char = position_range
    marked_text = text[:int(start_char)] + start_token + text[int(start_char):int(end_char)] + end_token + text[int(end_char):]

    tokens = tokenizer.tokenize(marked_text)

    if len(tokens) > max_tokens:
        target_start_index = len(tokenizer.tokenize(text[:int(start_char)]))
        target_end_index = target_start_index + len(tokenizer.tokenize(text[int(start_char):int(end_char)]))

        context_size = (max_tokens - (target_end_index - target_start_index)) // 2

        start_index = max(0, target_start_index - context_size)
        end_index = min(len(tokens), target_end_index + context_size)

        tokens = tokens[start_index:end_index]
        marked_text = tokenizer.convert_tokens_to_string(tokens)

    return marked_text

def tokenize_function(text):
    return tokenizer(text, padding="max_length", truncation=True, max_length=128)

tokenized_data = []

for x in range(len(antagonist_processed_data)):
    text = antagonist_processed_data[x]['text']
    labels = antagonist_processed_data[x]['label']  # Expecting a list of labels
    position_range = antagonist_positions[x]

    marked_text = mark_target_word(text, position_range)
    tokenized_text = tokenize_function(marked_text)

    # Convert labels to a multi-hot encoding format
    multi_hot_labels = torch.zeros(num_labels, dtype=torch.float)
    for label in labels:
        multi_hot_labels[label] = 1.0

    tokenized_data.append({"input_ids": tokenized_text["input_ids"], "attention_mask": tokenized_text["attention_mask"], "labels": multi_hot_labels})

class TokenizedDataset(Dataset):
    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return {
            "input_ids": torch.tensor(self.data[idx]["input_ids"], dtype=torch.long),
            "attention_mask": torch.tensor(self.data[idx]["attention_mask"], dtype=torch.long),
            "labels": self.data[idx]["labels"]
        }

data_train, data_val = train_test_split(tokenized_data, test_size=0.2, random_state=42)

train_dataset = TokenizedDataset(data_train)
val_dataset = TokenizedDataset(data_val)

training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=10,
    load_best_model_at_end=True
)

clf_metrics = evaluate.combine(["accuracy", "f1", "precision", "recall"])

def sigmoid(x):
   return 1/(1 + np.exp(-x))

def compute_metrics(eval_pred):
   print(eval_pred)
   predictions, labels = eval_pred
   predictions = sigmoid(predictions)
   predictions = (predictions > 0.5).astype(int).reshape(-1)
   return clf_metrics.compute(predictions=predictions, references=labels.astype(int).reshape(-1))

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

trainer.train()

results = trainer.evaluate()
print(f"F1 Score: {results['eval_f1']:.4f}")

model.save_pretrained("./text_classification_model")
tokenizer.save_pretrained("./text_classification_model")


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Token indices sequence length is longer than the specified maximum sequence length for this model (856 > 512). Running this sequence through the model will result in indexing errors
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,0.3654,0.327563,0.908854,0.0,0.0,0.0
2,0.3095,0.303946,0.908854,0.0,0.0,0.0
3,0.3048,0.299459,0.908854,0.0,0.0,0.0


<transformers.trainer_utils.EvalPrediction object at 0x7834aeb9c8d0>


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


<transformers.trainer_utils.EvalPrediction object at 0x7834b0b92310>


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


<transformers.trainer_utils.EvalPrediction object at 0x78351805bcd0>


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


<transformers.trainer_utils.EvalPrediction object at 0x7834b4c2aed0>
F1 Score: 0.0000


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


('./text_classification_model/tokenizer_config.json',
 './text_classification_model/special_tokens_map.json',
 './text_classification_model/vocab.txt',
 './text_classification_model/added_tokens.json',
 './text_classification_model/tokenizer.json')

In [None]:
from huggingface_hub import HfApi, HfFolder
from transformers import AutoTokenizer, AutoModelForSequenceClassification

repo_name = "AntagonistClassificationModel"

model.save_pretrained("./text_classification_model")
tokenizer.save_pretrained("./text_classification_model")

model.push_to_hub(repo_name)
tokenizer.push_to_hub(repo_name)

print(f"Model successfully pushed to: https://huggingface.co/{repo_name}")


model.safetensors:   0%|          | 0.00/433M [00:00<?, ?B/s]

README.md:   0%|          | 0.00/5.17k [00:00<?, ?B/s]

Model successfully pushed to: https://huggingface.co/AntagonistClassificationModel


In [None]:
#Innocent
from sklearn.model_selection import train_test_split
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from torch.utils.data import Dataset, DataLoader
import torch
import evaluate
from transformers import AutoTokenizer
import torch.nn.functional as F

# Initialize tokenizer
tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-cased")

special_tokens = {"additional_special_tokens": ["[TARGET]", "[/TARGET]"]}
tokenizer.add_special_tokens(special_tokens)

# Update model to use multi-label classification (sigmoid activation instead of softmax)
num_labels = 4  # Adjust according to the total number of subcategories
model = BertForSequenceClassification.from_pretrained("bert-base-uncased", num_labels=num_labels, problem_type="multi_label_classification")
model.resize_token_embeddings(len(tokenizer))

def mark_target_word(text, position_range, start_token="[TARGET]", end_token="[/TARGET]", max_tokens=512):
    start_char, end_char = position_range
    marked_text = text[:int(start_char)] + start_token + text[int(start_char):int(end_char)] + end_token + text[int(end_char):]

    tokens = tokenizer.tokenize(marked_text)

    if len(tokens) > max_tokens:
        target_start_index = len(tokenizer.tokenize(text[:int(start_char)]))
        target_end_index = target_start_index + len(tokenizer.tokenize(text[int(start_char):int(end_char)]))

        context_size = (max_tokens - (target_end_index - target_start_index)) // 2

        start_index = max(0, target_start_index - context_size)
        end_index = min(len(tokens), target_end_index + context_size)

        tokens = tokens[start_index:end_index]
        marked_text = tokenizer.convert_tokens_to_string(tokens)

    return marked_text

def tokenize_function(text):
    return tokenizer(text, padding="max_length", truncation=True, max_length=128)

tokenized_data = []

for x in range(len(innocent_processed_data)):
    text = innocent_processed_data[x]['text']
    labels = innocent_processed_data[x]['label']  # Expecting a list of labels
    position_range = innocent_positions[x]

    marked_text = mark_target_word(text, position_range)
    tokenized_text = tokenize_function(marked_text)

    # Convert labels to a multi-hot encoding format
    multi_hot_labels = torch.zeros(num_labels, dtype=torch.float)
    for label in labels:
        multi_hot_labels[label] = 1.0

    tokenized_data.append({"input_ids": tokenized_text["input_ids"], "attention_mask": tokenized_text["attention_mask"], "labels": multi_hot_labels})

class TokenizedDataset(Dataset):
    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return {
            "input_ids": torch.tensor(self.data[idx]["input_ids"], dtype=torch.long),
            "attention_mask": torch.tensor(self.data[idx]["attention_mask"], dtype=torch.long),
            "labels": self.data[idx]["labels"]
        }

data_train, data_val = train_test_split(tokenized_data, test_size=0.2, random_state=42)

train_dataset = TokenizedDataset(data_train)
val_dataset = TokenizedDataset(data_val)

training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    save_strategy="epoch",
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=10,
    load_best_model_at_end=True
)

clf_metrics = evaluate.combine(["accuracy", "f1", "precision", "recall"])

def sigmoid(x):
   return 1/(1 + np.exp(-x))

def compute_metrics(eval_pred):

   predictions, labels = eval_pred
   predictions = sigmoid(predictions)
   predictions = (predictions > 0.5).astype(int).reshape(-1)
   return clf_metrics.compute(predictions=predictions, references=labels.astype(int).reshape(-1))

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics
)

trainer.train()

results = trainer.evaluate()
print(f"F1 Score: {results['eval_f1']:.4f}")

model.save_pretrained("./text_classification_model")
tokenizer.save_pretrained("./text_classification_model")


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
Token indices sequence length is longer than the specified maximum sequence length for this model (834 > 512). Running this sequence through the model will result in indexing errors
Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).
  trainer = Trainer(


Epoch,Training Loss,Validation Loss,Accuracy,F1,Precision,Recall
1,No log,0.496522,0.854167,0.5625,0.5625,0.5625
2,0.529300,0.450528,0.854167,0.5625,0.5625,0.5625
3,0.397900,0.43379,0.854167,0.5625,0.5625,0.5625


F1 Score: 0.5625


('./text_classification_model/tokenizer_config.json',
 './text_classification_model/special_tokens_map.json',
 './text_classification_model/vocab.txt',
 './text_classification_model/added_tokens.json',
 './text_classification_model/tokenizer.json')

In [None]:
from huggingface_hub import HfApi, HfFolder
from transformers import AutoTokenizer, AutoModelForSequenceClassification

repo_name = "InnocentClassificationModel"

model.save_pretrained("./text_classification_model")
tokenizer.save_pretrained("./text_classification_model")

model.push_to_hub(repo_name)
tokenizer.push_to_hub(repo_name)

print(f"Model successfully pushed to: https://huggingface.co/{repo_name}")


model.safetensors:   0%|          | 0.00/433M [00:00<?, ?B/s]

README.md:   0%|          | 0.00/5.17k [00:00<?, ?B/s]

Model successfully pushed to: https://huggingface.co/InnocentClassificationModel
