In [None]:
import torch

def is_torch_greater_or_equal_than_1_13():
    return torch.__version__ >= '1.13'

# Test the function
print(is_torch_greater_or_equal_than_1_13())

In [None]:
import math
import warnings
from typing import List, Optional, Tuple, Union
import torch
import torch.nn.functional as F
import torch.utils.checkpoint
from torch import nn
from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss
from transformers.activations import ACT2FN
from transformers.cache_utils import Cache, DynamicCache
from transformers.modeling_attn_mask_utils import (
    AttentionMaskConverter,
    _prepare_4d_attention_mask,
    _prepare_4d_causal_attention_mask,
    _prepare_4d_causal_attention_mask_for_sdpa,
)
from transformers.modeling_outputs import BaseModelOutputWithPast, CausalLMOutputWithPast, SequenceClassifierOutputWithPast
from transformers.modeling_utils import PreTrainedModel
from transformers.utils import (
    add_start_docstrings,
    add_start_docstrings_to_model_forward,
    is_flash_attn_2_available,
    is_flash_attn_greater_or_equal_2_10,
    logging,
    replace_return_docstrings,
)
from transformers.utils.import_utils import is_torch_fx_available

# Logging configuration
logger = logging.get_logger(__name__)

In [5]:
# Custom function to check torch version
def is_torch_greater_or_equal_than_1_13():
    return torch.__version__ >= '1.13'

In [6]:
import math
import warnings
from typing import List, Optional, Tuple, Union
import torch
import torch.nn.functional as F
import torch.utils.checkpoint
from torch import nn
from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss
from transformers.activations import ACT2FN
from transformers.cache_utils import Cache, DynamicCache
from transformers.modeling_attn_mask_utils import (
    AttentionMaskConverter,
    _prepare_4d_attention_mask,
    _prepare_4d_causal_attention_mask,
    _prepare_4d_causal_attention_mask_for_sdpa,
)
from transformers.modeling_outputs import BaseModelOutputWithPast, CausalLMOutputWithPast, SequenceClassifierOutputWithPast
from transformers.modeling_utils import PreTrainedModel
from transformers.utils import (
    add_start_docstrings,
    add_start_docstrings_to_model_forward,
    is_flash_attn_2_available,
    is_flash_attn_greater_or_equal_2_10,
    logging,
    replace_return_docstrings,
)
from transformers.utils.import_utils import is_torch_fx_available

# Logging configuration
logger = logging.get_logger(__name__)

# Custom function to check torch version
def is_torch_greater_or_equal_than_1_13():
    return torch.__version__ >= '1.13'

class SimpleTransformerModel(PreTrainedModel):
    def __init__(self, config):
        super().__init__(config)
        
        self.embedding = nn.Embedding(config.vocab_size, config.hidden_size)
        self.transformer_layer = nn.TransformerEncoderLayer(
            d_model=config.hidden_size, 
            nhead=config.num_attention_heads, 
            dim_feedforward=4*config.hidden_size, 
            dropout=config.hidden_dropout_prob
        )
        self.transformer_encoder = nn.TransformerEncoder(self.transformer_layer, num_layers=config.num_hidden_layers)
        self.classifier = nn.Linear(config.hidden_size, config.num_labels)
        
        self.init_weights()

    def forward(
        self,
        input_ids: Optional[torch.Tensor] = None,
        attention_mask: Optional[torch.Tensor] = None,
        labels: Optional[torch.Tensor] = None,
    ) -> Union[Tuple[torch.Tensor], SequenceClassifierOutputWithPast]:
        # Embed the input ids
        embeddings = self.embedding(input_ids)
        
        # Prepare attention mask if provided
        if attention_mask is not None:
            attention_mask = _prepare_4d_attention_mask(attention_mask, embeddings.dtype)

        # Pass through transformer encoder
        transformer_output = self.transformer_encoder(embeddings.transpose(0, 1), src_key_padding_mask=(attention_mask == 0)).transpose(0, 1)
        
        # Pool the output by taking the first token (CLS token equivalent)
        pooled_output = transformer_output[:, 0]
        
        # Pass through classifier
        logits = self.classifier(pooled_output)
        
        loss = None
        if labels is not None:
            if self.config.problem_type is None:
                if self.config.num_labels == 1:
                    self.config.problem_type = "regression"
                elif self.config.num_labels > 1 and (labels.dtype == torch.long or labels.dtype == torch.int):
                    self.config.problem_type = "single_label_classification"
                else:
                    self.config.problem_type = "multi_label_classification"

            if self.config.problem_type == "regression":
                loss_fct = MSELoss()
                if self.config.num_labels == 1:
                    loss = loss_fct(logits.squeeze(), labels.squeeze())
                else:
                    loss = loss_fct(logits, labels)
            elif self.config.problem_type == "single_label_classification":
                loss_fct = CrossEntropyLoss()
                loss = loss_fct(logits.view(-1, self.config.num_labels), labels.view(-1))
            elif self.config.problem_type == "multi_label_classification":
                loss_fct = BCEWithLogitsLoss()
                loss = loss_fct(logits, labels)
        
        return SequenceClassifierOutputWithPast(
            loss=loss,
            logits=logits,
            past_key_values=None,  # Not used in this simple model
        )

# Example usage:
# Assuming you have a configuration object `config` with necessary parameters like vocab_size, hidden_size, etc.
# model = SimpleTransformerModel(config)
# output = model(input_ids, attention_mask=attention_mask, labels=labels)

In [None]:
import torch
from torch.utils.data import DataLoader
from transformers import BertTokenizerFast, Trainer, TrainingArguments
from datasets import load_dataset
from evaluate import load
import numpy as np

In [None]:
from datasets import load_dataset
from transformers import BertTokenizerFast

# IMDb veri setini yükleme
dataset = load_dataset('imdb')

# Tokenizer seçimi
tokenizer = BertTokenizerFast.from_pretrained('bert-base-uncased')

# Tokenize fonksiyonu
def tokenize_function(examples):
    return tokenizer(examples['text'], padding="max_length", truncation=True, max_length=512)

# Tokenized verileri hazırlama
tokenized_datasets = dataset.map(tokenize_function, batched=True)

# Gereksiz sütunları kaldırma
tokenized_datasets = tokenized_datasets.remove_columns(['text'])

# PyTorch tensör formatına dönüştürme
tokenized_datasets.set_format('torch')

# Eğitim ve test veri setlerini ayırma
train_dataset = tokenized_datasets['train']
test_dataset = tokenized_datasets['test']

# Örnek veri kontrolü
print(train_dataset[0])
print(test_dataset[0])

In [None]:
from transformers import DataCollatorWithPadding

data_collator = DataCollatorWithPadding(tokenizer)

train_loader = DataLoader(train_dataset, batch_size=8, shuffle=True, collate_fn=data_collator)
test_loader = DataLoader(test_dataset, batch_size=8, shuffle=False, collate_fn=data_collator)

# Örnek veri kontrolü
for batch in train_loader:
    print(batch)
    break

for batch in test_loader:
    print(batch)
    break

In [4]:
from evaluate import load

In [7]:
import torch
import numpy as np
from transformers import (
    PretrainedConfig,
    BertTokenizerFast,
    BertModel,
    BertPreTrainedModel,
    AutoModelForQuestionAnswering,
    pipeline,
    modeling_outputs
)
from datasets import load_dataset
from torch.utils.data import DataLoader
from tqdm import tqdm
from typing import Optional, Union

class SimpleTransformerConfig(PretrainedConfig):
    def __init__(
        self,
        vocab_size=30522,
        hidden_size=768,
        num_hidden_layers=6,
        num_attention_heads=12,
        intermediate_size=3072,
        hidden_act="gelu",
        hidden_dropout_prob=0.1,
        attention_probs_dropout_prob=0.1,
        max_position_embeddings=512,
        type_vocab_size=2,
        initializer_range=0.02,
        layer_norm_eps=1e-12,
        pad_token_id=0,
        position_embedding_type="absolute",
        use_cache=True,
        classifier_dropout=None,
        num_labels=2,
        **kwargs
    ):
        super().__init__(**kwargs)
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.hidden_act = hidden_act
        self.intermediate_size = intermediate_size
        self.hidden_dropout_prob = hidden_dropout_prob
        self.attention_probs_dropout_prob = attention_probs_dropout_prob
        self.max_position_embeddings = max_position_embeddings
        self.type_vocab_size = type_vocab_size
        self.initializer_range = initializer_range
        self.layer_norm_eps = layer_norm_eps
        self.pad_token_id = pad_token_id
        self.position_embedding_type = position_embedding_type
        self.use_cache = use_cache
        self.classifier_dropout = classifier_dropout
        self.num_labels = num_labels

class SimpleTransformerForSequenceClassification(BertPreTrainedModel):
    def __init__(self, config):
        super().__init__(config)
        self.bert = BertModel(config)
        self.dropout = torch.nn.Dropout(config.hidden_dropout_prob)
        self.classifier = torch.nn.Linear(config.hidden_size, config.num_labels)
        self.init_weights()

    def forward(
        self,
        input_ids: Optional[torch.Tensor] = None,
        attention_mask: Optional[torch.Tensor] = None,
        token_type_ids: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.Tensor] = None,
        head_mask: Optional[torch.Tensor] = None,
        inputs_embeds: Optional[torch.Tensor] = None,
        labels: Optional[torch.Tensor] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> Union[tuple, modeling_outputs.SequenceClassifierOutput]:
        
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        outputs = self.bert(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        pooled_output = outputs[1]
        pooled_output = self.dropout(pooled_output)
        logits = self.classifier(pooled_output)

        loss = None
        if labels is not None:
            loss_fct = torch.nn.CrossEntropyLoss()
            loss = loss_fct(logits.view(-1, self.config.num_labels), labels.view(-1))

        if not return_dict:
            output = (logits,) + outputs[2:]
            return ((loss,) + output) if loss is not None else output

        return modeling_outputs.SequenceClassifierOutput(
            loss=loss,
            logits=logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )

def evaluate_language_skills(model, tokenizer):
    print("Evaluating language skills...")
    results = {}
    
    try:
        # SQuAD Evaluation
        from transformers import DistilBertTokenizerFast
        squad_sample = load_dataset('squad', split='validation').select(range(10))
        
        qa_tokenizer = DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased')
        qa_model = AutoModelForQuestionAnswering.from_pretrained('distilbert-base-uncased-distilled-squad')
        
        qa_pipeline = pipeline(
            'question-answering',
            model=qa_model,
            tokenizer=qa_tokenizer,
            device=-1
        )
        
        correct = 0
        for item in tqdm(squad_sample, desc="SQuAD Evaluation"):
            try:
                result = qa_pipeline(
                    question=item['question'],
                    context=item['context'],
                    handle_impossible_answer=True
                )
                if any(result['answer'].strip().lower() in ans.lower() for ans in item['answers']['text']):
                    correct += 1
            except Exception as e:
                continue
        results['squad_accuracy'] = correct / len(squad_sample) if squad_sample else 0

        # GLUE Evaluation (SST-2)
        try:
            dataset = load_dataset('glue', 'sst2', split='validation').select(range(10))
            encoded = tokenizer(
                dataset['sentence'],
                padding=True,
                truncation=True,
                return_tensors='pt',
                return_token_type_ids=True
            )
            
            with torch.no_grad():
                outputs = model(
                    input_ids=encoded['input_ids'],
                    attention_mask=encoded['attention_mask'],
                    token_type_ids=encoded.get('token_type_ids', None)
                )
                preds = outputs.logits.argmax(-1)
                accuracy = (preds == torch.tensor(dataset['label'])).float().mean()
                results['sst2_accuracy'] = accuracy.item()
        except Exception as e:
            print(f"SST-2 Error: {str(e)}")
            results['sst2_error'] = str(e)

    except Exception as e:
        print(f"Language Skills Error: {str(e)}")
        results['language_skills_error'] = str(e)
        
    return results

def evaluate_math_problem_solving(model, tokenizer):
    print("Evaluating math problem-solving...")
    results = {}
    try:
        math_sample = load_dataset('math_dataset', 'arithmetic__add_or_sub', split='train', trust_remote_code=True).select(range(10))
        correct = 0
        for item in tqdm(math_sample, desc="Math Evaluation"):
            inputs = tokenizer(
                f"Problem: {item['question']} Answer:",
                return_tensors="pt",
                padding=True,
                truncation=True
            )
            with torch.no_grad():
                outputs = model(**inputs)
                predicted = outputs.logits.argmax(-1).item()
                try:
                    correct += int(str(predicted) == str(item['answer']))
                except:
                    continue
        results['math_accuracy'] = correct / len(math_sample) if math_sample else 0
    except Exception as e:
        print(f"Math Error: {str(e)}")
        results['math_error'] = str(e)
    return results

def evaluate_coding(model, tokenizer):
    print("Evaluating coding skills...")
    results = {}
    try:
        code_sample = load_dataset('code_search_net', 'python', split='validation', trust_remote_code=True).select(range(10))
        correct = 0
        for item in tqdm(code_sample, desc="Coding Evaluation"):
            inputs = tokenizer(
                item['whole_func_string'],
                truncation=True,
                padding=True,
                return_tensors="pt"
            )
            with torch.no_grad():
                outputs = model(**inputs)
                predictions = outputs.logits.argmax(-1)
                try:
                    decoded = tokenizer.decode(predictions[0], skip_special_tokens=True)
                    correct += int(decoded in item['whole_func_string'])
                except:
                    continue
        results['code_accuracy'] = correct / len(code_sample) if code_sample else 0
    except Exception as e:
        print(f"Coding Error: {str(e)}")
        results['code_error'] = str(e)
    return results

def evaluate_conversation_quality(model, tokenizer):
    print("Evaluating conversation quality...")
    results = {}
    try:
        dialog_sample = load_dataset('daily_dialog', split='validation').select(range(10))
        scores = []
        for item in tqdm(dialog_sample, desc="Conversation Evaluation"):
            context = " ".join(item['dialog'][:-1])
            response = item['dialog'][-1]
            inputs = tokenizer(
                context,
                response,
                truncation=True,
                padding=True,
                return_tensors="pt"
            )
            with torch.no_grad():
                outputs = model(**inputs)
                scores.append(torch.softmax(outputs.logits, dim=-1)[0][1].item())
        results['conversation_score'] = np.mean(scores) if scores else 0
    except Exception as e:
        print(f"Conversation Error: {str(e)}")
        results['conversation_error'] = str(e)
    return results

def evaluate_model_performance(model, tokenizer):
    results = {}
    results.update(evaluate_language_skills(model, tokenizer))
    # results.update(evaluate_math_problem_solving(model, tokenizer))
    results.update(evaluate_coding(model, tokenizer))
    results.update(evaluate_conversation_quality(model, tokenizer))
    print("\nFinal Evaluation Results:")
    for k, v in results.items():
        print(f"{k}: {v:.4f}" if isinstance(v, float) else f"{k}: {v}")
    return results

if __name__ == "__main__":
    config = SimpleTransformerConfig(num_labels=2)
    model = SimpleTransformerForSequenceClassification(config)
    tokenizer = BertTokenizerFast.from_pretrained("bert-base-uncased")
    
    evaluation_results = evaluate_model_performance(model, tokenizer)

Evaluating language skills...


Device set to use cpu
SQuAD Evaluation: 100%|██████████| 10/10 [00:00<00:00, 19.72it/s]


Evaluating coding skills...


Coding Evaluation: 100%|██████████| 10/10 [00:00<00:00, 11.22it/s]


Evaluating conversation quality...


Conversation Evaluation: 100%|██████████| 10/10 [00:00<00:00, 23.68it/s]


Final Evaluation Results:
squad_accuracy: 0.8000
sst2_accuracy: 0.4000
code_accuracy: 0.2000
conversation_score: 0.4821



