# Тренировка

In [None]:
import os
from datasets import load_dataset

# The CSV splits live in the `data` folder one level above the working directory.
data_dir = os.path.join(os.pardir, 'data')
csv_splits = {
    'train': os.path.join(data_dir, 'train.csv'),
    'test': os.path.join(data_dir, 'test.csv'),
}

# Load both splits with one call; the result is indexed by split name.
dataset = load_dataset('csv', data_files=csv_splits)
train_dataset, test_dataset = dataset['train'], dataset['test']



  from .autonotebook import tqdm as notebook_tqdm


In [None]:
from transformers import AutoTokenizer, MambaForCausalLM
from transformers import DataCollatorForLanguageModeling

# Checkpoint identifier shared by the tokenizer and the model loaded further down.
model_id = "state-spaces/mamba-130m-hf"
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_id)

def preprocess_function(examples):
    """Build the training prompts for a batch of rows and tokenize them.

    Args:
        examples: A batch mapping with parallel ``'equation'``, ``'answer'`` and
            ``'type'`` sequences (the shape ``Dataset.map(..., batched=True)``
            passes in).

    Returns:
        The tokenizer output for the batch, truncated to at most 512 tokens per
        example and left unpadded.
    """
    # `eq_type` rather than `type`, so the builtin is not shadowed.
    texts = [
        f"Translate {eq_type} differential equation: {equation}. Solution: {answer}"
        for equation, answer, eq_type in zip(
            examples['equation'], examples['answer'], examples['type']
        )
    ]

    # No padding here: the DataCollatorForLanguageModeling used for training
    # pads each batch to its longest sequence, so rows are not stored and
    # trained on as 512-token sequences that are mostly padding.
    return tokenizer(texts, truncation=True, max_length=512)

# Tokenize both splits in batches with the same preprocessing function.
tokenized_train, tokenized_test = (
    split.map(preprocess_function, batched=True)
    for split in (train_dataset, test_dataset)
)

# Pretrained weights for the checkpoint named by `model_id`.
model = MambaForCausalLM.from_pretrained(model_id)

# mlm=False turns off masked-language-model masking in the collator.
collator_options = {"tokenizer": tokenizer, "mlm": False}
data_collator = DataCollatorForLanguageModeling(**collator_options)



  Referenced from: <5AA8DD3D-A2CC-31CA-8060-88B4E9C18B09> /Users/vovazakharov/anaconda3/envs/shad/lib/python3.10/site-packages/torchvision/image.so
  warn(
Map: 100%|██████████| 15235/15235 [00:01<00:00, 8526.45 examples/s]
The fast path is not available because one of `(selective_state_update, selective_scan_fn, causal_conv1d_fn, causal_conv1d_update, mamba_inner_fn)` is None. Falling back to the sequential implementation of Mamba, as use_mambapy is set to False. To install follow https://github.com/state-spaces/mamba/#installation and https://github.com/Dao-AILab/causal-conv1d. For the mamba.py backend, follow https://github.com/alxndrTL/mamba.py.


In [None]:
import torch
from transformers import TrainingArguments, Trainer

# Settings for the fine-tuning run, grouped by concern.
training_args = TrainingArguments(
    output_dir="./mamba_diffeq",
    # Evaluation and checkpointing share the same 500-step schedule.
    evaluation_strategy="steps",
    eval_steps=500,
    save_strategy="steps",
    save_steps=500,
    load_best_model_at_end=True,
    # Optimisation.
    learning_rate=5e-5,
    weight_decay=0.01,
    num_train_epochs=3,
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=4,
    # Half precision only when a CUDA device is available.
    fp16=torch.cuda.is_available(),
    # Logging.
    logging_dir='./logs',
    logging_steps=100,
    report_to="tensorboard",
)

# Wire the model, the tokenized splits and the collator into one Trainer.
trainer = Trainer(
    args=training_args,
    model=model,
    data_collator=data_collator,
    train_dataset=tokenized_train,
    eval_dataset=tokenized_test,
)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting tensorboard
  Downloading tensorboard-2.19.0-py3-none-any.whl.metadata (1.8 kB)
Collecting absl-py>=0.4 (from tensorboard)
  Downloading absl_py-2.2.1-py3-none-any.whl.metadata (2.4 kB)
Collecting grpcio>=1.48.2 (from tensorboard)
  Downloading grpcio-1.71.0-cp310-cp310-macosx_12_0_universal2.whl.metadata (3.8 kB)
Collecting markdown>=2.6.8 (from tensorboard)
  Downloading Markdown-3.7-py3-none-any.whl.metadata (7.0 kB)
Collecting protobuf!=4.24.0,>=3.19.6 (from tensorboard)
  Downloading protobuf-6.30.2-cp39-abi3-macosx_10_9_universal2.whl.metadata (593 bytes)
Collecting tensorboard-data-server<0.8.0,>=0.7.0 (from tensorboard)
  Downloading tensorboard_data_server-0.7.2-py3-none-any.whl.metadata (1.1 kB)
Collecting werkzeug>=1.0.1 (from tensorboard)
  Downloading werkzeug-3.1.3-py3-none-any.whl.metadata (3.7 kB)
Downloading tensorboard-2.19.0-py3-none-any.whl (5.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.5/5.5 MB[0m [31m12.8 MB/s[0m eta [36m0:



In [None]:
# Run fine-tuning with the configuration held by `trainer`.
trainer.train()

# Write the model and the tokenizer into the same directory.
export_dir = "./mamba_diffeq"
trainer.save_model(export_dir)
tokenizer.save_pretrained(export_dir)

# Инференс

In [None]:
from transformers import pipeline

# Text-generation pipeline over the checkpoint saved in ./mamba_diffeq; uses GPU 0
# when CUDA is available and the CPU (-1) otherwise.
translator = pipeline(
    "text-generation",
    model="./mamba_diffeq",
    tokenizer=tokenizer,
    device=0 if torch.cuda.is_available() else -1
)

# Same template as the training prompts, stopping right after "Solution:".
# No trailing space: in the training text the space is tokenized together with
# the first answer token, so a prompt ending in " " does not match it.
input_eq = "Translate polynomial differential equation: dy/dx = x^2. Solution:"
generated = translator(
    input_eq,
    max_length=200,  # total length budget, prompt tokens included
    num_return_sequences=1,
    # temperature / top_p only take effect when sampling is enabled; without
    # do_sample=True decoding is greedy and both settings are ignored.
    do_sample=True,
    temperature=0.7,
    top_p=0.9
)

print(generated[0]['generated_text'])