In [1]:
import pandas as pd
from datasets import Dataset
from datasets import load_dataset
from langdetect import detect
from tqdm import tqdm 

In [2]:
# An empty list to store DataFrames for each language
dataframes = []

# Define the list of language configurations
languages = ['de', 'es', 'fr', 'ru', 'tu']

# Number of records to download for each language
records_to_download = 700

# Load the dataset for each language configuration
for lang in languages:
    print(f"Loading {records_to_download} records from MLSUM dataset for language: {lang}")
    dataset = load_dataset('mlsum', lang, split=f'train[:{records_to_download}]', trust_remote_code=True)

    # Convert to pandas DataFrame for easier manipulation
    df = pd.DataFrame(dataset)

    # Add a column for language
    df['language'] = lang

    # Append the DataFrame to the list
    dataframes.append(df)

# Concatenate all DataFrames into one
data = pd.concat(dataframes, ignore_index=True)

# Filter to keep relevant columns
data = data.drop('url', axis=1, errors='ignore')

# Display the filtered DataFrame
print("Filtered Data with Language Detection:")
print(data.head(3))

Loading 700 records from MLSUM dataset for language: de
Loading 700 records from MLSUM dataset for language: es
Loading 700 records from MLSUM dataset for language: fr
Loading 700 records from MLSUM dataset for language: ru
Loading 700 records from MLSUM dataset for language: tu
Filtered Data with Language Detection:
                                                text  \
0  Transport im Viehwaggon, Fleischgeruch in der ...   
1  Marmorner Zebrastreifen, pomp├╢se Geb├дude: Sind...   
2  Wenn an diesem Montag die Landesvorsitzenden d...   

                                             summary    topic  \
0  Transport im Viehwaggon, Fleischgeruch in der ...  politik   
1  Marmorner Zebrastreifen, pomp├╢se Geb├дude: Sind...  politik   
2  Oskar Lafontaine gibt den Parteivorsitz der Li...  politik   

                                               title        date language  
0  So war Auschwitz: Erinnerungen einer Holocaust...  00/01/2010       de  
1  Kommunen in Not (3): Sindelfingen - 

In [3]:
# Create Hugging Face Dataset
dataset = Dataset.from_pandas(data)

# Split dataset for training/evaluation (80% train, 20% validation)
dataset = dataset.train_test_split(test_size=0.2)

In [4]:
from transformers import MBartForConditionalGeneration, MBart50Tokenizer

# Load translation model and tokenizer
translation_model_name = "facebook/mbart-large-50-many-to-one-mmt"
translation_model = MBartForConditionalGeneration.from_pretrained(translation_model_name)
translation_tokenizer = MBart50Tokenizer.from_pretrained(translation_model_name, src_lang="en_XX", tgt_lang="en_XX")

def translate_text(text):
    prompt = f"Translate the text to English: {text}"
    inputs = translation_tokenizer(prompt, return_tensors="pt", max_length=128, truncation=True)
    outputs = translation_model.generate(**inputs)
    translation = translation_tokenizer.decode(outputs[0], skip_special_tokens=True)
    return translation

2024-12-03 07:09:46.551813: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-12-03 07:09:47.623313: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda/lib64:/usr/local/nccl2/lib:/usr/local/cuda/extras/CUPTI/lib64
2024-12-03 07:09:47.623451: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/cuda/lib64:/usr/local/nccl2/lib:/usr/loca

In [5]:
# Define the translation function to translate the summaries
def translate_summaries(dataset):
    translated_summaries = []
    for summary in tqdm(dataset["summary"], desc="Translating Summaries"):
        translated_summary = translate_text(summary)
        translated_summaries.append(translated_summary)
    return translated_summaries

# Translate the summaries in the dataset
translated_summaries_train = translate_summaries(dataset["train"])
translated_summaries_test = translate_summaries(dataset["test"])

# Now add the translated summaries as a new column
dataset["train"] = dataset["train"].add_column("translated_summary", translated_summaries_train)
dataset["test"] = dataset["test"].add_column("translated_summary", translated_summaries_test)

Translating Summaries: 100%|тЦИтЦИтЦИтЦИтЦИтЦИтЦИтЦИтЦИтЦИ| 2800/2800 [1:42:13<00:00,  2.19s/it]  
Translating Summaries: 100%|тЦИтЦИтЦИтЦИтЦИтЦИтЦИтЦИтЦИтЦИ| 700/700 [26:45<00:00,  2.29s/it]


Flattening the indices:   0%|          | 0/2800 [00:00<?, ? examples/s]

Flattening the indices:   0%|          | 0/700 [00:00<?, ? examples/s]

In [6]:
# Define the translation function
def translate_texts(texts, batch_size=16):
    """
    Translate a list of texts in batches.
    
    Args:
        texts (list): List of strings to translate.
        batch_size (int): Number of texts to translate at a time.

    Returns:
        list: Translated texts.
    """
    translated_texts = []
    for i in tqdm(range(0, len(texts), batch_size), desc="Translating Input Texts"):
        batch = texts[i:i + batch_size]
        # Assume translate_text is a function that translates a single text
        translated_batch = [translate_text(text) for text in batch]
        translated_texts.extend(translated_batch)
    return translated_texts

# Translate the input texts in the train and test datasets
translated_texts_train = translate_texts(dataset["train"]["text"])
translated_texts_test = translate_texts(dataset["test"]["text"])

# Add the translated input texts as a new column
dataset["train"] = dataset["train"].add_column("translated_text", translated_texts_train)
dataset["test"] = dataset["test"].add_column("translated_text", translated_texts_test)

Translating Input Texts: 100%|тЦИтЦИтЦИтЦИтЦИтЦИтЦИтЦИтЦИтЦИ| 175/175 [5:29:34<00:00, 112.99s/it]  
Translating Input Texts: 100%|тЦИтЦИтЦИтЦИтЦИтЦИтЦИтЦИтЦИтЦИ| 44/44 [1:26:11<00:00, 117.53s/it]


In [7]:
print(dataset["train"].column_names)

['text', 'summary', 'topic', 'title', 'date', 'language', 'translated_summary', 'translated_text']


In [8]:
def preprocess_data_for_summarization(examples):
    return {
        "input_text": [f"Summarize the following text: {text}" for text in examples["translated_text"]],
        "target_text": examples["translated_summary"]  # Use the translated summary here
    }

# Apply the preprocessing function to the dataset
summarization_dataset = dataset.map(preprocess_data_for_summarization, batched=True)

Map:   0%|          | 0/2800 [00:00<?, ? examples/s]

Map:   0%|          | 0/700 [00:00<?, ? examples/s]

In [9]:
# (summarization_dataset["train"]["translated_summary"])

In [10]:
from peft import LoraConfig, get_peft_model, PeftType, TaskType
from transformers import TrainingArguments, Trainer, AutoModelForSeq2SeqLM, AutoTokenizer

# Load summarization model and tokenizer
summarization_model_name = "t5-small"  # Use T5 or any other Seq2Seq model
summarization_model = AutoModelForSeq2SeqLM.from_pretrained(summarization_model_name)
summarization_tokenizer = AutoTokenizer.from_pretrained(summarization_model_name)

# Define LoRA configuration
lora_config = LoraConfig(
    r=8,
    lora_alpha=32,
    lora_dropout=0.1,
    target_modules=["q", "v"],  # LoRA applied to specific layers
    task_type=TaskType.SEQ_2_SEQ_LM
)

# Apply LoRA to the model
lora_model = get_peft_model(summarization_model, lora_config)

# Tokenize data for training
def tokenize_function(examples):
    inputs = summarization_tokenizer(
        examples["input_text"], max_length=512, truncation=True, padding="max_length", return_tensors="pt"
    )
    labels = summarization_tokenizer(
        examples["target_text"], max_length=128, truncation=True, padding="max_length", return_tensors="pt"
    )["input_ids"]
    inputs["labels"] = labels
    return inputs

tokenized_dataset = summarization_dataset.map(tokenize_function, batched=True)

Map:   0%|          | 0/2800 [00:00<?, ? examples/s]

Map:   0%|          | 0/700 [00:00<?, ? examples/s]

In [11]:
# tokenized_dataset["train"]['labels']

In [12]:
training_args = TrainingArguments(
    output_dir="./summarization_lora_results",
    evaluation_strategy="epoch",  # Evaluate at the end of each epoch
    save_strategy="epoch",  # Save at the end of each epoch
    learning_rate=5e-5,
    per_device_train_batch_size=8,
    num_train_epochs=6,
    weight_decay=0.01,
    save_total_limit=2,
    logging_dir="./logs",
    logging_steps=100,
    load_best_model_at_end=True,  # Load the best model at the end of training
)

trainer = Trainer(
    model=lora_model,
    args=training_args,
    train_dataset=tokenized_dataset["train"],
    eval_dataset=tokenized_dataset["test"],
    tokenizer=summarization_tokenizer,
)

# Train the model
trainer.train()

  trainer = Trainer(
Passing a tuple of `past_key_values` is deprecated and will be removed in Transformers v4.48.0. You should pass an instance of `EncoderDecoderCache` instead, e.g. `past_key_values=EncoderDecoderCache.from_legacy_cache(past_key_values)`.


Epoch,Training Loss,Validation Loss
1,1.3104,0.925675
2,0.9247,0.818257
3,0.8372,0.770432
4,0.8187,0.744333
5,0.834,0.733954
6,0.7982,0.730913


TrainOutput(global_step=2100, training_loss=1.7337668827601842, metrics={'train_runtime': 4427.5871, 'train_samples_per_second': 3.794, 'train_steps_per_second': 0.474, 'total_flos': 2288962555084800.0, 'train_loss': 1.7337668827601842, 'epoch': 6.0})

In [26]:
def pipeline(input_text):
    
    # Translate text
    translated_text = translate_text(input_text)
    print("Translated text: ",translated_text)
    
    # Summarize translated text
    prompt = f"summarize this text: {translated_text}"
    inputs = summarization_tokenizer(
            prompt,
            return_tensors="pt",
            max_length=512,
            truncation=True
        )    
    # Generate summary
    outputs = lora_model.generate(
        **inputs,
        max_length=150,
        min_length=20,
        num_beams=4,
        no_repeat_ngram_size=2,
        early_stopping=True
    )

    summary = summarization_tokenizer.decode(outputs[0], skip_special_tokens=True)
    return summary

In [30]:
# Example usage
example_text = """Le soleil se levait doucement ├а l'horizon, projetant une lumi├иre dor├йe sur les collines ondulantes. Les oiseaux commen├зaient ├а chanter, leur m├йlodie emplissant l'air frais du matin. Dans un petit village nich├й au pied des montagnes, les habitants s'affairaient d├йj├а ├а leurs t├вches quotidiennes. Les enfants couraient dans les ruelles pav├йes, riant et jouant ├а cache-cache, tandis que les adultes ouvraient les volets de leurs maisons en pierre.

Marie, une jeune femme aux cheveux bruns et aux yeux p├йtillants, sortit de sa maison en tenant un panier rempli de fleurs fra├оches. Elle aimait commencer sa journ├йe en cueillant des fleurs dans les champs voisins. Les couleurs vives des coquelicots, des marguerites et des bleuets illuminaient le paysage. Tandis qu'elle marchait, elle salua ses voisins avec un sourire chaleureux.

Au centre du village se trouvait une petite place anim├йe o├╣ les commer├зants installaient leurs ├йtals. Le boulanger, avec sa toque blanche et ses mains enfarin├йes, sortait des baguettes croustillantes du four. L'odeur du pain chaud se m├кlait aux ar├┤mes des fruits frais et des herbes vendues par les marchands. Les discussions anim├йes des villageois cr├йaient une ambiance joyeuse et conviviale.

Non loin de l├а, un vieux moulin ├а eau tournait lentement, aliment├й par un ruisseau limpide. Les enfants aimaient s'y rassembler pour jeter des cailloux dans l'eau ou observer les poissons qui nageaient sous la surface. Les plus ├вg├йs racontaient des histoires sur les l├йgendes locales, parlant de chevaliers courageux et de tr├йsors cach├йs dans les montagnes environnantes.

├А la lisi├иre du village, un berger conduisait son troupeau de moutons vers les p├вturages verdoyants. Le tintement des clochettes attach├йes aux cous des animaux r├йsonnait dans l'air tranquille. Les chiens de berger, alertes et ob├йissants, veillaient ├а ce qu'aucun mouton ne s'├йloigne. Le berger, avec son b├вton en bois et son chapeau de paille, semblait en parfaite harmonie avec la nature.

Pendant ce temps, dans une petite ferme ├а la p├йriph├йrie du village, un fermier et sa femme travaillaient dans leur potager. Ils plantaient des l├йgumes et arrosaient les plantes sous le regard curieux d'un chat paresseux qui somnolait sur une"""
result = pipeline(example_text)
print("\nSummary:", result)

Translated text:  The sun rose gently on the horizon, casting a golden light over the rolling hills. The birds began to sing, their melody filling the fresh morning air. In a small village nestled at the foot of the mountains, the inhabitants were already busy with their daily tasks. Children ran through the cobbled streets, laughing and playing hide-and-seek, while adults opened the sides of their stone houses.

Summary: children ran through the cobbled streets, laughing and playing hide-and-seek, while adults opened the sides of their stone houses.


In [28]:
example_text = """рд╡рд╛рд╢рд┐рдВрдЧрдЯрди - рдЬреИрд╕рд╛ рдХрд┐ рдХрд╛рдВрдЧреНрд░реЗрд╕ рдЗрд╕ рд╕рдкреНрддрд╛рд╣ рд╕рддреНрд░ рд╕рдорд╛рдкреНрдд рд╣реЛрдиреЗ рд╕реЗ рдкрд╣рд▓реЗ рдЖрдЦрд┐рд░реА рдЧрддрд┐рд╡рд┐рдзрд┐ рдХреЗ рд▓рд┐рдП рд▓реМрдЯреА рд╣реИ, рдЙрд╕реЗ рд╕рд░рдХрд╛рд░реА рд╢рдЯрдбрд╛рдЙрди рдХреЛ рд░реЛрдХрдиреЗ рдХреЗ рд▓рд┐рдП 20 рджрд┐рд╕рдВрдмрд░ рдХреА рдорд╣рддреНрд╡рдкреВрд░реНрдг рд╕рдордп рд╕реАрдорд╛ рдХрд╛ рд╕рд╛рдордирд╛ рдХрд░рдирд╛ рдкрдбрд╝ рд░рд╣рд╛ рд╣реИред

рдРрд╕рд╛ рдкреНрд░рддреАрдд рд╣реЛрддрд╛ рд╣реИ рдХрд┐ рдбреЗрдореЛрдХреНрд░реЗрдЯ рдФрд░ рд░рд┐рдкрдмреНрд▓рд┐рдХрди рдиреЗ рдПрдХ рд╕рддрдд рдкреНрд░рд╕реНрддрд╛рд╡, рдпрд╛ рд╕реАрдЖрд░ рдкрд╛рд░рд┐рдд рдХрд░рдиреЗ рдХреЗ рд▓рд┐рдП рдЗрд╕реНрддреАрдлрд╛ рджреЗ рджрд┐рдпрд╛ рд╣реИ, рдЬреЛ 2025 рдХреА рд╢реБрд░реБрдЖрдд рдореЗрдВ рд╕рд░рдХрд╛рд░ рдХреЛ рдЕрд╕реНрдерд╛рдпреА рд░реВрдк рд╕реЗ рд╡рд┐рддреНрдд рдкреЛрд╖рд┐рдд рдХрд░реЗрдЧрд╛ - рд╕рдВрднрд╡рддрдГ рдорд╛рд░реНрдЪ - рдХреНрдпреЛрдВрдХрд┐ рдЙрдирдХреЗ рдкрд╛рд╕ рдЗрд╕ рд╡рд░реНрд╖ рдкреВрд░реНрдг рд╡рд┐рддреНрддрдкреЛрд╖рдг рд╕реМрджрд╛ рдХрд░рдиреЗ рдХреЗ рд▓рд┐рдП рд╕рдордп рдирд╣реАрдВ рдмрдЪрд╛ рд╣реИред рджреЛрдиреЛрдВ рдкрд╛рд░реНрдЯрд┐рдпрд╛рдБ рдирдП рд╡рд┐рддреНрддреАрдп рд╡рд░реНрд╖ рдХреЗ рд▓рд┐рдП рд╕рдордЧреНрд░ рд╡реНрдпрдп рд╕реНрддрд░ рдкрд░ рднреА рд╕рд╣рдордд рдирд╣реАрдВ рд╣реБрдИ рд╣реИрдВ, рд╕рд░рдХрд╛рд░ рдХреЗ рд╡рд┐рднрд┐рдиреНрди рд╣рд┐рд╕реНрд╕реЛрдВ рдореЗрдВ рдзрди рдХреИрд╕реЗ рдЖрд╡рдВрдЯрд┐рдд рдХрд┐рдпрд╛ рдЬрд╛рдП, рдЗрд╕рдХреА рддреЛ рдмрд╛рдд рд╣реА рдЫреЛрдбрд╝ рджреЗрдВред


рд░рд┐рдкрдмреНрд▓рд┐рдХрди рдХреЗ рд▓рд┐рдП, рдпрд╣ рджреЛрдзрд╛рд░реА рддрд▓рд╡рд╛рд░ рд╣реИред

рд╕рдордп рд╕реАрдорд╛ рддрдп рдХрд░рдиреЗ рдореЗрдВ рд░рд┐рдкрдмреНрд▓рд┐рдХрди рдХреЗ рд▓рд┐рдП рд▓рд╛рдн рдпрд╣ рд╣реИ рдХрд┐ рдирдП рд╕рд╛рд▓ рдореЗрдВ рд╕рд░рдХрд╛рд░реА рдлрдВрдбрд┐рдВрдЧ рдХреЛ рдЖрдХрд╛рд░ рджреЗрдиреЗ рдХреЗ рд▓рд┐рдП рдЙрдирдХреЗ рдкрд╛рд╕ рдЕрдзрд┐рдХ рд▓рд╛рдн рд╣реЛрдЧрд╛, рдХреНрдпреЛрдВрдХрд┐ рдирд┐рд░реНрд╡рд╛рдЪрд┐рдд рд░рд╛рд╖реНрдЯреНрд░рдкрддрд┐ рдбреЛрдирд╛рд▓реНрдб рдЯреНрд░рдореНрдк рд╡реНрд╣рд╛рдЗрдЯ рд╣рд╛рдЙрд╕ рдореЗрдВ рд▓реМрдЯ рдЖрдПрдВрдЧреЗ рдФрд░ рдЬреАрдУрдкреА рд╕реАрдиреЗрдЯ рдкрд░ рдирд┐рдпрдВрддреНрд░рдг рдХрд░ рд▓реЗрдЧреА рдФрд░ рдПрдХ рд╕рдВрдХреАрд░реНрдг рд╕рджрди рдмрд╣реБрдордд рдмрдирд╛рдП рд░рдЦреЗрдЧреАред

рдмрдбрд╝рд╛ рдирдХрд╛рд░рд╛рддреНрдордХ рдкрдХреНрд╖ рдпрд╣ рд╣реИ рдХрд┐ рдЗрд╕рд╕реЗ рдЯреНрд░рдВрдк рдХреЗ рд░рд╛рд╖реНрдЯреНрд░рдкрддрд┐ рдмрдирдиреЗ рдХреА рд╢реБрд░реБрдЖрдд рдореЗрдВ рд╣реА рдПрдХ рдорд╣рддреНрд╡рдкреВрд░реНрдг рд╕рдордп рд╕реАрдорд╛ рддрдп рд╣реЛ рдЬрд╛рдПрдЧреА, рдЬрд┐рд╕рд╕реЗ рд╕рдВрднрд╛рд╡рд┐рдд рд░реВрдк рд╕реЗ рд╕реАрдиреЗрдЯ рдХреЗ рдорд╛рдзреНрдпрдо рд╕реЗ рдЙрдирдХреЗ рдЙрдореНрдореАрджрд╡рд╛рд░реЛрдВ рдХреА рдкреБрд╖реНрдЯрд┐ рдХрд░рдиреЗ рдФрд░ рд░рд┐рдкрдмреНрд▓рд┐рдХрди рджреНрд╡рд╛рд░рд╛ рдЙрдирдХреЗ рдХрд░ рдХрдЯреМрддреА рдХреЛ рдмрдврд╝рд╛рдиреЗ рдФрд░ рдЙрдирдХреЗ рдЖрд╡реНрд░рдЬрди рдХреЛ рдЖрдЧреЗ рдмрдврд╝рд╛рдиреЗ рдХреЗ рд▓рд┐рдП рдмрдбрд╝реЗ рдкрд╛рд░реНрдЯреА-рд▓рд╛рдЗрди рдмрд┐рд▓ рд╕реЗ рдореВрд▓реНрдпрд╡рд╛рди рд╕рдордп рдмрд░реНрдмрд╛рдж рд╣реЛ рдЬрд╛рдПрдЧрд╛ред рдФрд░ рд╕реАрдорд╛ рд╕реБрд░рдХреНрд╖рд╛ рдПрдЬреЗрдВрдбрд╛ред

"рд╣рдореЗрдВ рдПрдХ рд╣реА рд╕рдордп рдореЗрдВ рдмрд╣реБрдд рд╕рд╛рд░реА рдЪреАрдЬреЗрдВ рдХрд░рдиреА рд╣реИрдВ," рд╣рд╛рдЙрд╕ рдореЗрдЬреЙрд░рд┐рдЯреА рд▓реАрдбрд░ рд╕реНрдЯреАрд╡ рд╕реНрдХреИрд▓рд┐рд╕, рдЖрд░-рд▓рд╛, рдиреЗ рдЯреНрд░рдореНрдк рдХреЗ рджреВрд╕рд░реЗ рдХрд╛рд░реНрдпрдХрд╛рд▓ рдХреЗ рдкрд╣рд▓реЗ 100 рджрд┐рдиреЛрдВ рдХреЗ рдмрд╛рд░реЗ рдореЗрдВ рдХрд╣рд╛ред "рд╣рдо рдЯрд╣рд▓рдиреЗ рдЬрд╛ рд░рд╣реЗ рд╣реИрдВ рдФрд░ рдЧрдо рдЪрдмрд╛ рд░рд╣реЗ рд╣реИрдВред"

рдХреБрдЫ рд░рд┐рдкрдмреНрд▓рд┐рдХрди рдирдП рдЯреНрд░рдореНрдк рд░рд╛рд╖реНрдЯреНрд░рдкрддрд┐ рдкрдж рдХреА рд╢реБрд░реБрдЖрдд рдореЗрдВ рдлрдВрдбрд┐рдВрдЧ рдХреА рд╕рдордп рд╕реАрдорд╛ рдореЗрдВ рдирд╣реАрдВ рдлрдВрд╕рдирд╛ рдкрд╕рдВрдж рдХрд░реЗрдВрдЧреЗред"""
result = pipeline(example_text)
print("\nSummary:", result)

Translated text:  Washington - As the Congress returns to its final activity before the session ends this week, it faces a crucial deadline of December 20 to stop the government shutdown. The Democrats and the Republicans seem to have resigned to pass a sustained motion, or CR, that will provide temporary financing to the government early in 2025 - possibly in March - because they have no time to make a full financing deal this year.

Summary: Democrats and the Republicans seem to have resigned to pass a sustained motion, or CR, that will provide temporary financing to the government early in 2025.


In [29]:
example_text = """┘И╪з╪┤┘Ж╪╖┘Ж тАУ ┘Е╪╣ ╪╣┘И╪п╪й ╪з┘Д┘Г┘И┘Ж╪м╪▒╪│ ┘З╪░╪з ╪з┘Д╪г╪│╪и┘И╪╣ ┘Д┘Д┘В┘К╪з┘Е ╪и┘Е┘И╪м╪й ╪г╪о┘К╪▒╪й ┘Е┘Ж ╪з┘Д┘Ж╪┤╪з╪╖ ┘В╪и┘Д ╪г┘Ж ┘К╪о╪к╪к┘Е ╪м┘Д╪│╪к┘З╪М ┘Б╪е┘Ж┘З ┘К┘И╪з╪м┘З ┘Е┘И╪╣╪п┘Л╪з ┘Ж┘З╪з╪ж┘К┘Л╪з ╪▒╪ж┘К╪│┘К┘Л╪з ┘Б┘К 20 ╪п┘К╪│┘Е╪и╪▒ ┘Д╪к╪м┘Ж╪и ╪е╪║┘Д╪з┘В ╪з┘Д╪н┘Г┘И┘Е╪й.

┘К╪и╪п┘И ╪г┘Ж ╪з┘Д╪п┘К┘Е┘В╪▒╪з╪╖┘К┘К┘Ж ┘И╪з┘Д╪м┘Е┘З┘И╪▒┘К┘К┘Ж ┘Е╪│╪к╪│┘Д┘Е┘И┘Ж ┘Д╪к┘Е╪▒┘К╪▒ ┘В╪▒╪з╪▒ ┘Е╪│╪к┘Е╪▒╪М ╪г┘И CR╪М ┘Е┘Ж ╪┤╪г┘Ж┘З ╪г┘Ж ┘К┘Е┘И┘Д ╪з┘Д╪н┘Г┘И┘Е╪й ┘Е╪д┘В╪к┘Л╪з ╪н╪к┘Й ╪г┘И╪з╪ж┘Д ╪╣╪з┘Е 2025 - ╪╣┘Д┘Й ╪з┘Д╪г╪▒╪м╪н ┘Б┘К ┘Е╪з╪▒╪│ - ╪н┘К╪л ┘К┘Ж┘Б╪п ╪з┘Д┘И┘В╪к ╪г┘Е╪з┘Е┘З┘Е ┘Д┘Д╪к┘И╪╡┘Д ╪е┘Д┘Й ╪з╪к┘Б╪з┘В ╪к┘Е┘И┘К┘Д ┘Г╪з┘Е┘Д ┘З╪░╪з ╪з┘Д╪╣╪з┘Е. ┘И┘Д┘Е ┘К╪к┘Б┘В ╪з┘Д╪╖╪▒┘Б╪з┘Ж ╪н╪к┘Й ╪╣┘Д┘Й ┘Е╪│╪к┘И┘Й ╪з┘Д╪е┘Ж┘Б╪з┘В ╪з┘Д╪е╪м┘Е╪з┘Д┘К ┘Д┘Д╪│┘Ж╪й ╪з┘Д┘Е╪з┘Д┘К╪й ╪з┘Д╪м╪п┘К╪п╪й╪М ┘Ж╪з┘З┘К┘Г ╪╣┘Ж ┘Г┘К┘Б┘К╪й ╪к╪о╪╡┘К╪╡ ╪з┘Д╪г┘Е┘И╪з┘Д ╪╣╪и╪▒ ╪г╪м╪▓╪з╪б ┘Е┘Ж ╪з┘Д╪н┘Г┘И┘Е╪й.

╪г┘Д┘Е╪н ╪▓╪╣┘К┘Е ╪з┘Д╪г╪║┘Д╪и┘К╪й ┘Б┘К ┘Е╪м┘Д╪│ ╪з┘Д╪┤┘К┘И╪о ╪к╪┤╪з┘Г ╪┤┘И┘Е╪▒╪М ╪п┘К┘Е┘В╪▒╪з╪╖┘К ┘Е┘Ж ┘И┘Д╪з┘К╪й ┘Ж┘К┘И┘К┘И╪▒┘Г╪М ╪е┘Д┘Й ╪н╪к┘Е┘К╪й ┘Е╪┤╪▒┘И╪╣ ┘В╪з┘Ж┘И┘Ж ┘В╪╡┘К╪▒ ╪з┘Д╪г╪м┘Д ┘К┘И┘Е ╪з┘Д╪з╪л┘Ж┘К┘Ж╪М ┘В╪з╪ж┘Д╪з┘Л: "┘Ж╪н┘Ж ╪и╪н╪з╪м╪й ╪е┘Д┘Й ╪е╪и┘В╪з╪б ╪з┘Д╪г╪н┘Г╪з┘Е ╪з┘Д┘Е╪л┘К╪▒╪й ┘Д┘Д╪о┘Д╪з┘Б ┘И╪║┘К╪▒ ╪з┘Д╪╢╪▒┘И╪▒┘К╪й ╪о╪з╪▒╪м ╪г┘К ╪к┘Е╪п┘К╪п ┘Д┘Д╪к┘Е┘И┘К┘Д ╪з┘Д╪н┘Г┘И┘Е┘К╪М ┘И╪е┘Д╪з ┘Б╪│┘К┘Г┘И┘Ж ┘Е┘Ж ╪з┘Д╪╡╪╣╪и ╪к┘Е╪▒┘К╪▒ ┘Е╪┤╪▒┘И╪╣ ┘В╪з┘Ж┘И┘Ж". CR ┘Б┘К ╪з┘Д┘И┘В╪к ╪з┘Д┘Е┘Ж╪з╪│╪и.

┘И╪и╪з┘Д┘Ж╪│╪и╪й ┘Д┘Д╪м┘Е┘З┘И╪▒┘К┘К┘Ж╪М ┘Б╪е┘Ж ┘З╪░╪з ╪│┘К┘Б ╪░┘И ╪н╪п┘К┘Ж.

╪з┘Д╪м╪з┘Ж╪и ╪з┘Д╪е┘К╪м╪з╪и┘К ╪и╪з┘Д┘Ж╪│╪и╪й ┘Д┘Д╪м┘Е┘З┘И╪▒┘К┘К┘Ж ┘Б┘К ╪к╪н╪п┘К╪п ╪з┘Д┘Е┘И╪╣╪п ╪з┘Д┘Ж┘З╪з╪ж┘К ┘З┘И ╪г┘Ж┘З┘Е ╪│┘К┘Г┘И┘Ж ┘Д╪п┘К┘З┘Е ╪з┘Д┘Е╪▓┘К╪п ┘Е┘Ж ╪з┘Д┘Ж┘Б┘И╪░ ┘Д╪к╪┤┘Г┘К┘Д ╪з┘Д╪к┘Е┘И┘К┘Д ╪з┘Д╪н┘Г┘И┘Е┘К ┘Б┘К ╪з┘Д╪╣╪з┘Е ╪з┘Д╪м╪п┘К╪п╪М ┘Е╪╣ ╪╣┘И╪п╪й ╪з┘Д╪▒╪ж┘К╪│ ╪з┘Д┘Е┘Ж╪к╪о╪и ╪п┘И┘Ж╪з┘Д╪п ╪к╪▒╪з┘Е╪и ╪е┘Д┘Й ╪з┘Д╪и┘К╪к ╪з┘Д╪г╪и┘К╪╢ ┘И╪│┘К╪╖╪▒╪й ╪з┘Д╪н╪▓╪и ╪з┘Д╪м┘Е┘З┘И╪▒┘К ╪╣┘Д┘Й ┘Е╪м┘Д╪│ ╪з┘Д╪┤┘К┘И╪о ┘И╪з┘Д╪н┘Б╪з╪╕ ╪╣┘Д┘Й ╪г╪║┘Д╪и┘К╪й ╪╢┘К┘В╪й ┘Б┘К ┘Е╪м┘Д╪│ ╪з┘Д┘Ж┘И╪з╪и.

╪з┘Д╪м╪з┘Ж╪и ╪з┘Д╪│┘Д╪и┘К ╪з┘Д┘Г╪и┘К╪▒ ┘З┘И ╪г┘Ж┘З ╪│┘К╪н╪п╪п ┘Е┘И╪╣╪п┘Л╪з ┘Ж┘З╪з╪ж┘К┘Л╪з ╪н╪з╪│┘Е┘Л╪з ┘Б┘К ┘И┘В╪к ┘Е╪и┘Г╪▒ ┘Е┘Ж ╪▒╪ж╪з╪│╪й ╪к╪▒╪з┘Е╪и╪М ┘Е┘Е╪з ┘В╪п ┘К╪│╪к╪║╪▒┘В ┘И┘В╪к┘Л╪з ╪л┘Е┘К┘Ж┘Л╪з ╪и╪╣┘К╪п┘Л╪з ╪╣┘Ж ╪к╪г┘Г┘К╪п ┘Е╪▒╪┤╪н┘К┘З ┘Е┘Ж ╪о┘Д╪з┘Д ┘Е╪м┘Д╪│ ╪з┘Д╪┤┘К┘И╪о ┘И┘Е┘Ж ┘Е╪┤╪▒┘И╪╣ ┘В╪з┘Ж┘И┘Ж ╪з┘Д╪н╪▓╪и ╪з┘Д┘Г╪и┘К╪▒ ╪з┘Д╪░┘К ┘К╪к╪╖┘Д╪╣ ╪е┘Д┘К┘З ╪з┘Д╪м┘Е┘З┘И╪▒┘К┘И┘Ж ┘Д╪к┘Е╪п┘К╪п ╪к╪о┘Б┘К╪╢╪з╪к┘З ╪з┘Д╪╢╪▒┘К╪и┘К╪й ┘И╪к╪╣╪▓┘К╪▓ ╪з┘Д┘З╪м╪▒╪й. ┘И╪г╪м┘Ж╪п╪й ╪г┘Е┘Ж ╪з┘Д╪н╪п┘И╪п.

┘И╪г╪╢╪з┘Б ┘Г┘К┘Ж┘К╪п┘К ╪г┘Ж┘З ┘Е┘Ж ╪з┘Д┘Е╪▒╪м╪н ╪г┘Ж ┘К╪╣┘Д┘В ╪з┘Д┘Г┘И┘Ж╪м╪▒╪│ "30 [┘Е┘Д┘К╪з╪▒ ╪п┘И┘Д╪з╪▒] ╪е┘Д┘Й 40 ┘Е┘Д┘К╪з╪▒ ╪п┘И┘Д╪з╪▒ ┘Е┘Ж ╪з┘Д╪е╪║╪з╪л╪й ┘Б┘К ╪н╪з┘Д╪з╪к ╪з┘Д┘Г┘И╪з╪▒╪л" ╪е┘Д┘Й ╪з┘Д╪м┘Е┘З┘И╪▒┘К╪й ╪з┘Д╪к╪┤┘К┘Г┘К╪й╪М ╪и┘Е╪з ┘Б┘К ╪░┘Д┘Г ╪к┘Е┘И┘К┘Д ╪з┘Д┘И┘Д╪з┘К╪з╪к ╪з┘Д╪к┘К ╪╢╪▒╪и╪к┘З╪з ╪з┘Д╪г╪╣╪з╪╡┘К╪▒ ┘З╪░╪з ╪з┘Д╪╣╪з┘Е. ┘И┘В╪з┘Д: "┘Д┘Ж ┘К┘Г┘И┘Ж ╪░┘Д┘Г ┘Г╪з┘Б┘К╪з╪М ┘Д┘Г┘Ж┘З ╪│┘К┘Г┘И┘Ж ┘Г╪з┘Б┘К╪з ┘Д┘Д╪и╪п╪б"."""

result = pipeline(example_text)
print("\nSummary:", result)

Translated text:  WASHINGTON, DC тАУ With Congress returning this week to make one final move before it closes, it faces a major deadline on December 20 to avoid shutting down the government. Democrats and Republicans seem resigned to pushing through a decision, or CR, that would temporarily fund the government until early 2025 тАУ probably in March тАУ when they run out of time to agree on full funding this year.

Summary: Democrats and Republicans seem resigned to pushing through a decision that would temporarily fund the government until early 2025.


In [31]:
# Save the translation model and tokenizer
translation_model.save_pretrained("./translation_model")
translation_tokenizer.save_pretrained("./translation_model")



('./translation_model/tokenizer_config.json',
 './translation_model/special_tokens_map.json',
 './translation_model/sentencepiece.bpe.model',
 './translation_model/added_tokens.json')

In [14]:
lora_model.save_pretrained("./lora_summarization_model")
summarization_tokenizer.save_pretrained("./lora_summarization_model")