In [2]:
import boto3
import os
import concurrent.futures
import os


# AWS region passed to the Bedrock runtime client created below.
region = 'us-east-1'
# Client for the 'bedrock-runtime' service; generate_response hands it to
# invoke_bedrock_model for every request.
bedrock = boto3.client(
    service_name='bedrock-runtime',
    region_name=region,
)

# Define the model IDs you want to use
# (generate_response tries them in list order and returns the first non-empty reply).
MODEL_IDS = [
    "anthropic.claude-3-5-sonnet-20240620-v1:0",
]

# Directory that the generation cell joins with each word-list file name.
WORD_LIST_DIR = "word_list/"
# File the generation cell opens in append mode to store the generated records.
OUTPUT_FILE = "generated_dictionary.txt"

# Russian-language prompt. The single `{}` placeholder is filled with the word
# via str.format in process_word. It asks for an explanation in very simple
# Russian, synonyms, and two simple example sentences, in a fixed layout whose
# bold labels are the ones the parsing cell's regular expressions look for.
PROMPT_TEMPLATE = """
Что означает {}? Дайте объяснение только на очень простом русском языке. Дайте синонимы. 
Дайте 2 простых примера предложений.

**Объяснение**: [простое объяснение слова без слова спереди].  
**Синонимы**: [список синонимов].  
**Простые предложения**: «[предложение 1]» «[предложение 2]».

Используйте этот формат, чтобы ответ всегда возвращался в заданной структуре.
"""

In [3]:
def invoke_bedrock_model(client, id, prompt, max_tokens=2000, temperature=0, top_p=0.9):
    """Send one user message through ``client.converse`` and return the reply text.

    Args:
        client: Object exposing a ``converse`` method (the notebook passes its
            Bedrock runtime client).
        id: Model identifier, forwarded as ``modelId``.
        prompt: Text of the single user message.
        max_tokens: Forwarded as ``inferenceConfig["maxTokens"]``.
        temperature: Forwarded as ``inferenceConfig["temperature"]``.
        top_p: Forwarded as ``inferenceConfig["topP"]``.

    Returns:
        The ``text`` field of the first content item in the response message,
        or ``None`` when the call or the response lookup raises. In that case
        the error is printed and not re-raised.
    """
    user_message = {"role": "user", "content": [{"text": prompt}]}
    sampling = {
        "temperature": temperature,
        "maxTokens": max_tokens,
        "topP": top_p,
    }
    try:
        reply = client.converse(
            modelId=id,
            messages=[user_message],
            inferenceConfig=sampling,
        )
        return reply['output']['message']['content'][0]['text']
    except Exception as err:
        # Best-effort: callers treat None as "this model produced nothing".
        print(f"Error invoking model {id}: {err}")
        return None

def load_words_from_file(file_path):
    """Read a UTF-8 text file and return its whitespace-separated tokens.

    Args:
        file_path: Path of the file to read.

    Returns:
        A list of strings produced by splitting the whole file content on any
        run of whitespace; an empty list for an empty file.
    """
    with open(file_path, mode='r', encoding='utf-8') as handle:
        return handle.read().split()

def generate_response(prompt):
    """Return the first non-empty reply obtained by trying each model in MODEL_IDS.

    Models are queried lazily, in list order, through invoke_bedrock_model with
    the module-level ``bedrock`` client; once one yields a truthy reply no
    further model is called.

    Args:
        prompt: Prompt text forwarded unchanged to every model tried.

    Returns:
        The first truthy reply, or a fixed failure message when every model
        returned ``None`` or an empty string.
    """
    fallback = "Failed to generate response from all models."
    # Generator expression: each model is only invoked when next() asks for it.
    attempts = (invoke_bedrock_model(bedrock, model_id, prompt) for model_id in MODEL_IDS)
    return next((answer for answer in attempts if answer), fallback)

In [None]:
# Record delimiters written into the output file. The spelling must stay exactly
# as is, because the parsing cell splits on these same literal strings.
separator1 = "<<<SEPATATOR1>>>"  # sits between the word and its generated text
separator2 = "<<<SEPATATOR2>>>"  # terminates one record


def process_word(word):
    """Generate the dictionary text for one word and wrap it as a file record.

    Args:
        word: Word substituted into PROMPT_TEMPLATE.

    Returns:
        ``word + separator1 + generated text + separator2`` as a single string.
    """
    filled_prompt = PROMPT_TEMPLATE.format(word)
    print(f"Generating response for: {word}")
    answer = generate_response(filled_prompt)
    return "{}{}{}{}".format(word, separator1, answer, separator2)


# Collect the words to look up: every all-lowercase token from each word-list
# file that exists under WORD_LIST_DIR (missing files are skipped).
all_words = []

for list_name in ["8000.txt", "9000.txt","10000.txt"]:
    list_path = os.path.join(WORD_LIST_DIR, list_name)
    if not os.path.isfile(list_path):
        continue
    all_words.extend(token for token in load_words_from_file(list_path) if token.islower())

# Query the model for every word on two worker threads and append each record
# to OUTPUT_FILE as soon as its future finishes (completion order, not input order).
with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
    pending = [executor.submit(process_word, token) for token in all_words]

    with open(OUTPUT_FILE, "a", encoding='utf-8') as out_file:
        for finished in concurrent.futures.as_completed(pending):
            out_file.write(finished.result())

In [2]:
import re

a = []  # words
b = []  # meanings/explanations
c = []  # synonyms
d = []  # sample sentences

# Label the model is asked to put in front of the explanation.
_MEANING_LABEL = "**Объяснение**:"


def _parse_entry(entry):
    """Split one raw record into ``(word, meaning, synonyms, sentences)``.

    Args:
        entry: Text of a single record, i.e. one piece of the file content
            after splitting on the record terminator.

    Returns:
        A 4-tuple of stripped strings, or ``None`` when the record does not
        contain exactly one word/response separator or when any of the three
        sections cannot be located. Returning ``None`` before anything is
        stored keeps the four result lists the same length.
    """
    parts = entry.split("<<<SEPATATOR1>>>")
    if len(parts) != 2:
        return None

    word = parts[0].strip()
    content = parts[1].strip()

    # Meaning: everything from the start up to the first blank line followed by "**".
    meaning_match = re.search(r'^(.*?)(?=\n\n\*\*)', content, re.DOTALL)
    # Synonyms: text after the label up to the next blank-line + "**" or the end.
    synonym_match = re.search(r'\*\*Синонимы\*\*:\s*(.*?)(?=\n\n\*\*|$)', content, re.DOTALL)
    # Sample sentences: text after the label up to the end of the record.
    sentence_match = re.search(r'\*\*Простые предложения\*\*:\s*(.*?)(?=$)', content, re.DOTALL)
    if not (meaning_match and synonym_match and sentence_match):
        return None

    meaning = meaning_match.group(1).strip()
    # Remove the label as a whole prefix. str.lstrip(label) would treat the
    # label as a *set of characters* and also eat leading letters of the
    # explanation itself (e.g. "Обсуждение" -> "уждение").
    if meaning.startswith(_MEANING_LABEL):
        meaning = meaning[len(_MEANING_LABEL):].lstrip()

    return word, meaning, synonym_match.group(1).strip(), sentence_match.group(1).strip()


# Read from file
with open('generated_dictionary.txt', 'r', encoding='utf-8') as file:
    dictionary_text = file.read()

# Split the text into entries
entries = dictionary_text.split("<<<SEPATATOR2>>>")

# Process each entry; only fully parsed records are stored, all four lists at once.
for entry in entries:
    parsed = _parse_entry(entry)
    if parsed is None:
        continue
    word, meaning, synonyms, sentences = parsed
    a.append(word)
    b.append(meaning)
    c.append(synonyms)
    d.append(sentences)

# Sanity check: the four counts must be identical.
print(len(a))
print(len(b))
print(len(c))
print(len(d))


8309
8309
8309
8309


In [8]:
import html

# Page skeleton. Doubled braces are literal CSS braces for str.format; the only
# placeholder is {entries}. The charset declaration tells the browser that the
# Cyrillic text in the file is UTF-8, matching the encoding used when writing it.
html_content = """
<meta charset="utf-8">
<style>
    body {{font-family: Helvetica, sans-serif; margin: 0; padding:80px; padding-top:0; padding-bottom: 0 ;background: #fff;}}
    .entry {{border-bottom: 1px solid #000; padding: 8px 0;}}
    .word {{font-size: 22px; font-weight: bold; margin-bottom: 4px;}}
    .info {{font-size: 14px; margin-left: 10px;}}
    .label {{font-weight: bold; margin-right: 4px;}}
</style>
{entries}
"""


def _render_entry(word, meaning, synonyms, example):
    """Return the HTML block for one dictionary entry.

    All four values are model- or file-derived text, so ``&``, ``<`` and ``>``
    are escaped before interpolation to keep stray markup characters from
    breaking the page.
    """
    word, meaning, synonyms, example = (
        html.escape(text, quote=False) for text in (word, meaning, synonyms, example)
    )
    return f"""
    <div class="entry">
        <div class="word">{word}</div>
        <div class="info"><span class="label">Значение:</span><span class="meaning">{meaning}</span></div>
        <div class="info"><span class="label">Синонимы:</span>{synonyms}</div>
        <div class="info"><span class="label">Пример:</span>{example}</div>
    </div>
    """


# Join once instead of growing a string with += for every entry.
entries_html = "".join(_render_entry(*row) for row in zip(a, b, c, d))

final_html = html_content.format(entries=entries_html)

with open('slovar.html', 'w', encoding='utf-8') as f:
    f.write(final_html)