In [None]:
import os
from telegram.ext  import Updater, CommandHandler, MessageHandler, Filters, CallbackContext
from telegram import Update
import string
from pymorphy2 import MorphAnalyzer
from stop_words import get_stop_words
import annoy
from gensim.models import Word2Vec, FastText
import pickle
import numpy as np
from tqdm import tqdm_notebook
import pandas as pd

---

In [None]:
## Болталка

### Препроцессинг

# Punctuation characters that preprocess_txt strips from the input.
exclude = set(string.punctuation)
# Russian stop words that preprocess_txt drops after normalisation.
sw = set(get_stop_words("ru"))
# pymorphy2 analyzer used to reduce every token to its normal form.
morpher = MorphAnalyzer()

def preprocess_txt(line):
    """Turn a raw string into a list of normalised tokens.

    Characters listed in ``exclude`` are removed, the remainder is split on
    whitespace, and every token is lower-cased and replaced by the normal form
    of its first pymorphy2 parse. Tokens found in ``sw`` and empty strings are
    dropped.

    Args:
        line: raw input text.

    Returns:
        List of normalised tokens in their original order.
    """
    without_punct = "".join(ch for ch in line.strip() if ch not in exclude)
    tokens = []
    for raw_token in without_punct.split():
        lemma = morpher.parse(raw_token.lower())[0].normal_form
        if lemma != "" and lemma not in sw:
            tokens.append(lemma)
    return tokens

In [None]:
modelFT = FastText.load("ft_model")
ft_index = annoy.AnnoyIndex(100 ,'angular')

# Maps an Annoy item id to the answer text stored for that question.
index_map = {}
counter = 0

# Explicit UTF-8 so the Cyrillic corpus is decoded the same way on every
# platform instead of depending on the locale's default encoding.
with open("prepared_answers.txt", "r", encoding="utf-8") as f:
    for line in tqdm_notebook(f):
        # Drop the line terminator so stored answers do not end with "\n".
        spls = line.rstrip("\r\n").split("\t")
        if len(spls) < 2:
            # Blank or malformed line without a tab-separated answer: skip it
            # instead of failing on spls[1].
            continue
        question = preprocess_txt(spls[0])

        # Mean of the FastText vectors of the question tokens; stays all-zero
        # when none of the tokens is known to the model.
        n_ft = 0
        vector_ft = np.zeros(100)
        for word in question:
            if word in modelFT.wv:
                vector_ft += modelFT.wv[word]
                n_ft += 1
        if n_ft > 0:
            vector_ft = vector_ft / n_ft

        index_map[counter] = spls[1]
        ft_index.add_item(counter, vector_ft)
        counter += 1

ft_index.build(10)
ft_index.save('speaker.ann')

In [None]:
# Re-create an index with the same dimensionality and metric as the one saved
# above and load the stored data from 'speaker.ann'.
# NOTE(review): only the Annoy index is written to disk; index_map (item id ->
# answer) is filled in the index-building cell, so that cell still has to run
# in a fresh kernel before the bot can answer — consider persisting index_map.
ft_index = annoy.AnnoyIndex(100, 'angular')
ft_index.load('speaker.ann')

In [None]:
# Smoke check: ask the loaded index for the 2 nearest items to the all-zero vector.
ft_index.get_nns_by_vector(np.zeros(100), 2)

In [None]:
def embed_txt(txt, idfs, midf):
    """Average the FastText vectors of the tokens in ``txt``.

    Args:
        txt: iterable of tokens (e.g. the output of ``preprocess_txt``).
        idfs: not used by the current implementation; kept so existing
            callers keep working.
        midf: not used by the current implementation; kept so existing
            callers keep working.

    Returns:
        A numpy vector of length 100: the mean of the vectors of the tokens
        present in ``modelFT.wv``, or an all-zero vector when none of the
        tokens is present.
    """
    n_ft = 0
    vector_ft = np.zeros(100)
    for word in txt:
        if word in modelFT.wv:
            vector_ft += modelFT.wv[word]
            n_ft += 1
    if n_ft == 0:
        # Nothing to average: return the zero vector instead of dividing by
        # zero, which would produce a vector of NaNs.
        return vector_ft
    return vector_ft / n_ft

---

In [None]:
## Определение запроса

### Язык

import re 

def is_english(text):
    """Return True when ``text`` contains at least one ASCII Latin letter."""
    latin_letter = re.search(r'[a-zA-Z]', text)
    return latin_letter is not None

### Ключевые слова (погода и перевод)

# Keywords that mark a message as a weather request; textMessage compares them
# against the normalised tokens produced by preprocess_txt.
list_for_weather = [
    'погода',
    'солнечно',
    'осадка',
    'дождь',
    'снег',
    'ливень',
    'зонт',
]
# Keywords that mark a message as a translation request.
list_for_translation = [
    'перевод',
    'английский',
]

### Проверка ключевых слов

def is_in_list(text, list):
    """Return True if at least one item of ``text`` is contained in ``list``.

    Args:
        text: iterable of tokens to check.
        list: container of keywords. The name shadows the builtin ``list``
            inside this function; it is kept so that keyword-argument callers
            keep working.

    Returns:
        True on the first match, False when nothing matches or ``text`` is empty.
    """
    # any() stops at the first hit, like the original early return.
    return any(word in list for word in text)

---

In [None]:
## Прогноз погоды

### API Request

import requests 
import json

# weatherapi.com API key. Prefer supplying it through the WEATHERAPI_KEY
# environment variable so the secret does not live in the notebook.
# NOTE(review): the literal fallback below is committed with the notebook and
# should be treated as leaked — rotate it and remove the fallback.
key = os.environ.get('WEATHERAPI_KEY', '2f1827ed5db24c38a4c122319222108')

def get_weather(city):
    """Fetch the current weather for ``city`` from weatherapi.com and describe it in Russian.

    Args:
        city: location query sent as the ``q`` parameter of the request.

    Returns:
        A one-line Russian message with the resolved city name, local time,
        condition text, temperature, feels-like temperature, wind speed and
        UV index taken from the API response.

    Raises:
        requests.RequestException: on network failure, timeout or a non-2xx
            response.
        KeyError: if the response JSON lacks one of the expected fields.
    """
    # A single request (the original sent the same request twice). Passing
    # ``params`` lets requests URL-encode the city name, and the timeout keeps
    # the bot handler from hanging on a stalled connection. HTTPS keeps the
    # API key out of clear text.
    response = requests.get(
        'https://api.weatherapi.com/v1/current.json',
        params={'key': key, 'q': city, 'lang': 'ru'},
        timeout=10,
    )
    response.raise_for_status()
    json_data = response.json()

    location = json_data['location']
    current = json_data['current']

    city_name = location['name']
    # 'localtime' is split on a space and the second part is shown as the time.
    local_time = location['localtime'].split(' ')[1]
    temp = current['temp_c']
    temp_fl = current['feelslike_c']
    condition = current['condition']['text']
    # NOTE(review): 'wind_mph' is reported with the unit "м/ч" in the message —
    # confirm this is the intended unit label.
    wind = current['wind_mph']
    uv = current['uv']

    msg = f'В городе {city_name} сейчас {local_time}. О погоде: {condition}, температура {temp} градусов, ощущается как {temp_fl} градусов, скорость ветра {wind} м/ч, уровень ультрафиолета {uv}'

    return msg

### Определение города

import spacy

# spaCy pipeline "ru_core_news_sm"; location_recognition reads the entities
# (doc.ents) it produces.
nlp = spacy.load("ru_core_news_sm")

def location_recognition(text):
    """Return the normal forms of all ``LOC`` entities spaCy finds in ``text``.

    Each matching entity's text is lower-cased and replaced by the normal form
    of its first pymorphy2 parse.

    Args:
        text: raw message text.

    Returns:
        List of normalised location strings (empty when nothing is found).
    """
    entities = nlp(text).ents
    return [
        morpher.parse(ent.text.lower())[0].normal_form
        for ent in entities
        if ent.label_ == 'LOC'
    ]

### Погода в городе

def forecast(text):
    """Build a weather reply for the first location mentioned in ``text``.

    Args:
        text: raw message text.

    Returns:
        The message produced by ``get_weather`` for the first recognised
        location, or 'Уточните' when no location is recognised or the lookup
        fails.
    """
    cities = location_recognition(text)
    if not cities:
        # No location found in the message: ask the user to be more specific.
        return 'Уточните'
    try:
        # location_recognition returns a list; get_weather formats a single
        # place name into the request, so pass the first match only.
        return get_weather(cities[0])
    except Exception:
        # Best-effort reply: any lookup failure becomes the same prompt.
        # Exception (not a bare except) lets KeyboardInterrupt/SystemExit through.
        return 'Уточните'

---

In [None]:
## Перевод текста

### Загрузка модели

from transformers import FSMTForConditionalGeneration, FSMTTokenizer

# Checkpoint name passed to both from_pretrained calls below, so the tokenizer
# and the model come from the same checkpoint.
# NOTE(review): presumably fetched from the Hugging Face hub on first use —
# confirm network access / caching for the deployment environment.
model_name = "facebook/wmt19-en-ru"
tokenizer = FSMTTokenizer.from_pretrained(model_name)
model = FSMTForConditionalGeneration.from_pretrained(model_name)

### Функция перевода

def en_ru_translation(text):
    """Translate ``text`` with the module-level FSMT ``tokenizer`` and ``model``.

    Args:
        text: source text to translate.

    Returns:
        The decoded first generated sequence, with special tokens skipped.
    """
    encoded = tokenizer.encode(text, return_tensors="pt")
    generated = model.generate(encoded)
    # Only the first generated sequence is decoded.
    return tokenizer.decode(generated[0], skip_special_tokens=True)

---

In [None]:
## Бот

# Take the bot token from the TELEGRAM_BOT_TOKEN environment variable so a real
# token never has to be pasted into the notebook; the fallback is the example
# token that was here before.
updater = Updater(token=os.environ.get('TELEGRAM_BOT_TOKEN', '123456:ABC-DEF1234ghIkl-zyx57W2v1u123ew11')) # Example Token
dispatcher = updater.dispatcher

def startCommand(update: Update, context: CallbackContext):
    """Reply to the /start command with a greeting.

    Args:
        update: incoming Telegram update.
        context: handler context (unused).
    """
    # effective_message is also set for edited messages, where update.message
    # is None and the original call would raise AttributeError.
    update.effective_message.reply_text('Привет!')
    
# Detect what the user is asking for and answer accordingly

def textMessage(update: Update, context: CallbackContext):
    """Route an incoming text message to translation, weather or small talk.

    Checks are applied in this order:
      1. the text contains a Latin letter -> reply with its EN->RU translation;
      2. a translation keyword is present -> ask which sentence to translate;
      3. a weather keyword is present -> reply with the forecast;
      4. otherwise -> reply with the answer of the nearest indexed question,
         or "Не понимаю тебя" when nothing is close enough.

    Args:
        update: incoming Telegram update.
        context: handler context (unused).
    """
    # effective_message is also set for edited messages, where update.message
    # is None.
    message = update.effective_message
    raw_text = message.text
    input_txt = preprocess_txt(raw_text)

    # Translation from English
    if is_english(raw_text):
        message.reply_text(en_ru_translation(raw_text))
    elif is_in_list(input_txt, list_for_translation):
        message.reply_text("Какое предложение вам перевести?")

    # Weather forecast
    elif is_in_list(input_txt, list_for_weather):
        message.reply_text(forecast(raw_text))

    # Small talk
    else:
        vect_ft = embed_txt(input_txt, {}, 1)
        # No usable embedding (all tokens unknown or filtered out): the vector
        # is NaN or all zeros, so a nearest-neighbour lookup is meaningless.
        if not np.all(np.isfinite(vect_ft)) or not np.any(vect_ft):
            message.reply_text("Не понимаю тебя")
            return

        ft_index_val, distances = ft_index.get_nns_by_vector(vect_ft, 1, include_distances=True)
        if not distances:
            # The index returned no neighbour at all.
            message.reply_text("Не понимаю тебя")
        elif distances[0] > 0.35:
            # Nearest question is farther than the 0.35 distance cut-off.
            print(distances[0])
            message.reply_text("Не понимаю тебя")
        else:
            message.reply_text(index_map[ft_index_val[0]])



# on different commands - answer in Telegram
dispatcher.add_handler(CommandHandler("start", startCommand))
# Every text message that is not a command is routed to textMessage.
dispatcher.add_handler(MessageHandler(Filters.text & ~Filters.command, textMessage))

# Start the Bot
updater.start_polling()
# NOTE(review): idle() presumably blocks this cell until the process is
# interrupted, so nothing placed after it will run — confirm.
updater.idle()