In [1]:
import random
import nltk
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
import ast

In [2]:
# Load the bot configuration. The file holds a Python literal (a dict) that is
# later read through BOT_CONFIG['intents'] and BOT_CONFIG['failure_phrases'].
# The encoding is given explicitly: the intent examples contain Cyrillic text,
# and the platform default encoding is not UTF-8 everywhere.
with open('config.txt', 'r', encoding='utf-8') as f:
    content = f.read()

# literal_eval only evaluates Python literals, so the config cannot run code.
BOT_CONFIG = ast.literal_eval(content)

In [3]:
# Flatten the intent config into [example_text, intent_name] pairs, e.g.
# [['Привет', 'hello'], ['здарова', 'hello'], ..., ['Пока', 'bye'], ...]
dataset = [
    [example, intent]
    for intent, intent_data in BOT_CONFIG['intents'].items()
    for example in intent_data['examples']
]

# Parallel lists: the texts to vectorise and the label of each text.
corpus = [pair[0] for pair in dataset]
y = [pair[1] for pair in dataset]

In [4]:
from sklearn.feature_extraction.text import TfidfVectorizer

# TF-IDF over character 2-grams and 3-grams of every training example.
vectorizer = TfidfVectorizer(
    ngram_range=(2, 3),
    analyzer='char',
)
X = vectorizer.fit_transform(corpus)

In [5]:
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

# probability=True is needed because get_intent() calls clf.predict_proba().
clf = SVC(probability=True)

# A fixed random_state makes the train/test partition identical on every
# "Restart & Run All"; without it the classifier is fitted on a different
# random subset each run. X_test / y_test are held out but not evaluated in
# the cells of this notebook.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
clf.fit(X_train, y_train)

SVC(probability=True)

In [6]:
def get_intent(text, threshold=0.1):
    """Classify ``text`` and return the most probable intent label.

    Args:
        text: Raw user message.
        threshold: The highest class probability must be strictly greater
            than this value for the intent to be accepted.

    Returns:
        The entry of ``clf.classes_`` with the highest predicted probability,
        or ``None`` when that probability does not exceed ``threshold``.
    """
    # Vectorise once and reuse the features for both classifier calls.
    features = vectorizer.transform([text])
    proba_list = clf.predict_proba(features)[0]

    # max() keeps the first maximal element, so ties resolve to the lowest index.
    best_index, max_proba = max(enumerate(proba_list), key=lambda pair: pair[1])

    # Debug trace: the message, the label from clf.predict() and the top probability.
    print(text, clf.predict(features), max_proba)

    if max_proba > threshold:
        return clf.classes_[best_index]
    return None

In [7]:
import random

In [8]:
def get_response_by_intent(intent):
    """Return one randomly chosen reply configured for ``intent``.

    The replies are read from ``BOT_CONFIG['intents'][intent]['responses']``.
    """
    intent_config = BOT_CONFIG['intents'][intent]
    return random.choice(intent_config['responses'])

In [9]:
import nltk

In [10]:
# Read the dialogue corpus. The encoding is given explicitly so that decoding
# does not depend on the platform default (clear_text() keeps only Cyrillic
# letters, so the file is expected to contain non-ASCII text).
with open('dialogues.txt', encoding='utf-8') as f:
    content = f.read()

# Dialogue blocks are separated from each other by an empty line.
blocks = content.split('\n\n')

def clear_text(text):
    """Lower-case ``text`` and drop every character outside the allowed set.

    Kept characters: Russian lower-case letters (including 'ё'), ASCII
    digits, the hyphen and the space. Everything else is removed.
    """
    allowed = frozenset('абвгдеёжзийклмнопрстуфхцчшщъыьэюя0123456789- ')
    return ''.join(char for char in text.lower() if char in allowed)

# Build [question, answer] pairs from the first two lines of every block.
dataset = []
questions = set()  # normalised questions already added, used to skip duplicates

for block in blocks:
    lines = block.split('\n')
    if len(lines) < 2:
        # A block needs a question line and an answer line.
        continue

    # The first two characters of each line are cut off before use.
    question = clear_text(lines[0][2:])
    answer = lines[1][2:]

    if not question or not answer or question in questions:
        continue

    questions.add(question)
    dataset.append([question, answer])

In [11]:
# Inverted index: word -> list of (question, answer) pairs whose question,
# split on single spaces, contains that word.
word_index = {}
for question, answer in dataset:
    pair = (question, answer)
    for word in question.split(' '):
        word_index.setdefault(word, []).append(pair)

# Keep only the words that are attached to fewer than 1000 pairs.
search_dataset = {}
for word, pairs in word_index.items():
    if len(pairs) < 1000:
        search_dataset[word] = pairs

In [12]:
def get_response_generatively(text, max_distance_ratio=0.4):
    """Return the stored answer whose question is closest to ``text``.

    The query is normalised with ``clear_text``. Candidates are all
    (question, answer) pairs in ``search_dataset`` that share at least one
    word with the query. Each candidate is scored by edit distance divided by
    the question length.

    Args:
        text: Raw user message.
        max_distance_ratio: A candidate is accepted only if its score is
            strictly below this value.

    Returns:
        The answer of the best-scoring candidate, or ``None`` when the
        normalised query is empty or no candidate is close enough. Candidates
        with equal scores are ordered by question and then answer, so the
        result does not depend on set iteration order.
    """
    text = clear_text(text)
    if not text:
        return None

    # Gather every pair that shares at least one word with the query.
    candidates = set()
    for word in text.split(' '):
        candidates.update(search_dataset.get(word, ()))

    best = None
    for question, answer in candidates:
        if not question:
            # Guard against division by zero on an empty question.
            continue
        # Edit distance is never smaller than the length difference, so this
        # cheap check rejects hopeless candidates before the expensive call.
        if abs(len(text) - len(question)) / len(question) >= max_distance_ratio:
            continue
        score = nltk.edit_distance(text, question) / len(question)
        if score >= max_distance_ratio:
            continue
        candidate = (score, question, answer)
        if best is None or candidate < best:
            best = candidate

    if best is None:
        return None
    return best[2]

In [13]:
# get_response_generatively('скажи свое имя')

In [14]:
def get_failure_phrase():
    """Return a random fallback phrase from ``BOT_CONFIG['failure_phrases']``."""
    return random.choice(BOT_CONFIG['failure_phrases'])

In [15]:
# Counters of how many requests each answering stage of bot() has handled.
stats = dict.fromkeys(('intent', 'generative', 'fails'), 0)

In [16]:
def bot(request):
    """Produce a reply to ``request`` and record which stage answered it.

    Stages are tried in order: intent classification, retrieval from the
    dialogue dataset, then a fallback phrase. The matching counter in
    ``stats`` is incremented before the reply is returned.
    """
    # NLU
    intent = get_intent(request)

    # Response generation
    if not intent:
        generated = get_response_generatively(request)
        if not generated:
            stats['fails'] += 1
            return get_failure_phrase()
        stats['generative'] += 1
        return generated

    stats['intent'] += 1
    return get_response_by_intent(intent)

In [17]:
# Smoke test: send one sample question through the whole bot() pipeline.
bot('как тебя зовут?')

как тебя зовут? ['country'] 0.04038696056013519


'ааа?'

In [18]:
# Display the per-stage counters that bot() updates.
stats

{'intent': 0, 'generative': 0, 'fails': 1}

In [19]:
! pip install python-telegram-bot

You should consider upgrading via the '/Users/andreinovikov/Documents/Projects/Python/Chat_Bot/env/bin/python3 -m pip install --upgrade pip' command.[0m


In [20]:
from telegram.ext import Updater, CommandHandler, MessageHandler, Filters


def start(update, context):
    """Reply with a fixed greeting when the /start command is received."""
    message = update.message
    message.reply_text('Hi!')


def help_command(update, context):
    """Reply with a fixed help text when the /help command is received."""
    message = update.message
    message.reply_text('Help!')


def use_bot(update, context):
    """Answer an incoming text message with the chatbot's reply.

    The reply from ``bot()`` is sent back to the chat, then the exchange and
    the current ``stats`` counters are printed to the notebook output.
    Updates that carry no message text are ignored.
    """
    # update.message is None for edited messages and channel posts, which this
    # handler also receives; effective_message covers all of those cases.
    message = update.effective_message
    if message is None or message.text is None:
        return

    answer = bot(message.text)
    message.reply_text(answer)
    print(message.text, answer)
    print(stats)
    print()


def main(token=None):
    """Start the bot.

    Args:
        token: Telegram Bot API token. When omitted, the value of the
            ``TELEGRAM_BOT_TOKEN`` environment variable is used.

    Raises:
        RuntimeError: If no token is passed and the environment variable is
            unset or empty.
    """
    import os  # local import so this cell does not depend on the import cell

    # SECURITY: the token used to be hard-coded on this line. A token that has
    # been stored in a notebook should be treated as leaked and revoked.
    token = token or os.environ.get('TELEGRAM_BOT_TOKEN')
    if not token:
        raise RuntimeError(
            'Telegram token is missing: pass it to main() or set the '
            'TELEGRAM_BOT_TOKEN environment variable.'
        )

    updater = Updater(token, use_context=True)

    dp = updater.dispatcher
    dp.add_handler(CommandHandler("start", start))
    dp.add_handler(CommandHandler("help", help_command))
    # Every text message that is not a command goes to the chatbot.
    dp.add_handler(MessageHandler(Filters.text & ~Filters.command, use_bot))

    updater.start_polling()
    # Blocks until the process receives a stop signal.
    updater.idle()

In [None]:
# Launch the Telegram bot defined by main() above.
main()