In [46]:
import re
import json
import torch
from transformers import GPT2LMHeadModel, GPT2Tokenizer      


class Model:
    """Dialogue wrapper around a GPT-2 language model.

    The instance keeps a running transcript in ``self.context``. Each call to
    :meth:`answer` appends the templated user turn, generates a continuation
    and appends the extracted reply to the transcript.
    """

    def __init__(self, config_path='config/model.json'):
        """Read the JSON config and load the model it names.

        The config must contain ``model_name``, ``template`` (used via
        ``str.format`` with one positional argument) and ``device``.
        ``model_cpt`` (path to a checkpoint) is optional. Whatever remains
        after those keys are popped is kept as ``self.config``;
        :meth:`generate` reads ``self.config['generate_config']``.
        """
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(config_path, 'r', encoding='utf-8') as f:
            config = json.load(f)
        gpt2_name = config.pop('model_name')
        self.template = config.pop('template')

        self.device = config.pop('device')
        # get_model() already treats None as "no checkpoint", so a missing
        # key should not be a KeyError.
        checkpoint_path = config.pop('model_cpt', None)
        self.config = config
        self.context = ''

        self.get_model(gpt2_name, checkpoint_path)

    def answer(self, text):
        """Add a user turn to the transcript and return the model's reply.

        ``text`` gets a trailing '.' when it does not already end with
        '.', '?' or '!'. An empty string is handled the same way instead of
        raising ``IndexError``.
        """
        if not text.endswith(('.', '?', '!')):
            text += '.'

        self.context += self.template.format(text)
        response = self.generate(self.context)

        answer = self.process_response(response)
        self.context += answer

        return answer

    def process_response(self, response):
        """Cut the raw generation down to its first sentence.

        If a newline occurs before the first '.', '?' or '!', the text up to
        that newline is returned with a '.' appended. Otherwise the text up to
        and including the first terminator is returned (or the whole response
        when it has no terminator).
        """
        # Raw string with a character class: same pattern as before without
        # the invalid '\.' / '\?' / '\!' escapes in a normal string literal.
        parts = re.split(r'([.?!])', response)
        head = parts[0]
        if '\n' in head:
            return head.split('\n')[0] + '.'
        if len(parts) > 1:
            head += parts[1]
        return head

    def generate(self, text):
        """Generate a continuation of ``text`` and return only the new part.

        The prompt is truncated from the left so that it fits into the
        model's position limit minus the configured ``max_length``.
        """
        generate_config = self.config['generate_config']
        max_input_size = self.model.config.n_positions
        # NOTE(review): this treats `max_length` as the number of tokens to
        # reserve for the reply; confirm that matches how the generate() call
        # interprets it (see the transformers generation docs).
        if 'max_length' in generate_config:
            max_input_size -= generate_config['max_length']
        # A non-positive budget would make the negative slice below a no-op.
        max_input_size = max(max_input_size, 1)

        input_ids = self.tokenizer.encode(text, return_tensors='pt').to(self.device)
        # input_ids is indexed below as [batch, sequence]; the sequence length
        # is shape[1]. len(input_ids) would be the batch size.
        if input_ids.shape[1] > max_input_size:
            input_ids = input_ids[:, -max_input_size:]

        with torch.no_grad():
            out = self.model.generate(input_ids, **generate_config)

        # Drop the prompt tokens, keep only what was generated.
        out = out[:, input_ids.shape[1]:]
        return self.tokenizer.decode(out[0])

    def get_model(self, model_name, cpt_path):
        """Load tokenizer and model, optionally overriding weights from a checkpoint.

        ``cpt_path`` may be ``None``; otherwise the file must contain a dict
        with a ``model_state_dict`` entry.
        """
        self.tokenizer = GPT2Tokenizer.from_pretrained(model_name)
        self.model = GPT2LMHeadModel.from_pretrained(model_name)
        self.model.eval()

        if cpt_path is not None:
            # Load on CPU first; the model is moved to the target device below.
            cpt = torch.load(cpt_path, map_location='cpu')
            self.model.load_state_dict(cpt['model_state_dict'])

        self.model.to(self.device)


In [47]:
# Build the model from the default config path ('config/model.json').
model = Model()

In [52]:
# Send one user turn; the reply is also appended to model.context.
model.answer('Но кто-то же со мной говорит')

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


' Никто со мной не говорит.'

In [53]:
# Inspect the dialogue transcript accumulated by the answer() calls so far.
model.context

'Пользователь: Кто именно? Голосовой помощник Кант: Пользователь.Пользователь: А ты кто? Голосовой помощник Кант: Я никто.Пользователь: Но кто-то же со мной говорит. Голосовой помощник Кант: Никто со мной не говорит.'

In [2]:
import os
import re
import datetime
import json
import random
import telebot
from telebot.types import InlineKeyboardButton, InlineKeyboardMarkup
from transcribe import ogg2wav, transcribe_audio
from parse import parse_message


In [39]:
# model.model.config.n_positions

In [40]:
# import json
# import torch
# from transformers import GPT2LMHeadModel, GPT2Tokenizer

# class Model:
#     def __init__(self, config_path='config/model.json'):
#         with open(config_path, 'r') as f:
#             config = json.load(f)
#         gpt2_name = config.pop('model_name')
#         self.tokenizer = GPT2Tokenizer.from_pretrained(gpt2_name)
#         self.model = GPT2LMHeadModel.from_pretrained(gpt2_name)
#         self.model.eval()
#         self.device = config.pop('device')
#         self.model.to(self.device)
#         self.config = config


#     def generate(self, text):
#         with torch.no_grad():
#             # if self.device != 'cpu':
#             #     self.model.to(self.device)
#             input_ids = self.tokenizer.encode(text, return_tensors='pt').to(self.device)

#             out = self.model.generate(input_ids, **self.config)
#             generated_text = list(map(self.tokenizer.decode, out))[0]
            

#             return generated_text

from neural import Model


In [41]:
# NOTE(review): if cells run in listed order, `Model` here is the one imported
# from `neural` in the previous cell, not the class defined at the top of the
# notebook. The recorded output of this cell is a CUDA OutOfMemoryError.
model = Model()

OutOfMemoryError: CUDA out of memory. Tried to allocate 48.00 MiB (GPU 0; 10.92 GiB total capacity; 9.41 GiB already allocated; 34.06 MiB free; 9.43 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [33]:
# Generate a continuation for a short prompt directly, without answer().
model.generate('Привет, как')

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


'Привет, как дела? Как дела, а?\nДа, я вчера в магазине купила себе кофе.\nТы вроде не жалеешь, что купил коктейль? :)\nА я вот не могу понять, почему ты не купишь себе вино. Или ты вообще не знаешь про него? И что ты там напишеш, если ты его не пробовала? Мне кажется, ты просто не поняла, о чем я. :(\nЯ вам скажу, в этом году я не смогла ничего купить. Я просто забыла про кусочек хлеба. А в прошлом году купили мне кашу, и я даже не заметила, когда я ее съела. Но я очень люблю кушать. В прошлый раз я купилась на крем с вишней, который мне очень'

In [27]:
# from finance import *

import re
import gspread
from oauth2client.service_account import ServiceAccountCredentials

class SheetWriter:
    """Appends expense rows to the first worksheet of the 'финансы' spreadsheet."""

    def __init__(self, cred_path="config/gsheets.json"):
        """Load service-account credentials from the JSON key file at ``cred_path``."""
        scopes = ['https://www.googleapis.com/auth/spreadsheets', 'https://www.googleapis.com/auth/drive']
        self.credentials = ServiceAccountCredentials.from_json_keyfile_name(cred_path, scopes)

    def write_to_gsheet(self, amount, category, comment):
        """Write ``amount, category, comment`` into columns B..D of the next row.

        A new gspread client is authorized on every call.
        """
        file = gspread.authorize(self.credentials)
        sheet = file.open('финансы').worksheets()[0]
        # NOTE(review): the next row is derived from how many cells column A
        # holds, while the data goes to B..D; presumably column A is filled by
        # other means (e.g. a formula or date) - confirm.
        write_row_ind = len(sheet.col_values(1)) + 1
        # Keyword arguments keep the call independent of the positional order
        # of `range_name` / `values`, which differs between gspread versions.
        sheet.update(
            range_name=f"B{write_row_ind}:D{write_row_ind}",
            values=[[amount, category, comment]],
        )

In [34]:
# Module-level writer using the default credentials path ('config/gsheets.json').
sheet_writer = SheetWriter()

### Skills
- save note
  - from voice
  - add tags
  - from multiple messages
- add expenses
  - from voice
  - from markup

In [None]:
def parse_expense(text):
    """Parse ``"<amount> <category> [comment ...]"`` into a tuple.

    Fields are separated by any run of whitespace. Dots in the amount are
    dropped before the integer conversion (so ``"1.500"`` becomes ``1500``).

    Returns:
        ``(amount: int, category: str, comment: str)``; ``comment`` is ``''``
        when nothing follows the category.

    Raises:
        ValueError: if fewer than two fields are present or the amount is not
            an integer after removing dots.
    """
    # split() without an argument ignores repeated/leading/trailing spaces, so
    # "500  food" no longer yields an empty category.
    fields = text.split()
    if len(fields) < 2:
        raise ValueError(f'expected "<amount> <category> [comment]", got {text!r}')
    amount, category, *comment = fields
    amount = int(amount.replace('.', ''))
    return amount, category, ' '.join(comment)

class NoteBot(telebot.TeleBot):
    """Telegram bot that collects notes and expenses from text and voice messages."""

    def __init__(self, cred_path='config/telegram.json'):
        """Read ``token``, ``db_path`` and ``chat_id`` from the JSON file at ``cred_path``.

        Also instantiates ``Model()`` with its default arguments.
        """
        with open(cred_path, 'r', encoding='utf-8') as f:
            d = json.load(f)
        api_token = d['token']
        self.db_path = d['db_path']
        self.admin_chat_id = d['chat_id']
        super().__init__(api_token)
        self.lang = 'ru-RU'
        self.tags = []
        self.model = Model()
        self.wait_value = False
        self.text = ''
        # Handlers append to last_message / read expense; define them up front
        # so they exist even when /start was never sent.
        self.last_message = ''
        self.expense = None

    def transcribe_message(self, message):
        """Download the voice attachment of ``message`` and return its transcription.

        Resets ``self.tags`` and removes the temporary ``tmp.*`` files
        afterwards, also when conversion or transcription fails.
        """
        import glob

        self.tags = []
        file_info = self.get_file(message.voice.file_id)
        voice_file = self.download_file(file_info.file_path)
        try:
            with open('tmp.ogg', 'wb') as new_file:
                new_file.write(voice_file)

            wav_path = ogg2wav('tmp.ogg')
            return transcribe_audio(wav_path, self.lang)
        finally:
            # Portable replacement for `rm tmp.*`; cleanup stays best-effort.
            for leftover in glob.glob('tmp.*'):
                try:
                    os.remove(leftover)
                except OSError:
                    pass

    def save_last_message(self):
        """Write ``self.last_message`` as a Markdown note and clear it.

        The file is named after the current local time
        (``YYYY-MM-DD HH-MM-SS.md``) inside ``self.db_path``.
        """
        dt_pfx = datetime.datetime.now().strftime('%Y-%m-%d %H-%M-%S')
        sv_path = os.path.join(self.db_path, f'{dt_pfx}.md')

        note = parse_message(self.last_message, self.tags)
        # Explicit encoding: notes contain non-ASCII text.
        with open(sv_path, 'w', encoding='utf-8') as f:
            f.write(note)
        self.last_message = ''

    def get_config(self):
        """Return the config dict of the wrapped model."""
        return self.model.config

    def handle_expense(self, message):
        """Parse an expense from ``message.text`` and ask the user to confirm it.

        Everything after the first space is handed to ``parse_expense``. The
        parsed ``(amount, category, comment)`` is stored in ``self.expense``.
        Malformed input produces a usage hint instead of an exception.
        """
        # partition() returns '' for the tail when there is no space, where
        # str.index() would raise.
        _, _, text = message.text.partition(' ')
        try:
            values = parse_expense(text)
        except ValueError:
            self.send_message(message.chat.id, 'Формат: /expense <сумма> <категория> [комментарий]')
            return
        self.expense = values
        # values is (amount, category, comment): the amount belongs under the
        # sum label and the category under the expense label.
        confirm_message = 'Трата: {1}\nCумма: {0}\nКомментарий: {2}'.format(*values)
        # The markup factory must be called; passing the function object is
        # not a reply markup.
        self.send_message(message.chat.id, confirm_message, reply_markup=expense_markup())



In [None]:
# Single bot instance; the handler decorators below register on it.
bot = NoteBot()

def expense_markup():
    """Build the one-button inline keyboard used to confirm saving an expense."""
    keyboard = InlineKeyboardMarkup()
    keyboard.row_width = 1
    save_button = InlineKeyboardButton("Сохранить", callback_data='save_expense')
    keyboard.add(save_button)
    return keyboard

def voice_markup():
    """Build the inline keyboard offered under a note draft.

    Buttons: save as note (``save_note``), save as expense
    (``parse_expense``) and add a tag (``hashtag``).
    """
    markup = InlineKeyboardMarkup()
    markup.row_width = 3
    # The comma after the second button was missing, which made the whole
    # cell a syntax error.
    markup.add(InlineKeyboardButton("Сохранить заметку", callback_data='save_note'),
               InlineKeyboardButton("Сохранить расход", callback_data='parse_expense'),
               InlineKeyboardButton("Добавить тэг", callback_data='hashtag'))
    return markup


@bot.callback_query_handler(func=lambda call: True)
def callback_query(call):
    """Dispatch inline-keyboard presses by their ``callback_data``.

    Handles ``save_note``, ``save_expense``, ``hashtag`` and ``continue``.
    Every branch answers the callback query.
    """
    # Use the chat the pressed button belongs to. bot.chat_id only remembers
    # the last chat that sent a message, which may be a different user (and
    # is unset after a restart).
    chat_id = call.message.chat.id

    if call.data == 'save_note':
        if str(chat_id) == str(bot.admin_chat_id):
            bot.save_last_message()
            bot.answer_callback_query(call.id, "Note saved")
        else:
            bot.answer_callback_query(call.id)
            bot.send_message(chat_id, "Ты не Айдар, не буду ничего сохранять!")
            bot.send_message(bot.admin_chat_id, f"{chat_id} пытается сохранить тебе заметку!")
    elif call.data == 'save_expense':
        # NOTE(review): unlike 'save_note' this branch has no admin check -
        # confirm whether that is intended.
        expense = getattr(bot, 'expense', None)
        if expense is None:
            bot.answer_callback_query(call.id, "No expense to save")
        else:
            sheet_writer.write_to_gsheet(*expense)
            bot.answer_callback_query(call.id, "Expense saved")
    elif call.data == 'hashtag':
        bot.answer_callback_query(call.id)
        bot.wait_value = 'tag'
        bot.send_message(chat_id, "Введи название тега")
    elif call.data == 'continue':
        bot.answer_callback_query(call.id)
        text = bot.model.generate(bot.last_message)
        bot.send_message(chat_id, text)
    else:
        # NOTE(review): 'parse_expense' (sent by voice_markup) has no handler
        # yet; acknowledge unknown data so the query does not stay pending.
        bot.answer_callback_query(call.id)


@bot.message_handler(commands=['start'])
def start_message(message):
    """Handle /start: remember the chat, clear the note draft and greet the user."""
    chat_id = message.chat.id
    bot.chat_id = chat_id
    bot.last_message = ''
    bot.send_message(chat_id, 'Привет!')


@bot.message_handler(content_types=['voice'])
def handle_voice(message):
    """Transcribe a voice message, append it to the note draft and echo it back.

    The reply carries the ``voice_markup()`` keyboard.
    """
    bot.chat_id = message.chat.id
    transcription = bot.transcribe_message(message)
    # last_message is only created by /start; fall back to an empty draft so
    # a voice message sent first does not raise AttributeError.
    bot.last_message = getattr(bot, 'last_message', '') + transcription + ' '
    bot.send_message(message.chat.id, transcription, reply_markup=voice_markup())


@bot.message_handler(content_types=['text'])
def handle_text(message):
    """Route a text message.

    Commands are checked first (``/expense`` or a first word containing
    'трата', ``/random_number``, ``/yes_or_no``, ``/set_<key>``, ``/config``).
    Otherwise the text is either the value for a pending prompt
    (``bot.wait_value``) or is appended to the note draft.
    """
    chat_id = message.chat.id
    text = message.text
    bot.chat_id = chat_id

    if text.startswith('/expense') or 'трата' in text.split(' ')[0]:
        bot.handle_expense(message)
    elif text.startswith('/random_number'):
        # Message text is sent as a string rather than a bare int.
        bot.send_message(chat_id, str(random.randint(0, 100)))
    elif text.startswith('/yes_or_no'):
        bot.send_message(chat_id, random.choice(('yes', 'no')))
    elif text.startswith('/set_'):
        bot.wait_value = text.split('/set_')[1]
        bot.send_message(chat_id, f'set {bot.wait_value} to what value?')
    elif text.startswith('/config'):
        msg = '; '.join([f'{k}-{v}' for k, v in bot.get_config().items()])
        bot.send_message(chat_id, msg)

    elif bot.wait_value == 'tag':
        bot.tags.append(text)
        bot.wait_value = False
    elif bot.wait_value:
        # A value for the key named by a previous /set_<key>.
        try:
            value = float(text) if '.' in text else int(text)
        except ValueError:
            # Keep waiting so the user can retry with a valid number.
            bot.send_message(chat_id, f'{text!r} is not a number; send a numeric value for {bot.wait_value}')
        else:
            bot.model.config[bot.wait_value] = value
            bot.wait_value = False
    else:
        # last_message is only created by /start; fall back to an empty draft.
        bot.last_message = getattr(bot, 'last_message', '') + text + ' '
        bot.send_message(chat_id, bot.last_message, reply_markup=voice_markup())
    

# Start polling for updates; presumably this call blocks until interrupted,
# so it must stay the last statement of the notebook.
bot.infinity_polling()