In [1]:
import pandas as pd

In [3]:
df = pd.read_csv(f"data/labeled_reviews.csv")

In [4]:
df.head()

Unnamed: 0,text,приложение,топливо,карта,поддержка,интерфейс,отчет,эвакуатор,цена,страховка,шины_диски
0,Хочу выразить благодарность компании ППР и лич...,0,0,0,1,0,0,0,0,0,0
1,"Благодарен компании за очень удобные сервисы, ...",0,0,1,1,1,0,0,0,0,0
2,пользуемся в компании картами ППР года три. уд...,0,0,1,0,0,0,0,0,0,0
3,Однажды внезапно заблокировали все аккаунты на...,0,0,0,1,0,0,0,0,0,0
4,Лидер на рынке топливных карт! Самый лучший се...,0,0,0,0,0,0,0,0,0,0


In [5]:
df.tail()

Unnamed: 0,text,приложение,топливо,карта,поддержка,интерфейс,отчет,эвакуатор,цена,страховка,шины_диски
332,все устраивает. и денег водителям в дорогу дае...,0,0,0,1,1,1,0,0,0,0
333,"Информативный личный кабинет, удобные инструме...",0,0,0,0,1,0,0,0,0,0
334,"Отвратительная компания, не возможно следить з...",0,1,0,0,1,1,0,0,0,0
335,Хочется поделиться своей удивительной историей...,0,0,1,0,0,0,0,0,0,0
336,"Стали сотрудничать с компанией Петрол Плюс, ка...",0,0,1,0,1,0,0,0,0,0


In [11]:
import asyncio
import sqlite3
import json
import pandas as pd
import os
from aiogram import Bot, Dispatcher, Router
from aiogram.types import Message, CallbackQuery, InlineKeyboardMarkup, InlineKeyboardButton, FSInputFile
from aiogram.fsm.context import FSMContext
from aiogram.fsm.state import StatesGroup, State
from src.models.recommender import Recommender
from src.utils.config import load_config
from src.data.database import init_db
from src.data.preprocess import preprocess_data
from scripts.populate_db import populate_db
from src.models.aspect_generator import generate_aspects
from src.models.sentiment_generator import generate_sentiments
import logging
logging.basicConfig(level=logging.INFO)

# Initialize bot and dispatcher
BOT_TOKEN = load_config()["bot"]["BOT_TOKEN"]
bot = Bot(token=BOT_TOKEN)
dp = Dispatcher()
router = Router()
dp.include_router(router)

# Define state machine (from bot.py)
class AnalysisStates(StatesGroup):
    waiting_for_choice = State()
    viewing_overall = State()
    viewing_aspect = State()

# Mock message and callback query classes
class MockMessage:
    def __init__(self, text=None, chat_id=123, message_id=1):
        self.text = text
        self.chat = type('Chat', (), {'id': chat_id})()
        self.message_id = message_id
        self._sent_messages = []
        self._sent_photos = []
        self._edited_messages = []

    async def answer(self, text, reply_markup=None, parse_mode=None):
        self._sent_messages.append({'text': text, 'reply_markup': reply_markup, 'parse_mode': parse_mode})
        return self

    async def answer_photo(self, photo, caption=None, reply_markup=None):
        self._sent_photos.append({'photo': photo.path, 'caption': caption, 'reply_markup': reply_markup})
        return self

    async def edit_text(self, text, reply_markup=None):
        self._edited_messages.append({'text': text, 'reply_markup': reply_markup})
        return self

    async def delete(self):
        pass

class MockCallbackQuery:
    def __init__(self, data, message, user_id=123):
        self.data = data
        self.message = message
        self.from_user = type('User', (), {'id': user_id})()

    async def answer(self):
        pass

In [12]:
# Set up and populate database
def setup_database():
    config = load_config()
    db_path = config["database"]["path"]
    
    # Initialize database
    init_db(db_path)
    
    # Sample data (from preprocess.py fallback)
    data = pd.DataFrame([
        {"text": "Приложение постоянно крашится", "sentiment": "negative", "rating": 3},
        {"text": "Поддержка работает окей", "sentiment": "positive", "rating": 4},
        {"text": "Топливо дорого", "sentiment": "negative", "rating": 2},
        {"text": "Интерфейс неудобный", "sentiment": "negative", "rating": 3},
        {"text": "Цена нормальная", "sentiment": "positive", "rating": 5}
    ])
    
    # Preprocess and populate
    preprocessed_df = preprocess_data(data)
    populate_db(preprocessed_df, generate_recommendations=False)
    
    # Generate sentiments and aspects
    with sqlite3.connect(db_path) as conn:
        # Update sentiments
        sentiments_df = pd.read_sql_query("SELECT comment, comment_id FROM feedback WHERE tone IS NULL", conn)
        if not sentiments_df.empty:
            sentiments = generate_sentiments(sentiments_df['comment'].tolist())
            cursor = conn.cursor()
            cursor.executemany("UPDATE feedback SET tone = ? WHERE comment_id = ?", 
                             list(zip(sentiments, sentiments_df['comment_id'])))
            conn.commit()
        
        # Update aspects
        aspects_df = pd.read_sql_query("SELECT comment, comment_id FROM feedback WHERE aspect IS NULL", conn)
        if not aspects_df.empty:
            aspects_lists = generate_aspects(aspects_df['comment'].tolist())
            aspects_json = [json.dumps(aspect_list, ensure_ascii=False) for aspect_list in aspects_lists]
            cursor = conn.cursor()
            cursor.executemany("UPDATE feedback SET aspect = ? WHERE comment_id = ?", 
                             list(zip(aspects_json, aspects_df['comment_id'])))
            conn.commit()

In [17]:
# Register bot handlers (from bot.py)
from telegram_bot.bot import (
    start, choose_company, show_overall, show_competitor, show_aspect_sentiment,
    back_to_main, exit_analysis, fetch_feedback_for_brand, ASPECT_LABELS,
    SENTIMENT_COLORS, RATING_COLORS, ASPECT_COLORS, main_kb, company_kb, aspect_buttons
)
from aiogram.filters import CommandStart
from aiogram import Bot, Dispatcher, Router, F

# Register handlers with router
router.message(CommandStart())(start)
router.callback_query(F.data == "analyze_reviews")(choose_company)
router.callback_query(F.data == "my_company")(show_overall)
router.callback_query(F.data == "competitor")(show_competitor)
router.callback_query(F.data.startswith("aspect_"))(show_aspect_sentiment)
router.callback_query(F.data == "back_to_main")(back_to_main)
router.callback_query(F.data == "exit")(exit_analysis)


<function telegram_bot.bot.exit_analysis(query: aiogram.types.callback_query.CallbackQuery, state: aiogram.fsm.context.FSMContext)>

In [21]:
# Test function
async def run_tests():
    # Setup database
    setup_database()
    
    # Initialize state
    state = FSMContext(storage=dp.storage, key=type('Key', (), {'chat_id': 123, 'user_id': 123})())
    
    # Test 1: Start command
    print("Test 1: Starting bot")
    message = MockMessage(text="/start")
    await start(message, state)
    print("Sent messages:", [m['text'] for m in message._sent_messages])
    assert any("Добро пожаловать" in msg['text'] for msg in message._sent_messages), "Start message not sent"
    
    # Test 2: Analyze reviews
    print("\nTest 2: Choosing to analyze reviews")
    message = MockMessage()
    query = MockCallbackQuery(data="analyze_reviews", message=message)
    await choose_company(query, state)
    print("Edited messages:", [m['text'] for m in message._edited_messages])
    assert any("Кого анализировать" in m['text'] for m in message._edited_messages), "Analyze reviews not triggered"
    
    # Test 3: Selecting 'Моя компания'
    print("\nTest 3: Selecting 'Моя компания'")
    message = MockMessage()
    query = MockCallbackQuery(data="my_company", message=message)
    await show_overall(query, state)
    print("Sent photos:", [p['caption'] for p in message._sent_photos])
    assert len(message._sent_photos) == 5, f"Expected 5 plots, got {len(message._sent_photos)}"
    assert os.path.exists("plots/sentiment_distribution.png"), "Sentiment plot not saved"
    assert os.path.exists("plots/rating_distribution.png"), "Rating plot not saved"
    assert os.path.exists("plots/avg_rating_per_sentiment.png"), "Avg rating plot not saved"
    assert os.path.exists("plots/aspect_frequency.png"), "Aspect frequency plot not saved"
    assert os.path.exists("plots/aspect_sentiment_distribution.png"), "Aspect sentiment plot not saved"
    
    # Test 4: Aspect selection (приложение)
    print("\nTest 4: Selecting aspect 'приложение'")
    message = MockMessage()
    query = MockCallbackQuery(data="aspect_0", message=message)  # приложение is index 0
    await show_aspect_sentiment(query, state)
    print("Sent photos:", [p['caption'] for p in message._sent_photos])
    print("Sent messages:", [m['text'] for m in message._sent_messages])
    assert os.path.exists("plots/aspect_приложение.png"), "Aspect plot not saved"
    assert any("Рекомендации по негативным отзывам для аспекта «приложение»" in m['text'] for m in message._sent_messages), "Recommendations not sent"
    assert any("Обновить приложение" in m['text'] for m in message._sent_messages), "Hardcoded recommendations not correct"
    
    # Test 5: Aspect with no negative comments (поддержка)
    print("\nTest 5: Selecting aspect 'поддержка' (no negative comments)")
    message = MockMessage()
    query = MockCallbackQuery(data="aspect_3", message=message)  # поддержка is index 3
    await show_aspect_sentiment(query, state)
    print("Sent photos:", [p['caption'] for p in message._sent_photos])
    print("Sent messages:", [m['text'] for m in message._sent_messages])
    assert os.path.exists("plots/aspect_поддержка.png"), "Aspect plot not saved"
    assert any("Нет негативных отзывов для аспекта «поддержка»" in m['text'] for m in message._sent_messages), "No negative comments message not sent"
    
    # Test 6: Back to main
    print("\nTest 6: Back to main menu")
    message = MockMessage()
    query = MockCallbackQuery(data="back_to_main", message=message)
    await back_to_main(query, state)
    print("Sent messages:", [m['text'] for m in message._sent_messages])
    assert any("Выберите категорию" in m['text'] for m in message._sent_messages), "Back to main not triggered"
    
    # Test 7: Exit
    print("\nTest 7: Exit analysis")
    message = MockMessage()
    query = MockCallbackQuery(data="exit", message=message)
    await exit_analysis(query, state)
    print("Edited messages:", [m['text'] for m in message._edited_messages])
    assert any("Вы вышли из анализа" in m['text'] for m in message._edited_messages), "Exit not triggered"

In [22]:
# Run tests
# loop = asyncio.get_event_loop()
# loop.run_until_complete(run_tests
await run_tests()
print("\nAll tests completed!")

INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: cuda:0
INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Found 5 existing comments in the database.
Removed 5 comments that already exist in the database.
Relevance filtering disabled; keeping all non-duplicate comments
Total comments after filtering: 0
No preprocessed data to process. Exiting.
Test 1: Starting bot
Sent messages: ['Добро пожаловать! Мы — SentimentAI.\n\nАвтоматизируем анализ отзывов ваших клиентов.']

Test 2: Choosing to analyze reviews
Edited messages: ['Кого анализировать?']

Test 3: Selecting 'Моя компания'


  return compile(source, filename, mode, flags,


ValueError: zero-size array to reduction operation maximum which has no identity

In [27]:
import asyncio
import sqlite3
import json
import pandas as pd
import os
import logging

from aiogram import Bot, Dispatcher, Router, F
from aiogram.types import Message, CallbackQuery, FSInputFile
from aiogram.fsm.context import FSMContext
from aiogram.fsm.state import StatesGroup, State
from aiogram.filters.command import CommandStart

from src.models.recommender import Recommender
from src.utils.config import load_config
from src.data.database import init_db
from src.data.preprocess import preprocess_data
from scripts.populate_db import populate_db
from src.models.aspect_generator import generate_aspects
from src.models.sentiment_generator import generate_sentiments

logging.basicConfig(level=logging.INFO)

# Initialize bot and dispatcher
BOT_TOKEN = load_config()["bot"]["BOT_TOKEN"]
bot = Bot(token=BOT_TOKEN)
dp = Dispatcher()
router = Router()
dp.include_router(router)

# Define state machine (from bot.py)
class AnalysisStates(StatesGroup):
    waiting_for_choice = State()
    viewing_overall = State()
    viewing_aspect = State()

# Mock message and callback query classes
class MockMessage:
    def __init__(self, text=None, chat_id=123, message_id=1):
        self.text = text
        self.chat = type('Chat', (), {'id': chat_id})()
        self.message_id = message_id
        self._sent_messages = []
        self._sent_photos = []
        self._edited_messages = []

    async def answer(self, text, reply_markup=None, parse_mode=None):
        self._sent_messages.append({'text': text, 'reply_markup': reply_markup, 'parse_mode': parse_mode})
        return self

    async def answer_photo(self, data):
        photo = data.get('photo')
        caption = data.get('caption', 'Unknown caption')
        reply_markup = data.get('reply_markup')
        self._sent_photos.append({'photo': photo, 'caption': caption, 'reply_markup': reply_markup})
        return self

    async def edit_text(self, text, reply_markup=None):
        self._edited_messages.append({'text': text, 'reply_markup': reply_markup})
        return self

    async def delete(self):
        pass

class MockCallbackQuery:
    def __init__(self, data, message, user_id=None):
        self.user_id = 123 if user_id is None else user_id
        self.data = data
        self.message = message
        self.from_user = type('User', (), {'id': self.user_id})()

    async def answer(self):
        pass

# Set up and populate database
def setup_database():
    """
    Initialize and populate database with sample data including ratings.
    """
    config = load_config()
    db_path = config["database"]["path"]
    
    # Initialize database
    init_db(db_path)
    
    # Sample data with ratings
    data = pd.DataFrame([
        {"text": "Приложение постоянно крашится", "sentiment": "negative", "rating": 3},
        {"text": "Поддержка работает окей", "sentiment": "positive", "rating": 4},
        {"text": "Топливо дорого", "sentiment": "negative", "rating": 2},
        {"text": "Интерфейс неудобный", "sentiment": "negative", "rating": 3},
        {"text": "Цена нормальная", "sentiment": "positive", "rating": 5}
    ])
    
    # Preprocess and populate
    preprocessed_df = preprocess_data(data)
    populate_db(db=preprocessed_df, generate_recommendations=False)
    
    # Generate and update sentiments and aspects
    with sqlite3.connect(db_path) as conn:
        # Update sentiments
        sentiments_df = pd.read_sql_query("SELECT comment, sentiment, comment_id FROM feedback WHERE tone IS NULL", conn)
        if not sentiments_df.empty:
            sentiments = generate_sentiments(sentiments_df['comment'].tolist())
            cursor = conn.cursor()
            cursor.executemany("""
                UPDATE feedback
                SET tone = ?
                WHERE comment_id = ?
            """, list(zip(sentiments, sentiments_df['comment_id'])))
            conn.commit()
        
        # Update aspects
        aspects_df = pd.read_sql_query("SELECT comment, comment_id FROM feedback WHERE aspect IS NULL", conn)
        if not aspects_df.empty:
            aspects_lists = generate_aspects(aspects_df['comment'].tolist())
            aspects_json = [json.dumps(aspect_list, ensure_ascii=False) for aspect_list in aspects_lists]
            cursor = conn.cursor()
            cursor.executemany("""
                UPDATE feedback
                SET aspect = ?
                WHERE comment_id = ?
            """, list(zip(aspects_json, aspects_df['comment_id'])))
            conn.commit()

# Register bot handlers (from bot.py)
from telegram_bot.bot import (
    start, choose_company, show_overall, show_competitor, show_aspect_sentiment,
    back_to_main, exit_analysis, fetch_feedback_for_brand, ASPECT_LABELS,
    SENTIMENT_COLORS, RATING_COLORS, ASPECT_COLORS, main_kb, company_kb, aspect_buttons
)

# Register handlers with router
router.message(CommandStart())(start)
router.callback_query(F.data == "analyze_reviews")(choose_company)
router.callback_query(F.data == "my_company")(show_overall)
router.callback_query(F.data == "competitor")(show_competitor)
router.callback_query(F.data.startswith("aspect_"))(show_aspect_sentiment)
router.callback_query(F.data == "back_to_main")(back_to_main)
router.callback_query(F.data == "exit")(exit_analysis)

# Test function
async def run_tests():
    # Setup database
    setup_database()
    
    # Initialize state
    state = FSMContext(storage=dp.storage, key=type('Key', (), {'chat_id': '123', 'user_id': '123'})())
    
    # Test 1: Start command
    print("Test 1: Starting bot")
    message = MockMessage(text="/start")
    await start(message, state)
    print("Sent messages:", [m['text'] for m in message._sent_messages])
    assert any("Добро пожаловать" in msg['text'] for msg in message._sent_messages), "Start message not sent"
    
    # Test 2: Analyze reviews
    print("\nTest 2: Choosing to analyze reviews")
    message = MockMessage()
    query = MockCallbackQuery(data="analyze_reviews", message=message)
    await choose_company(query, state)
    print("Edited messages:", [m['text'] for m in message._edited_messages])
    assert any("Кого анализировать" in msg['text'] for msg in message._edited_messages), "Analyze reviews not triggered"
    
    # Test 3: Selecting 'Моя компания'
    print("\nTest 3: Selecting 'Моя компания'")
    message = MockMessage()
    query = MockCallbackQuery(data="my_company", message=message)
    await show_overall(query, state)
    print("Sent photos:", [p['caption'] for p in message._sent_photos])
    assert len(message._sent_photos) == 5, f"Expected 5 plots, got {len(message._sent_photos)}"
    assert os.path.exists("plots/sentiment_distribution.png"), "Sentiment plot not saved"
    assert os.path.exists("plots/rating_distribution.png"), "Rating plot not saved"
    assert os.path.exists("plots/avg_rating_per_sentiment.png"), "Avg rating plot not saved"
    assert os.path.exists("plots/aspect_frequency.png"), "Aspect frequency plot not saved"
    assert os.path.exists("plots/aspect_sentiment_distribution.png"), "Aspect sentiment plot not saved"
    
    # Test 4: Aspect selection (приложение)
    print("\nTest 4: Selecting aspect 'приложение'")
    message = MockMessage()
    query = MockCallbackQuery(data="aspect_0", message=message)  # приложение is index 0
    await show_aspect_sentiment(query, state)
    print("Sent photos:", [p['caption'] for p in message._sent_photos])
    print("Sent messages:", [m['text'] for m in message._sent_messages])
    assert os.path.exists("plots/aspect_приложение.png"), "Aspect plot not saved"
    assert any("Рекомендации по негативным отзывам для аспекта «приложение»" in m['text'] for m in message._sent_messages), "Recommendations not sent"
    assert any("Обновить приложение" in m['text'] for m in message._sent_messages), "Hardcoded recommendations not correct"
    
    # Test 5: Aspect with no negative comments (поддержка)
    print("\nTest 5: Selecting aspect 'поддержка' (no negative comments)")
    message = MockMessage()
    query = MockCallbackQuery(data="aspect_3", message=message)  # поддержка is index 3
    await show_aspect_sentiment(query, state)
    print("Sent photos:", [p['caption'] for p in message._sent_photos])
    print("Sent messages:", [m['text'] for m in message._sent_messages])
    assert os.path.exists("plots/aspect_поддержка.png"), "Aspect plot not saved"
    assert any("Нет негативных отзывов для аспекта «поддержка»" in m['text'] for m in message._sent_messages), "No negative comments message not sent"
    
    # Test 6: Back to main
    print("\nTest 6: Back to main menu")
    message = MockMessage()
    query = MockCallbackQuery(data="back_to_main", message=message)
    await back_to_main(query, state)
    print("Sent messages:", [m['text'] for m in message._sent_messages])
    assert any("Выберите категорию" in m['text'] for m in message._sent_messages), "Back to main not triggered"
    
    # Test 7: Exit
    print("\nTest 7: Exit analysis")
    message = MockMessage()
    query = MockCallbackQuery(data="exit", message=message)
    await exit_analysis(query, state)
    print("Edited messages:", [m['text'] for m in message._edited_messages])
    assert any("Вы вышли из анализа" in m['text'] for m in message._edited_messages), "Exit not triggered"

# Run tests (add this part if running in notebook or interactive shell)
if __name__ == "__main__":
    import nest_asyncio
    nest_asyncio.apply()  # Fix asyncio issues in Jupyter etc.
    loop = asyncio.get_event_loop()
    loop.run_until_complete(run_tests())
    print("\nAll tests completed!")


INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: cuda:0
INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: sentence-transformers/all-MiniLM-L6-v2


Found 5 existing comments in the database.
Removed 5 comments that already exist in the database.
Relevance filtering disabled; keeping all non-duplicate comments
Total comments after filtering: 0


TypeError: populate_db() got an unexpected keyword argument 'db'