In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import io
import re
import numpy as np
from collections import Counter
import logging
import asyncio
from telegram import Update
from telegram.ext import Application, CommandHandler, ContextTypes
import nest_asyncio
nest_asyncio.apply()

In [3]:
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO
)
logger = logging.getLogger(__name__)

In [4]:
def load_data():
    try:
        df = pd.read_csv('DataAnalystJobs.csv')
        
        # Очистка и преобразование зарплат
        def clean_salary(salary):
            if isinstance(salary, str):
                numbers = re.findall(r'\d+', salary)
                if len(numbers) >= 2:
                    return (int(numbers[0]) + int(numbers[1])) / 2
            return np.nan
        
        df['Salary_Avg'] = df['Salary Estimate'].apply(clean_salary)
        df['City'] = df['Location'].str.split(',').str[0]
        
        # Извлечение навыков
        def extract_skills(text):
            skills = ['python', 'sql', 'r', 'tableau', 'excel', 'machine learning', 
                     'statistics', 'data visualization', 'power bi', 'hadoop', 
                     'spark', 'sas', 'matlab', 'pandas', 'numpy', 'scikit-learn']
            found_skills = []
            text = str(text).lower()
            for skill in skills:
                if skill in text:
                    found_skills.append(skill)
            return found_skills
        
        df['Skills'] = df['Job Description'].apply(extract_skills)
        all_skills = [skill for sublist in df['Skills'] for skill in sublist]
        skill_counts = Counter(all_skills)
        
        # Извлечение опыта
        def extract_experience(desc):
            if isinstance(desc, str):
                desc = desc.lower()
                if 'entry level' in desc or '0-2 years' in desc or '1-2 years' in desc:
                    return 'Entry (0-2 years)'
                elif 'junior' in desc or '2-4 years' in desc:
                    return 'Junior (2-4 years)'
                elif 'senior' in desc or '5+ years' in desc or '5-7 years' in desc:
                    return 'Senior (5+ years)'
                elif 'mid' in desc or '3-5 years' in desc:
                    return 'Mid (3-5 years)'
            return 'Not specified'
        
        df['Experience'] = df['Job Description'].apply(extract_experience)
        
        return df, skill_counts
    
    except Exception as e:
        logger.error(f"Ошибка при загрузке данных: {e}")
        return None, None

In [5]:
df, skill_counts = load_data()

In [6]:
def plot_skills():
    skills_df = pd.DataFrame.from_dict(skill_counts, orient='index').reset_index()
    skills_df.columns = ['Skill', 'Count']
    skills_df = skills_df.sort_values('Count', ascending=False).head(15)
    
    plt.figure(figsize=(10, 6))
    sns.barplot(x='Count', y='Skill', data=skills_df, palette='viridis')
    plt.title('Топ-15 самых востребованных навыков')
    buf = io.BytesIO()
    plt.savefig(buf, format='png', bbox_inches='tight')
    plt.close()
    buf.seek(0)
    return buf

def plot_city_salary():
    city_counts = df['City'].value_counts()
    top_cities = city_counts[city_counts >= 5].index
    city_salary = df[df['City'].isin(top_cities)].groupby('City')['Salary_Avg'].mean().sort_values(ascending=False).head(10)
    
    plt.figure(figsize=(10, 6))
    city_salary.plot(kind='bar', color='skyblue')
    plt.title('Средняя зарплата по городам (5+ вакансий)')
    plt.ylabel('Зарплата ($K)')
    plt.xticks(rotation=45)
    buf = io.BytesIO()
    plt.savefig(buf, format='png', bbox_inches='tight')
    plt.close()
    buf.seek(0)
    return buf

def plot_exp_salary():
    exp_salary = df[df['Experience'] != 'Not specified'].groupby('Experience')['Salary_Avg'].mean()
    exp_order = ['Entry (0-2 years)', 'Junior (2-4 years)', 'Mid (3-5 years)', 'Senior (5+ years)']
    exp_salary = exp_salary.reindex(exp_order)
    
    plt.figure(figsize=(10, 6))
    sns.barplot(x=exp_salary.index, y=exp_salary.values, palette='coolwarm')
    plt.title('Зарплаты по уровню опыта')
    plt.ylabel('Зарплата ($K)')
    plt.xticks(rotation=45)
    buf = io.BytesIO()
    plt.savefig(buf, format='png', bbox_inches='tight')
    plt.close()
    buf.seek(0)
    return buf


In [None]:
async def start(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """Отправляет приветственное сообщение"""
    user = update.effective_user
    await update.message.reply_text(
        f"Привет {user.first_name}!\n"
        "Я бот для анализа вакансий Data Analyst.\n"
        "Доступные команды:\n"
        "/skills - Топ навыков\n"
        "/salary_by_city - Зарплаты по городам\n"
        "/salary_by_exp - Зарплаты по опыту\n"
        "/help - Помощь"
    )

async def help_command(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """Отправляет справку"""
    await update.message.reply_text(
        "Доступные команды:\n"
        "/skills - Топ навыков\n"
        "/salary_by_city - Зарплаты по городам\n"
        "/salary_by_exp - Зарплаты по опыту\n"
        "/help - Эта справка"
    )

async def skills_stats(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """Отправляет статистику по навыкам"""
    buf = plot_skills()
    await update.message.reply_photo(photo=buf, caption="Топ-15 самых востребованных навыков для Data Analyst")
    buf.close()

async def salary_by_city(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """Отправляет статистику зарплат по городам"""
    buf = plot_city_salary()
    await update.message.reply_photo(photo=buf, caption="Средние зарплаты Data Analyst по городам США")
    buf.close()

async def salary_by_exp(update: Update, context: ContextTypes.DEFAULT_TYPE):
    """Отправляет статистику зарплат по опыту"""
    buf = plot_exp_salary()
    await update.message.reply_photo(photo=buf, caption="Зависимость зарплаты Data Analyst от опыта работы")
    buf.close()

async def error_handler(update: object, context: ContextTypes.DEFAULT_TYPE):
    """Логирует ошибки"""
    logger.error(f'Ошибка: {context.error}')

def main():
    """Запуск бота"""
    
    TOKEN = "7762132323:AAH1rVzaY97aYpxm2cEMcc79SwicSCRm6U8"
    
    
    application = Application.builder().token(TOKEN).build()

  
    application.add_handler(CommandHandler("start", start))
    application.add_handler(CommandHandler("help", help_command))
    application.add_handler(CommandHandler("skills", skills_stats))
    application.add_handler(CommandHandler("salary_by_city", salary_by_city))
    application.add_handler(CommandHandler("salary_by_exp", salary_by_exp))
    
    
    application.add_error_handler(error_handler)

    
    application.run_polling()

if __name__ == '__main__':
    main()

2025-07-10 15:14:18,273 - httpx - INFO - HTTP Request: POST https://api.telegram.org/bot7762132323:AAH1rVzaY97aYpxm2cEMcc79SwicSCRm6U8/getMe "HTTP/1.1 200 OK"
2025-07-10 15:14:18,294 - httpx - INFO - HTTP Request: POST https://api.telegram.org/bot7762132323:AAH1rVzaY97aYpxm2cEMcc79SwicSCRm6U8/deleteWebhook "HTTP/1.1 200 OK"
2025-07-10 15:14:18,300 - apscheduler.scheduler - INFO - Scheduler started
2025-07-10 15:14:18,302 - telegram.ext.Application - INFO - Application started
2025-07-10 15:14:27,852 - httpx - INFO - HTTP Request: POST https://api.telegram.org/bot7762132323:AAH1rVzaY97aYpxm2cEMcc79SwicSCRm6U8/getUpdates "HTTP/1.1 200 OK"
2025-07-10 15:14:28,407 - httpx - INFO - HTTP Request: POST https://api.telegram.org/bot7762132323:AAH1rVzaY97aYpxm2cEMcc79SwicSCRm6U8/sendPhoto "HTTP/1.1 200 OK"
2025-07-10 15:14:38,270 - httpx - INFO - HTTP Request: POST https://api.telegram.org/bot7762132323:AAH1rVzaY97aYpxm2cEMcc79SwicSCRm6U8/getUpdates "HTTP/1.1 200 OK"
2025-07-10 15:14:48,296 - ht

2025-07-10 15:21:30,024 - httpx - INFO - HTTP Request: POST https://api.telegram.org/bot7762132323:AAH1rVzaY97aYpxm2cEMcc79SwicSCRm6U8/getUpdates "HTTP/1.1 200 OK"
2025-07-10 15:21:40,041 - httpx - INFO - HTTP Request: POST https://api.telegram.org/bot7762132323:AAH1rVzaY97aYpxm2cEMcc79SwicSCRm6U8/getUpdates "HTTP/1.1 200 OK"
2025-07-10 15:21:48,825 - httpx - INFO - HTTP Request: POST https://api.telegram.org/bot7762132323:AAH1rVzaY97aYpxm2cEMcc79SwicSCRm6U8/getUpdates "HTTP/1.1 200 OK"
2025-07-10 15:21:48,931 - httpx - INFO - HTTP Request: POST https://api.telegram.org/bot7762132323:AAH1rVzaY97aYpxm2cEMcc79SwicSCRm6U8/sendMessage "HTTP/1.1 200 OK"
2025-07-10 15:21:51,624 - httpx - INFO - HTTP Request: POST https://api.telegram.org/bot7762132323:AAH1rVzaY97aYpxm2cEMcc79SwicSCRm6U8/getUpdates "HTTP/1.1 200 OK"
2025-07-10 15:21:52,061 - httpx - INFO - HTTP Request: POST https://api.telegram.org/bot7762132323:AAH1rVzaY97aYpxm2cEMcc79SwicSCRm6U8/sendPhoto "HTTP/1.1 200 OK"
2025-07-10 15:22

2025-07-10 15:28:15,666 - httpx - INFO - HTTP Request: POST https://api.telegram.org/bot7762132323:AAH1rVzaY97aYpxm2cEMcc79SwicSCRm6U8/getUpdates "HTTP/1.1 200 OK"
2025-07-10 15:28:16,091 - httpx - INFO - HTTP Request: POST https://api.telegram.org/bot7762132323:AAH1rVzaY97aYpxm2cEMcc79SwicSCRm6U8/sendPhoto "HTTP/1.1 200 OK"
2025-07-10 15:28:21,045 - httpx - INFO - HTTP Request: POST https://api.telegram.org/bot7762132323:AAH1rVzaY97aYpxm2cEMcc79SwicSCRm6U8/getUpdates "HTTP/1.1 200 OK"
2025-07-10 15:28:21,540 - httpx - INFO - HTTP Request: POST https://api.telegram.org/bot7762132323:AAH1rVzaY97aYpxm2cEMcc79SwicSCRm6U8/sendPhoto "HTTP/1.1 200 OK"
2025-07-10 15:28:27,110 - httpx - INFO - HTTP Request: POST https://api.telegram.org/bot7762132323:AAH1rVzaY97aYpxm2cEMcc79SwicSCRm6U8/getUpdates "HTTP/1.1 200 OK"
2025-07-10 15:28:27,186 - httpx - INFO - HTTP Request: POST https://api.telegram.org/bot7762132323:AAH1rVzaY97aYpxm2cEMcc79SwicSCRm6U8/sendMessage "HTTP/1.1 200 OK"
2025-07-10 15:28: