In [None]:
# data_processing.py
import pandas as pd
import polars as pl
import duckdb

def process_data(file_path: str, file_type: str = 'parquet'):
    """Load a dataset, summarise prices per category and select 2023 rows.

    The file is read twice: once with pandas (the frame queried through
    DuckDB) and once with polars (the frame that is filtered and extended).

    Args:
        file_path: Path of the input file.
        file_type: ``'parquet'`` reads a Parquet file; any other value is
            treated as CSV.

    Returns:
        A tuple ``(analysis, filtered)`` of pandas DataFrames. ``analysis``
        holds ``category``, ``avg_price`` and ``count`` ordered by
        ``avg_price`` descending. ``filtered`` holds the rows whose ``date``
        lies within 2023 (both bounds inclusive) plus a ``price_with_tax``
        column equal to ``price * 1.2``.
    """
    # Read the data
    if file_type == 'parquet':
        df_pd = pd.read_parquet(file_path)
        df_pl = pl.read_parquet(file_path)
    else:  # CSV
        df_pd = pd.read_csv(file_path)
        df_pl = pl.read_csv(file_path)

    # Data analysis (example). The query names the local `df_pd` frame, so
    # that variable must keep this exact name.
    analysis = duckdb.sql("""
        SELECT 
            category, 
            AVG(price) AS avg_price,
            COUNT(*) AS count
        FROM df_pd
        GROUP BY category
        ORDER BY avg_price DESC
    """).df()

    # Filtering and transformations.
    # Bare strings passed to `is_between` are parsed by polars as column
    # names, not as date values, so the bounds are built as date literals.
    # The cast lets the same filter work whether `date` was loaded as text
    # (CSV), as a date, or as a datetime.
    in_2023 = pl.col('date').cast(pl.Date).is_between(
        pl.date(2023, 1, 1), pl.date(2023, 12, 31)
    )
    filtered = df_pl.filter(in_2023).with_columns(
        (pl.col('price') * 1.2).alias('price_with_tax')
    )

    return analysis, filtered.to_pandas()

In [None]:
# sql_style.py
import duckdb
import pandas as pd

def execute_sql_on_data(df: pd.DataFrame, sql_query: str):
    """Run ``sql_query`` through DuckDB and return the result as a DataFrame.

    Example SQL query::

        SELECT category, SUM(sales) as total_sales
        FROM df
        WHERE date > '2023-01-01'
        GROUP BY category

    Args:
        df: The frame the query is expected to refer to as ``df``.
        sql_query: SQL text handed to DuckDB unchanged.

    Returns:
        The query result converted to a pandas DataFrame.
    """
    # NOTE(review): `df` is never referenced in Python code here; presumably
    # DuckDB resolves the `df` table name in the query from this local
    # variable, so the parameter must keep its name. Confirm against DuckDB.
    relation = duckdb.query(sql_query)
    return relation.df()

In [None]:
# analysis.py
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy import stats

def analyze_data(df: pd.DataFrame):
    """Compute summary statistics, flag outliers and save a box plot.

    Args:
        df: Frame with a numeric ``value`` column and a ``category`` column.

    Returns:
        A tuple ``(summary, anomalies)``: ``summary`` is the output of
        ``df.describe`` with the 25/50/75/99th percentiles, and
        ``anomalies`` holds the rows of ``df`` whose ``value`` z-score has
        an absolute value greater than 3.

    Side effects:
        Writes the box plot of ``value`` per ``category`` to ``boxplot.png``
        in the current working directory.
    """
    # Basic statistics. The result must not be bound to the name `stats`:
    # that would shadow the imported `scipy.stats` module for the whole
    # function and make the `stats.zscore` call below fail.
    summary = df.describe(percentiles=[0.25, 0.5, 0.75, 0.99])

    # Anomaly detection (example). Missing values are left out of the
    # mean/std so one NaN does not turn every z-score into NaN; rows with a
    # NaN value compare False and are therefore never flagged.
    z_scores = stats.zscore(df['value'].to_numpy(), nan_policy='omit')
    anomalies = df[abs(z_scores) > 3]

    # Visualisation. The figure is closed even if plotting fails so that
    # repeated calls do not accumulate open figures.
    fig = plt.figure(figsize=(12, 6))
    try:
        sns.boxplot(x=df['category'], y=df['value'])
        plt.xticks(rotation=45)
        plt.savefig('boxplot.png', bbox_inches='tight')
    finally:
        plt.close(fig)

    return summary, anomalies

In [None]:
# bot.py
import logging
import os

from aiogram import Bot, Dispatcher, types, executor

# Take the bot token from the TELEGRAM_API_TOKEN environment variable so the
# real secret does not have to be committed; the original placeholder is kept
# as the fallback value.
API_TOKEN = os.environ.get('TELEGRAM_API_TOKEN', 'YOUR_TOKEN')
bot = Bot(token=API_TOKEN)
dp = Dispatcher(bot)

# Command handling
@dp.message_handler(commands=['start'])
async def start_cmd(message: types.Message):
    """Answer the ``start`` command with a prompt to send CSV data."""
    greeting = "Привет! Отправь мне данные в формате CSV"
    await message.answer(greeting)

# MIME types accepted as CSV. Clients do not agree on a single value, so an
# exact match on 'text/csv' alone rejects genuine CSV uploads.
_CSV_MIME_TYPES = frozenset({
    'text/csv',
    'text/comma-separated-values',
    'application/csv',
})


# Document handling
@dp.message_handler(content_types=types.ContentType.DOCUMENT)
async def handle_docs(message: types.Message):
    """Download an uploaded CSV document and acknowledge it.

    A document counts as CSV when its MIME type is one of
    ``_CSV_MIME_TYPES`` or its file name ends with ``.csv``
    (case-insensitive). Anything else is answered with a request to send a
    CSV file and is not downloaded.
    """
    document = message.document
    # The file name is treated as optional; fall back to an empty string.
    file_name = (document.file_name or '').lower()
    looks_like_csv = (
        document.mime_type in _CSV_MIME_TYPES or file_name.endswith('.csv')
    )
    if not looks_like_csv:
        await message.answer("Отправьте CSV файл")
        return

    await document.download()
    await message.answer("Файл получен! Обрабатываю...")
    # TODO: add processing of the downloaded file here

# Start polling with the dispatcher only when this file is run as a script,
# not when it is imported.
if __name__ == '__main__':
    executor.start_polling(dp, skip_updates=True)

In [None]:
# scraping.py
import requests
from bs4 import BeautifulSoup
from playwright.sync_api import sync_playwright

def simple_scrape(url: str, timeout: float = 10.0):
    """Fetch a page over HTTP and return the text of every ``<h1>`` element.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on an unresponsive
            host.

    Returns:
        A list with the text of each ``<h1>`` tag, in document order.

    Raises:
        requests.exceptions.RequestException: If the request fails or
            times out.
    """
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.text, 'html.parser')
    return [h1.text for h1 in soup.find_all('h1')]

def js_scrape(url: str):
    """Load a page in headless Chromium and return its HTML (for JavaScript sites).

    Args:
        url: Address of the page to open.

    Returns:
        The page content as returned by ``page.content()`` after navigation.
    """
    with sync_playwright() as p:
        browser = p.chromium.launch()
        try:
            page = browser.new_page()
            page.goto(url)
            content = page.content()
            # Additional processing...
        finally:
            # Close the browser even when navigation fails, so an exception
            # does not skip the cleanup.
            browser.close()
    return content

In [None]:
# rag.py
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI

def setup_rag(file_path: str):
    """Build a retrieval QA chain over the text file at ``file_path``.

    Steps: load the file with ``TextLoader``, split it with
    ``RecursiveCharacterTextSplitter`` (``chunk_size=1000``,
    ``chunk_overlap=200``), index the pieces in a FAISS store using
    ``OpenAIEmbeddings``, and wrap the store's retriever in a
    ``RetrievalQA`` chain of type ``"stuff"`` backed by ``OpenAI()``.

    Args:
        file_path: Path of the text document to index.

    Returns:
        The configured ``RetrievalQA`` chain.
    """
    # Load the document
    documents = TextLoader(file_path).load()

    # Split it into chunks
    splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
    chunks = splitter.split_documents(documents)

    # Build the vector store
    store = FAISS.from_documents(chunks, OpenAIEmbeddings())

    # Assemble the QA chain
    return RetrievalQA.from_chain_type(
        llm=OpenAI(),
        chain_type="stuff",
        retriever=store.as_retriever(),
    )