In [6]:
import os
import json
import csv
import zipfile

# Directory that holds every input file and receives the generated output.
BASE_DIR = '.'

# --- Inputs -----------------------------------------------------------------
# Purchase log: main() extracts the archive into BASE_DIR when the text file
# is not present yet.
PURCHASE_LOG_ZIP_PATH = os.path.join(BASE_DIR, 'purchase_log.zip')
PURCHASE_LOG_PATH = os.path.join(BASE_DIR, 'purchase_log.txt')

# Visit log: the primary name, plus the alternative name main() falls back to
# when the primary file does not exist.
VISIT_LOG_PATH = os.path.join(BASE_DIR, 'visit_log.csv')
ALT_VISIT_LOG_PATH = os.path.join(BASE_DIR, 'visit_log__1_.csv')

# --- Output -----------------------------------------------------------------
# CSV written by main() with the columns user_id, source, category.
FUNNEL_PATH = os.path.join(BASE_DIR, 'funnel.csv')

def _unpack_purchase_log(zip_path, extracted_path, dest_dir):
    """Extract the purchase-log archive unless the extracted file already exists."""
    if os.path.exists(zip_path) and not os.path.exists(extracted_path):
        with zipfile.ZipFile(zip_path, 'r') as zf:
            zf.extractall(dest_dir)
            print('Архив purchase_log.zip распакован.')


def _load_user_categories(purchase_log_path):
    """Return a dict mapping each user_id to the set of categories purchased.

    The file is read as one JSON object per line. Blank lines, the header
    record (whose ``user_id`` value is the literal ``'user_id'``) and records
    with an empty/missing ``user_id`` or ``category`` are skipped.

    Raises:
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    user_categories = {}

    # 'utf-8-sig' drops a leading byte-order mark if there is one; with plain
    # 'utf-8' the BOM stays in the first line and json.loads rejects it.
    # Files without a BOM are decoded exactly as before.
    with open(purchase_log_path, encoding='utf-8-sig') as f:
        for line in f:
            line = line.strip()
            if not line:
                continue

            record = json.loads(line)
            user_id = record.get('user_id')
            category = record.get('category')

            # Skip the header record.
            if user_id == 'user_id':
                continue

            if not user_id or not category:
                continue

            user_categories.setdefault(user_id, set()).add(category)

    return user_categories


def _write_funnel(visit_log_path, funnel_path, user_categories):
    """Write one funnel row per (visit, purchased category) pair.

    Visits without a ``user_id`` and visits of users absent from
    ``user_categories`` produce no rows.
    """
    # 'utf-8-sig' keeps a BOM from being glued onto the first header name
    # (which would make every visit.get('user_id') return None), and
    # newline='' is the mode the csv module asks for when reading.
    with open(visit_log_path, encoding='utf-8-sig', newline='') as visits_file, \
         open(funnel_path, 'w', newline='', encoding='utf-8') as funnel_file:

        reader = csv.DictReader(visits_file)
        writer = csv.DictWriter(funnel_file, fieldnames=['user_id', 'source', 'category'])
        writer.writeheader()

        for visit in reader:
            user_id = visit.get('user_id')
            if not user_id:
                continue

            categories = user_categories.get(user_id)
            if not categories:
                continue

            source = visit.get('source', '')
            # Set iteration order varies between interpreter runs; sorting
            # makes the output file reproducible. key=str keeps the sort safe
            # even if category values are not all strings.
            for category in sorted(categories, key=str):
                writer.writerow({
                    'user_id': user_id,
                    'source': source,
                    'category': category
                })


def main():
    """Build the funnel CSV by joining the visit log with the purchase log.

    Steps:
      1. Extract ``PURCHASE_LOG_ZIP_PATH`` into ``BASE_DIR`` if the archive
         exists and ``PURCHASE_LOG_PATH`` does not.
      2. If ``VISIT_LOG_PATH`` is missing, switch the module-level
         ``VISIT_LOG_PATH`` to ``ALT_VISIT_LOG_PATH`` when that file exists.
      3. Collect the purchased categories per user from the purchase log.
      4. For every visit of a user with purchases, write one row per
         category to ``FUNNEL_PATH`` (columns: user_id, source, category).

    Raises:
        FileNotFoundError: if no visit log or no purchase log can be found.
        json.JSONDecodeError: if the purchase log contains a malformed line.
    """
    # Rebinding the module-level name is kept on purpose: code that runs
    # after main() may read VISIT_LOG_PATH to learn which file was used.
    global VISIT_LOG_PATH

    _unpack_purchase_log(PURCHASE_LOG_ZIP_PATH, PURCHASE_LOG_PATH, BASE_DIR)

    if not os.path.exists(VISIT_LOG_PATH):
        if os.path.exists(ALT_VISIT_LOG_PATH):
            VISIT_LOG_PATH = ALT_VISIT_LOG_PATH
            print(f'Использую файл с визитами: {VISIT_LOG_PATH}')
        else:
            raise FileNotFoundError('Файл visit_log.csv не найден. Проверьте имя и расположение файла.')

    for path in [VISIT_LOG_PATH, PURCHASE_LOG_PATH]:
        if not os.path.exists(path):
            raise FileNotFoundError(f'Не найден файл: {path}')

    user_categories = _load_user_categories(PURCHASE_LOG_PATH)
    print(f'Уникальных пользователей с покупками: {len(user_categories)}')

    _write_funnel(VISIT_LOG_PATH, FUNNEL_PATH, user_categories)
    print(f'Готово! Файл "{FUNNEL_PATH}" успешно создан.')

# Run the pipeline only when this code is executed directly (as a script or
# a notebook cell), not when it is imported as a module.
if __name__ == '__main__':
    main()

Использую файл с визитами: ./visit_log__1_.csv
Уникальных пользователей с покупками: 99517
Готово! Файл "./funnel.csv" успешно создан.
