In [1]:
import os
import re
import time
import warnings

import pandas as pd
import requests
from tqdm import tqdm
warnings.filterwarnings('ignore')

# SECURITY: the API key is read from the YOUTUBE_API_KEY environment variable
# instead of being stored in the notebook. Any key that was previously committed
# in a shared notebook should be treated as leaked and revoked.
YOUTUBE_API_KEY = os.environ.get("YOUTUBE_API_KEY", "")
if not YOUTUBE_API_KEY:
    # Reported with print() because warnings are globally silenced in this notebook.
    print("YOUTUBE_API_KEY is not set; requests will be sent without an API key.")

# Compared against each video's categoryId to count a channel's gaming videos.
GAMING_CATEGORY_ID = "20"


In [2]:

def search_channels(query, max_results=50, api_key=YOUTUBE_API_KEY):
    """Discover channels by searching for videos and collecting their uploaders.

    Sends a single request to the ``search`` endpoint with ``type=video`` and
    returns one record per distinct ``channelId`` found in the results, in
    order of first appearance.

    Args:
        query: Search text sent as the ``q`` parameter.
        max_results: Requested number of results; values above 50 are capped at 50.
        api_key: API key sent as the ``key`` parameter.

    Returns:
        A list of dicts with the keys ``channel_id``, ``channel_title``,
        ``description`` and ``published_at``. Because the search type is
        ``video``, ``description`` and ``published_at`` are taken from the
        matching search result's snippet. The list is empty when the request
        fails; the failure is reported on stdout rather than raised.
    """
    url = "https://www.googleapis.com/youtube/v3/search"
    params = {
        'part': 'snippet',
        'q': query,
        'type': 'video',
        'maxResults': min(max_results, 50),
        'key': api_key
    }
    channels = []
    seen_channels = set()

    try:
        response = requests.get(url, params=params, timeout=10)
        response.raise_for_status()
        data = response.json()
    except (requests.RequestException, ValueError) as exc:
        # Do not print str(exc): requests embeds the full URL, including the
        # API key, in its error messages.
        status = getattr(getattr(exc, 'response', None), 'status_code', 'n/a')
        print(f"search_channels: query {query!r} failed "
              f"({type(exc).__name__}, HTTP status {status})")
        return channels
    finally:
        # Throttle after every attempt, successful or not.
        time.sleep(0.1)

    items = data.get('items', []) if isinstance(data, dict) else []
    for item in items:
        snippet = item.get('snippet') or {}
        channel_id = snippet.get('channelId')
        if not channel_id or channel_id in seen_channels:
            continue
        seen_channels.add(channel_id)
        channels.append({
            'channel_id': channel_id,
            'channel_title': snippet.get('channelTitle', ''),
            'description': snippet.get('description', ''),
            'published_at': snippet.get('publishedAt', '')
        })

    return channels

# Search keywords used to discover gaming channels
search_queries = [
    # General gaming / esports terms
    'игры', 'летсплей', 'игровой обзор', 'киберспорт', 'стрим игры',
    'Кс', 'CS', 'Counter strike', 'Dota 2', 'major', 'blast premier',
    'the international', 'pgl', 'Valorant', 'Champions', 'Minecraft',
    # Competitive titles
    'Counter Strike 2', 'CS2 хайлайты 2025', 'CS2 геймплей 2025',
    'Dota 2 The International 2025', 'Dota 2 major 2025',
    'Valorant Champions 2025', 'Valorant турнир 2025',
    'Fortnite новый сезон 2025', 'Fortnite челлендж 2025',
    'PUBG 2025', 'PUBG highlights 2025',
    'Warzone 2025', 'Warzone нарезка 2025',
    'Apex Legends 2025', 'Apex Legends топ моменты',
    'Overwatch 2 gameplay 2025',
    'League of Legends Worlds 2025',
    # Single-player / open-world titles
    'GTA 6 трейлер', 'GTA 5 RP 2025', 'GTA Online 2025', 'GTA V нарезка 2025',
    'Cyberpunk 2077 Phantom Liberty', 'The Witcher 3 2025', 'Elden Ring 2025',
    'Baldur’s Gate 3 прохождение', 'Hogwarts Legacy 2025', 'Palworld 2025',
    'Stalker 2 2025', 'Atomic Heart DLC 2025', 'Resident Evil 4 Remake 2025',
    # Minecraft / Roblox
    'Minecraft 2025', 'Minecraft выживание 2025', 'Minecraft хоррор 2025',
    'Minecraft моды 2025',
    'Roblox 2025', 'Roblox obby 2025', 'Roblox funny moments 2025',
    'Roblox horror 2025', 'Roblox симулятор 2025',
]

# Run every query and pool the per-query results.
all_channels = []
for query in tqdm(search_queries, desc="Поиск каналов"):
    all_channels += search_channels(query, max_results=50)
    time.sleep(0.5)

# Different queries can return the same channel; keep the first record per ID.
unique_channels = {}
for channel in all_channels:
    unique_channels.setdefault(channel['channel_id'], channel)

channels_list = [*unique_channels.values()]


Поиск каналов: 100%|██████████| 55/55 [01:05<00:00,  1.20s/it]


In [3]:
def get_channel_videos(channel_id, max_results=50, api_key=YOUTUBE_API_KEY):
    """Fetch details for videos from a channel's uploads playlist.

    Makes up to three kinds of API calls: ``channels`` (to find the uploads
    playlist ID), ``playlistItems`` (one page of that playlist) and, through
    ``get_videos_details``, ``videos``.

    Args:
        channel_id: ID of the channel to inspect.
        max_results: Number of playlist items to request; capped at 50
            (a single page, no pagination).
        api_key: API key sent as the ``key`` parameter.

    Returns:
        The list of video detail dicts produced by ``get_videos_details``.
        Empty when the channel is not found, has no uploads playlist, or a
        request fails; failures are reported on stdout rather than raised.
    """
    videos = []
    try:
        url_channels = "https://www.googleapis.com/youtube/v3/channels"
        params_channels = {
            'part': 'contentDetails',
            'id': channel_id,
            'key': api_key
        }
        response_channels = requests.get(url_channels, params=params_channels, timeout=10)
        response_channels.raise_for_status()
        data_channels = response_channels.json()
        channel_items = data_channels.get('items') or []
        if not channel_items:
            return videos
        related_playlists = (channel_items[0].get('contentDetails') or {}).get('relatedPlaylists') or {}
        uploads_playlist_id = related_playlists.get('uploads')
        if not uploads_playlist_id:
            return videos
        time.sleep(0.1)

        url_playlist = "https://www.googleapis.com/youtube/v3/playlistItems"
        params_playlist = {
            'part': 'contentDetails',
            'playlistId': uploads_playlist_id,
            'maxResults': min(max_results, 50),
            'key': api_key
        }
        response_playlist = requests.get(url_playlist, params=params_playlist, timeout=10)
        response_playlist.raise_for_status()
        data_playlist = response_playlist.json()

        # Skip malformed playlist entries instead of discarding the whole channel.
        video_ids = []
        for item in data_playlist.get('items') or []:
            video_id = (item.get('contentDetails') or {}).get('videoId')
            if video_id:
                video_ids.append(video_id)
        if video_ids:
            videos.extend(get_videos_details(video_ids, api_key))
        time.sleep(0.1)
    except (requests.RequestException, ValueError) as exc:
        # Do not print str(exc): requests embeds the full URL, including the
        # API key, in its error messages.
        status = getattr(getattr(exc, 'response', None), 'status_code', 'n/a')
        print(f"get_channel_videos: channel {channel_id!r} failed "
              f"({type(exc).__name__}, HTTP status {status})")
    return videos

def get_videos_details(video_ids, api_key=YOUTUBE_API_KEY):
    """Fetch category, duration, view count and title for the given videos.

    IDs are sent to the ``videos`` endpoint in batches of 50 so that lists
    longer than 50 are fully processed instead of being truncated.

    Args:
        video_ids: List of video IDs.
        api_key: API key sent as the ``key`` parameter.

    Returns:
        A list of dicts with the keys ``video_id``, ``category_id``,
        ``duration_seconds``, ``duration_minutes``, ``views`` and ``title``.
        Videos from a batch whose request fails are omitted; the failure is
        reported on stdout rather than raised.
    """
    if not video_ids:
        return []
    url = "https://www.googleapis.com/youtube/v3/videos"
    batch_size = 50  # same per-request cap the original single call used
    videos_info = []

    for start in range(0, len(video_ids), batch_size):
        batch = video_ids[start:start + batch_size]
        params = {
            'id': ','.join(batch),
            'part': 'snippet,contentDetails,statistics',
            'key': api_key
        }
        try:
            response = requests.get(url, params=params, timeout=10)
            response.raise_for_status()
            data = response.json()
        except (requests.RequestException, ValueError) as exc:
            # Do not print str(exc): requests embeds the full URL, including
            # the API key, in its error messages.
            status = getattr(getattr(exc, 'response', None), 'status_code', 'n/a')
            print(f"get_videos_details: batch of {len(batch)} videos failed "
                  f"({type(exc).__name__}, HTTP status {status})")
            continue
        finally:
            # Throttle after every attempt, successful or not.
            time.sleep(0.1)

        for item in data.get('items') or []:
            video_id = item.get('id')
            if not video_id:
                continue
            snippet = item.get('snippet') or {}
            content_details = item.get('contentDetails') or {}
            statistics = item.get('statistics') or {}
            duration_seconds = parse_duration(content_details.get('duration', 'PT0S'))
            videos_info.append({
                'video_id': video_id,
                'category_id': snippet.get('categoryId', ''),
                'duration_seconds': duration_seconds,
                'duration_minutes': duration_seconds / 60 if duration_seconds else 0,
                # A missing viewCount is counted as 0 views.
                'views': int(statistics.get('viewCount', 0)),
                'title': snippet.get('title', '')
            })
    return videos_info

def parse_duration(duration_str):
    """Convert an ISO 8601 duration string to a whole number of seconds.

    Handles the time-only form (``PT1H2M3S``) as well as values carrying a
    week or day component (``P1DT2H3M4S``), which the previous ``PT``-only
    pattern rejected and reported as 0 seconds.

    Args:
        duration_str: Duration text such as ``'PT15M'`` or ``'P1DT2H'``.

    Returns:
        Total seconds as an int. Returns 0 when the input is not a string or
        does not start with a recognizable duration.
    """
    if not isinstance(duration_str, str):
        return 0
    # Every component is optional; the time part only appears after 'T'.
    pattern = r'P(?:(\d+)W)?(?:(\d+)D)?(?:T(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?)?'
    match = re.match(pattern, duration_str)
    if not match:
        return 0
    weeks, days, hours, minutes, seconds = (int(group or 0) for group in match.groups())
    total_hours = (weeks * 7 + days) * 24 + hours
    return total_hours * 3600 + minutes * 60 + seconds


In [4]:
# channel_stats collects one summary per channel that has enough videos;
# qualified_channels is the subset whose gaming share is above 70%.
qualified_channels = []
channel_stats = []
MAX_CHANNELS_TO_CHECK = 200
MIN_VIDEOS_REQUIRED = 5
channels_to_check = channels_list[:MAX_CHANNELS_TO_CHECK]

for channel in tqdm(channels_to_check, desc="Анализ каналов"):
    channel_id = channel['channel_id']
    channel_title = channel['channel_title']
    videos = get_channel_videos(channel_id, max_results=50)
    total_videos = len(videos)

    # Channels with no videos, or too few, are skipped (without the pause below).
    if total_videos == 0 or total_videos < MIN_VIDEOS_REQUIRED:
        continue

    # Share of fetched videos that are in the gaming category.
    gaming_videos = len([v for v in videos if v['category_id'] == GAMING_CATEGORY_ID])
    gaming_percentage = (gaming_videos / total_videos) * 100

    # Averages are taken over strictly positive values only.
    positive_views = [n for n in (v.get('views', 0) for v in videos) if n > 0]
    avg_views = sum(positive_views) / len(positive_views) if positive_views else 0

    positive_durations = [s for s in (v.get('duration_seconds', 0) for v in videos) if s > 0]
    if positive_durations:
        avg_duration_seconds = sum(positive_durations) / len(positive_durations)
    else:
        avg_duration_seconds = 0
    avg_duration_minutes = avg_duration_seconds / 60 if avg_duration_seconds > 0 else 0

    # The first five fetched videos are kept as a sample.
    sample_videos_info = [
        {
            'video_id': v.get('video_id', ''),
            'title': v.get('title', ''),
            'duration_minutes': v.get('duration_minutes', 0),
            'views': v.get('views', 0)
        }
        for v in videos[:5]
    ]

    stats = dict(
        channel_id=channel_id,
        channel_title=channel_title,
        total_videos=total_videos,
        gaming_videos=gaming_videos,
        gaming_percentage=gaming_percentage,
        avg_views=avg_views,
        avg_duration_minutes=avg_duration_minutes,
        sample_videos=sample_videos_info,
        description=channel.get('description', ''),
    )
    channel_stats.append(stats)
    if gaming_percentage > 70:
        qualified_channels.append(stats)
    time.sleep(1.0)


Анализ каналов: 100%|██████████| 200/200 [07:37<00:00,  2.29s/it]


In [5]:
# Export the qualified channels, highest gaming share first, to Excel and CSV.
if qualified_channels:
    qualified_channels_sorted = sorted(
        qualified_channels,
        key=lambda entry: entry['gaming_percentage'],
        reverse=True,
    )
    data_for_save = []
    for channel in qualified_channels_sorted:
        # Required fields first, then optional ones with defaults; the
        # insertion order here determines the column order of the output.
        row = {
            field: channel[field]
            for field in ('channel_id', 'channel_title', 'total_videos',
                          'gaming_videos', 'gaming_percentage')
        }
        row['avg_views'] = channel.get('avg_views', 0)
        row['avg_duration_minutes'] = channel.get('avg_duration_minutes', 0)
        row['description'] = channel.get('description', '')

        # Flatten up to five sample videos into numbered columns.
        samples = channel.get('sample_videos') or []
        for idx, video in enumerate(samples[:5], 1):
            row.update({
                f'video_{idx}_title': video.get('title', ''),
                f'video_{idx}_duration_min': video.get('duration_minutes', 0),
                f'video_{idx}_views': video.get('views', 0),
            })
        data_for_save.append(row)

    df_qualified = pd.DataFrame(data_for_save)
    df_qualified.to_excel('qualified_gaming_channels.xlsx', index=False)
    df_qualified.to_csv('qualified_gaming_channels.csv', index=False, encoding='utf-8-sig')


In [6]:
# Export the statistics of every analysed channel to Excel and CSV.
if channel_stats:
    data_for_stats = []
    for channel in channel_stats:
        # Key order defines the column order of the exported table.
        row = dict(
            channel_id=channel['channel_id'],
            channel_title=channel['channel_title'],
            total_videos=channel['total_videos'],
            gaming_videos=channel['gaming_videos'],
            gaming_percentage=channel['gaming_percentage'],
            avg_views=channel.get('avg_views', 0),
            avg_duration_minutes=channel.get('avg_duration_minutes', 0),
            description=channel.get('description', ''),
        )

        # One title/duration/views column triple per sample video (at most five).
        sample_videos = channel.get('sample_videos')
        if sample_videos:
            for position, video in enumerate(sample_videos[:5], start=1):
                prefix = f'video_{position}'
                row[f'{prefix}_title'] = video.get('title', '')
                row[f'{prefix}_duration_min'] = video.get('duration_minutes', 0)
                row[f'{prefix}_views'] = video.get('views', 0)
        data_for_stats.append(row)

    df_all_stats = pd.DataFrame(data_for_stats)
    df_all_stats.to_excel('all_channels_stats.xlsx', index=False)
    df_all_stats.to_csv('all_channels_stats.csv', index=False, encoding='utf-8-sig')
