# A.1 Captura de videos mostPopular

In [None]:
#!pip install isodate

In [None]:
import googleapiclient.discovery
import pandas as pd
from datetime import datetime, timezone
import isodate
import os
from dotenv import load_dotenv


# Read the YouTube Data API key from the project's .env file and build the
# API client that the rest of the script uses.
load_dotenv("/Users/danielmunoz/Documents/EDUCACION/DATA_ANALIST/CURSOS/TFM/.env")
DEVELOPER_KEY = os.getenv("API_KEY")
if not DEVELOPER_KEY:
    # os.getenv returns None when the variable is absent (e.g. the .env file
    # was not found). Stop here with an explicit message instead of handing
    # a missing key to the client and failing later with a less clear error.
    raise RuntimeError(
        "API_KEY is not set: check that the .env file exists and defines API_KEY"
    )
youtube = googleapiclient.discovery.build(
    'youtube', 'v3', developerKey=DEVELOPER_KEY)


# Collection limits. MAX_VIDEOS is the number of rows to gather; PAGE_SIZE is
# the number of videos requested per page and also the channel-id batch size.
MAX_VIDEOS = 200
PAGE_SIZE = 50


def _parse_api_timestamp(value):
    """Turn an API timestamp ending in "Z" into a timezone-aware datetime."""
    return datetime.fromisoformat(value.replace("Z", "+00:00"))


def _load_channels(channel_ids, cache):
    """Fetch the channels that are not cached yet and store them in ``cache``.

    Ids are sent as a comma-separated list, at most PAGE_SIZE per request,
    instead of one request per video. Channels the API does not return are
    left out of ``cache``; falsy ids are ignored.

    Args:
        channel_ids: iterable of channel ids (may contain None or repeats).
        cache: dict mapping channel id -> channel resource; updated in place.
    """
    pending = sorted({cid for cid in channel_ids if cid and cid not in cache})
    for start in range(0, len(pending), PAGE_SIZE):
        batch = pending[start:start + PAGE_SIZE]
        channel_response = youtube.channels().list(
            part="snippet,statistics",
            id=",".join(batch),
            maxResults=PAGE_SIZE
        ).execute()
        for channel in channel_response.get('items', []):
            cache[channel['id']] = channel


def _build_video_row(item, channels, now):
    """Flatten one video resource and its channel into a single row dict.

    Args:
        item: one element of the ``items`` list of a videos.list response.
        channels: dict mapping channel id -> channel resource.
        now: timezone-aware datetime used as the reference for age columns.

    Returns:
        dict with the video, derived and channel columns.

    Raises:
        KeyError: a required section is missing or the channel is unknown.
        TypeError, ValueError, AttributeError: a field is absent or malformed
            (e.g. missing timestamp or unparsable duration/counter).
    """
    # Video sections
    video_id = item['id']
    snippet = item['snippet']
    stats = item['statistics']
    details = item['contentDetails']

    published_at = snippet.get('publishedAt')
    channel_id = snippet.get('channelId')
    tags = snippet.get('tags', [])

    # Counters arrive as strings; a missing counter is treated as 0.
    views = int(stats.get('viewCount', 0))
    likes = int(stats.get('likeCount', 0))
    comments = int(stats.get('commentCount', 0))
    favorites = int(stats.get('favoriteCount', 0))

    # Duration in minutes, parsed from the ISO 8601 duration string
    duration_seconds = isodate.parse_duration(details.get('duration')).total_seconds()

    # Derived variables. For videos less than a day old the raw view count is
    # used as views_per_day to avoid dividing by zero.
    video_age_days = (now - _parse_api_timestamp(published_at)).days
    views_per_day = views / video_age_days if video_age_days > 0 else views

    # Channel statistics (KeyError here means the channel could not be fetched)
    channel_info = channels[channel_id]
    channel_stats = channel_info['statistics']
    channel_created = channel_info['snippet'].get('publishedAt')

    return {
        'video_id': video_id,
        'title': snippet.get('title'),
        'description': snippet.get('description'),
        'channel_id': channel_id,
        'channel_title': snippet.get('channelTitle'),
        'published_at': published_at,
        'duration_minutes': duration_seconds / 60,
        'definition': details.get('definition'),
        'licensed_content': details.get('licensedContent'),
        'has_caption': details.get('caption') == 'true',
        'category_id': snippet.get('categoryId'),
        'views': views,
        'likes': likes,
        'comments': comments,
        'favorites': favorites,
        'tags_count': len(tags),
        'views_per_day': views_per_day,
        'likes_per_view': likes / views if views > 0 else 0,
        'comments_per_view': comments / views if views > 0 else 0,
        'video_age_days': video_age_days,
        'subscriber_count': int(channel_stats.get('subscriberCount', 0)),
        'channel_video_count': int(channel_stats.get('videoCount', 0)),
        'channel_views': int(channel_stats.get('viewCount', 0)),
        'channel_created': channel_created,
        'channel_age_days': (now - _parse_api_timestamp(channel_created)).days
    }


video_data = []
next_page_token = None
channel_cache = {}
# Single reference instant so every age column in this run is comparable.
collected_at = datetime.now(timezone.utc)

while len(video_data) < MAX_VIDEOS:
    response = youtube.videos().list(
        part="snippet,statistics,contentDetails",
        chart="mostPopular",
        regionCode="ES",
        maxResults=PAGE_SIZE,
        pageToken=next_page_token
    ).execute()
    items = response.get('items', [])

    # Resolve all channels of this page up front. This is a network boundary,
    # so any failure is reported and the affected videos are skipped below
    # rather than aborting the whole collection.
    try:
        _load_channels(
            (item.get('snippet', {}).get('channelId') for item in items),
            channel_cache
        )
    except Exception as e:
        print(f"Error obteniendo canales: {e}")

    for item in items:
        # Enforce the cap per row: skipped videos can otherwise trigger an
        # extra page that pushes the total past MAX_VIDEOS.
        if len(video_data) >= MAX_VIDEOS:
            break
        try:
            video_data.append(_build_video_row(item, channel_cache, collected_at))
        except (KeyError, IndexError, TypeError, ValueError, AttributeError) as e:
            # Only malformed or incomplete records are skipped; unexpected
            # errors are allowed to surface.
            print(f"Error procesando video: {e}")

    next_page_token = response.get('nextPageToken')
    if not next_page_token:
        break

# Output path for the CSV file ("PATH.csv" is the location as written here).
output_path = os.path.expanduser("PATH.csv")
# Turn the collected row dicts into a table and write it without the index.
df = pd.DataFrame(data=video_data)
df.to_csv(path_or_buf=output_path, index=False)
