# In [None]:
import pandas as pd
import time
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
import json
import os

# API key for the YouTube Data API. Prefer supplying it through the
# YOUTUBE_API_KEY environment variable so a real key never has to be written
# into this file; the literal is only a placeholder fallback (an empty
# environment value also falls back to it).
DEVELOPER_KEY = os.environ.get("YOUTUBE_API_KEY") or "chiaveAPI"
# Service name and version handed to googleapiclient.discovery.build().
YOUTUBE_API_SERVICE_NAME = 'youtube'
YOUTUBE_API_VERSION = 'v3'

def get_youtube_service():
    """Build and return an API client for the configured service and version.

    The client is created with ``DEVELOPER_KEY`` as its developer key.
    """
    service = build(
        YOUTUBE_API_SERVICE_NAME,
        YOUTUBE_API_VERSION,
        developerKey=DEVELOPER_KEY,
    )
    return service

def get_video_commenters(youtube, video_id, page_delay=0.1):
    """Collect the display names of the authors who commented on a video.

    Pages through ``commentThreads().list`` for ``video_id`` and gathers the
    author of every top-level comment plus the authors of the replies that are
    embedded in each returned thread.

    Args:
        youtube: API client exposing ``commentThreads().list(...).execute()``
            (e.g. the object returned by ``get_youtube_service``).
        video_id: ID of the video whose comment threads are fetched.
        page_delay: Seconds to pause between consecutive page requests.

    Returns:
        A sorted list of unique author display names. Sorting keeps the
        output stable between runs (plain set iteration order is not).

    Notes:
        This is best-effort: when a request raises ``HttpError`` the error is
        printed and whatever was collected so far is returned, so the result
        may be partial or empty.
        NOTE(review): the threads' ``replies`` field presumably carries only a
        subset of each thread's replies; confirm against the API reference if
        complete reply coverage is required.
    """
    commenters = set()
    next_page_token = None

    while True:
        # Only the network call can raise HttpError, so keep the try minimal.
        try:
            response = youtube.commentThreads().list(
                part="snippet,replies",
                videoId=video_id,
                maxResults=100,
                pageToken=next_page_token,
            ).execute()
        except HttpError as e:
            print(f"Errore per video {video_id}: {e}")
            break

        for item in response.get('items', []):
            top_level = item['snippet']['topLevelComment']['snippet']
            commenters.add(top_level['authorDisplayName'])

            # Threads without replies simply lack the 'replies' key.
            for reply in item.get('replies', {}).get('comments', []):
                commenters.add(reply['snippet']['authorDisplayName'])

        next_page_token = response.get('nextPageToken')
        if not next_page_token:
            break

        # Small pause between pages to go easy on the API.
        time.sleep(page_delay)

    return sorted(commenters)

def load_existing_data(json_path):
    """Load previously saved results so an interrupted run can be resumed.

    Args:
        json_path: Path of the JSON checkpoint file.

    Returns:
        The dict stored in the file, or an empty dict when the file does not
        exist, cannot be read or parsed, or does not contain a JSON object.
        Unreadable or malformed files are reported on stdout instead of being
        ignored silently.
    """
    try:
        with open(json_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except FileNotFoundError:
        # First run: there is nothing to resume from.
        return {}
    except (OSError, ValueError) as e:
        # json.JSONDecodeError and UnicodeDecodeError are ValueError subclasses.
        print(f"Impossibile leggere {json_path}: {e}. Si riparte da un dizionario vuoto.")
        return {}

    if not isinstance(data, dict):
        # Callers index the result by video id, so anything else is unusable.
        print(f"Contenuto inatteso in {json_path}: atteso un oggetto JSON. Si riparte da un dizionario vuoto.")
        return {}

    return data

def save_data(data, json_path):
    """Write ``data`` to ``json_path`` as UTF-8 JSON without risking the old file.

    The JSON is first written to a sibling ``<json_path>.tmp`` file and then
    moved over the destination with ``os.replace``, so an interruption or a
    serialization error in the middle of a save leaves the previous
    checkpoint intact instead of a truncated file.

    Args:
        data: JSON-serializable object to store.
        json_path: Destination file path.

    Raises:
        TypeError, ValueError, OSError: propagated from ``json.dump`` or the
            file operations; the temporary file is removed first.
    """
    tmp_path = f"{os.fspath(json_path)}.tmp"
    try:
        with open(tmp_path, 'w', encoding='utf-8') as f:
            json.dump(data, f, ensure_ascii=False, indent=2)
        os.replace(tmp_path, json_path)
    except BaseException:
        # Do not leave a half-written temp file behind; then re-raise as-is.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise

def get_simple_video_commenters_dict_with_backup(csv_file_path, json_output, save_interval=10):
    """Map every video in a CSV to its commenters, checkpointing to JSON.

    Reads the ``video_id`` column of ``csv_file_path``, skips the videos that
    are already keys of the JSON checkpoint at ``json_output``, fetches the
    commenters of the remaining ones and stores them in the same file.

    Args:
        csv_file_path: CSV file with a ``video_id`` column.
        json_output: JSON file used both as resume checkpoint and as output.
        save_interval: Save the checkpoint every this many processed videos
            (values below 1 are treated as 1).

    Returns:
        Dict mapping video id to the list returned by
        ``get_video_commenters`` (including entries loaded from the
        checkpoint).

    Notes:
        The checkpoint is also written when the loop exits for any reason
        (including ``KeyboardInterrupt``), so at most the in-flight video is
        lost.
    """
    df = pd.read_csv(csv_file_path)
    # Drop missing ids and normalise to str so they compare equal to the
    # (always string) keys loaded from the JSON checkpoint.
    unique_video_ids = list(df['video_id'].dropna().astype(str).unique())
    youtube = get_youtube_service()

    result = load_existing_data(json_output)

    remaining_videos = [vid for vid in unique_video_ids if vid not in result]
    # Count only CSV videos as done; the checkpoint may hold unrelated keys.
    already_done = len(unique_video_ids) - len(remaining_videos)

    print(f"Video totali: {len(unique_video_ids)}")
    print(f"Video già processati: {already_done}")
    print(f"Video rimanenti: {len(remaining_videos)}")

    # Guard the modulo below against zero or negative intervals.
    save_interval = max(1, int(save_interval))

    try:
        # Process the remaining videos.
        for i, video_id in enumerate(remaining_videos):
            current_total = already_done + i + 1
            print(f"Processando {current_total}/{len(unique_video_ids)}: {video_id}")

            try:
                result[video_id] = get_video_commenters(youtube, video_id)
            except Exception as e:
                # Best-effort: report, checkpoint what we have, move on.
                print(f"Errore durante il processamento del video {video_id}: {e}")
                save_data(result, json_output)
            else:
                # Periodic checkpoint every save_interval videos.
                if (i + 1) % save_interval == 0:
                    save_data(result, json_output)
                    print(f"💾 Progresso salvato dopo {current_total} video")

            # Throttle between videos, after failures as well.
            time.sleep(1)
    finally:
        # Final save; also runs when the loop is interrupted.
        save_data(result, json_output)

    return result

# Input CSV (read for its `video_id` column) and the JSON file that serves as
# both resume checkpoint and final output.
csv_input = "/Users/cristianrossato/Desktop/social/code_3_11/progetto/data/scripts/yt/yt_data/trump.csv"
json_output = "/Users/cristianrossato/Desktop/social/code_3_11/progetto/data/scripts/yt/yt_data/video_commenters_final_11.json"

# Run the collection, checkpointing every 10 videos.
commenters_dict = get_simple_video_commenters_dict_with_backup(
    csv_file_path=csv_input,
    json_output=json_output,
    save_interval=10,
)