In [49]:
# Social Media Mining - Caso Garlasco
# Obiettivo: raccogliere e salvare commenti da video YouTube relativi alla riapertura del caso Chiara Poggi (Garlasco), pubblicati tra marzo 2024 e luglio 2025.
from googleapiclient.discovery import build
import csv
from datetime import datetime
import json
import os
from dotenv import load_dotenv

In [50]:
# Configure access to the YouTube Data API.
# The key is read from the environment after loading the local .env file, so it
# is never written in the notebook source itself.
load_dotenv()
DEVELOPER_KEY = os.getenv('yt_key')
if not DEVELOPER_KEY:
    # Fail fast: every later cell needs an authenticated client.
    raise ValueError("Chiave API non trovata. Assicurati che la variabile 'yt_key' sia nel file .env")
youtube = build("youtube", "v3", developerKey=DEVELOPER_KEY)

In [54]:
# Target channels whose videos are collected
channels = ["Rai", "Fanpage.it", "La Repubblica", "Mediaset Infinity", "FABRIZIO CORONA"]

# Search keywords relevant to the Garlasco case
keywords = [
    "Chiara Poggi",
    "Alberto Stasi",
    "omicidio Garlasco",
    "delitto di Garlasco",
    "caso Garlasco",
    "riapertura caso Garlasco",
    "sentenza Garlasco",
    "cold case Poggi",
    "processo Alberto Stasi",
]

# Publication window for the videos (both values are midnight timestamps)
begin_date = datetime(year=2024, month=3, day=27)
end_date = datetime(year=2025, month=7, day=31)

In [55]:
# Resolve a channel's display name to its YouTube channel ID
def getIDfromName(name):
    """Return the channel ID of the first channel found by a name search.

    Args:
        name: Channel name used as the search query.

    Returns:
        The ``channelId`` of the first search hit, or ``None`` when the search
        returns no channel or the request fails. In both failure cases a
        message is printed.
    """
    import re  # local import: only needed to scrub the API key from error text

    try:
        request = youtube.search().list(part="snippet", q=name, type="channel", maxResults=1)
        response = request.execute()
        items = response.get("items") or []
        if not items:
            # An empty result is not an API failure; report it explicitly
            # instead of surfacing an opaque "list index out of range".
            print(f"[Errore] Impossibile ottenere l'ID del canale '{name}': nessun canale trovato")
            return None
        return items[0]["id"]["channelId"]
    except Exception as e:
        # The error text embeds the request URL, which carries the developer
        # key as a `key=` query parameter; mask it so it never reaches the
        # saved notebook output.
        detail = re.sub(r"([?&]key=)[^&\s]+", r"\1<REDACTED>", str(e))
        print(f"[Errore] Impossibile ottenere l'ID del canale '{name}': {detail}")
        return None

# Search a channel for videos matching the case keywords and save them to CSV
def search_keyword_videos(channel_id, channel_name, keywords):
    """Collect videos of one channel that match any keyword and store them.

    For each keyword a single search request is issued (up to 50 results,
    newest first; no further pages are fetched). Videos published between the
    module-level ``begin_date`` and ``end_date`` are kept; both bounds are
    inclusive calendar days. A video matched by several keywords is stored
    once, in first-seen order.

    Args:
        channel_id: YouTube channel ID to restrict the search to.
        channel_name: Channel name, used in log messages and in the output
            file name ``video_ids_<channel_name>_GARLASCO.csv``.
        keywords: Iterable of search queries.

    Returns:
        None. The CSV is written only when at least one video was found.
    """
    import re  # local import: only needed to scrub the API key from error text

    # Compare calendar days: `end_date` is a midnight timestamp, so a datetime
    # comparison would discard everything published later on the final day.
    first_day, last_day = begin_date.date(), end_date.date()

    found = {}  # video_id -> (date, title); insertion order kept, duplicates dropped
    for keyword in keywords:
        try:
            request = youtube.search().list(
                part="snippet",
                q=keyword,
                channelId=channel_id,
                type="video",
                maxResults=50,
                order="date"
            )
            response = request.execute()
            for item in response["items"]:
                pub_date = datetime.strptime(item["snippet"]["publishedAt"], "%Y-%m-%dT%H:%M:%SZ")
                if not (first_day <= pub_date.date() <= last_day):
                    continue
                video_id = item["id"]["videoId"]
                title = item["snippet"]["title"]
                # The same video often matches several keywords; keep one row.
                found.setdefault(video_id, (pub_date.strftime("%Y-%m-%d"), title))
        except Exception as e:
            # The error text embeds the request URL including the `key=`
            # parameter; mask the key before printing.
            detail = re.sub(r"([?&]key=)[^&\s]+", r"\1<REDACTED>", str(e))
            print(f"[Errore] Ricerca con keyword '{keyword}' per canale '{channel_name}': {detail}")
            continue

    if not found:
        print(f"[Nessun video] trovato per '{channel_name}' con parole chiave.")
        return

    with open(f"video_ids_{channel_name}_GARLASCO.csv", "w", newline='', encoding="utf-8") as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["Video ID", "Date", "Title"])
        for vid, (date, title) in found.items():
            writer.writerow([vid, date, title])
    print(f"[OK] Salvati video relativi al caso Garlasco per '{channel_name}'")

# Run the video collection for every channel in the list
def get_garlasco_videos(channels, keywords):
    """Resolve each channel name to an ID and search it for the given keywords.

    Channels whose ID cannot be resolved are reported and skipped.

    Args:
        channels: Iterable of channel names.
        keywords: Keywords forwarded to ``search_keyword_videos``.
    """
    for channel in channels:
        print(f"\n>>> Ricerca nel canale: {channel}")
        resolved_id = getIDfromName(channel)
        if resolved_id:
            search_keyword_videos(resolved_id, channel, keywords)
        else:
            print(f"Saltato: ID non trovato per '{channel}'")

In [56]:
# Start the video collection for the configured channels and keywords
get_garlasco_videos(channels, keywords)


>>> Ricerca nel canale: Rai
[Errore] Impossibile ottenere l'ID del canale 'Rai': <HttpError 403 when requesting https://youtube.googleapis.com/youtube/v3/search?part=snippet&q=Rai&type=channel&maxResults=1&key=<REDACTED>&alt=json returned "The request cannot be completed because you have exceeded your <a href="/youtube/v3/getting-started#quota">quota</a>.". Details: "[{'message': 'The request cannot be completed because you have exceeded your <a href="/youtube/v3/getting-started#quota">quota</a>.', 'domain': 'youtube.quota', 'reason': 'quotaExceeded'}]">
Saltato: ID non trovato per 'Rai'

>>> Ricerca nel canale: Fanpage.it
[Errore] Impossibile ottenere l'ID del canale 'Fanpage.it': <HttpError 403 when requesting https://youtube.googleapis.com/youtube/v3/search?part=snippet&q=Fanpage.it&type=channel&maxResults=1&key=<REDACTED>&alt=json returned "The request cannot be completed because you have exceeded your <a href="/youtube/v3/

In [46]:
# Data structure holding a single comment
class Comment:
    """Plain record for one YouTube comment or reply.

    Attributes are stored exactly as passed to the constructor:
    ``id``, ``video_id``, ``content``, ``author``, ``date``, ``likes`` and
    ``reply_to_id`` (``None`` unless given).
    """

    def __init__(self, id, video_id, content, author, date, likes, reply_to_id=None):
        # Identity of the comment and of the video it belongs to
        self.id, self.video_id = id, video_id
        # Text and author
        self.content, self.author = content, author
        # Publication date and like count
        self.date, self.likes = date, likes
        # Parent comment ID; left as None when the caller does not supply one
        self.reply_to_id = reply_to_id

In [47]:
def _parse_published_at(value):
    """Parse a ``publishedAt`` timestamp, with or without fractional seconds."""
    try:
        return datetime.strptime(value, "%Y-%m-%dT%H:%M:%SZ")
    except ValueError:
        return datetime.strptime(value, "%Y-%m-%dT%H:%M:%S.%fZ")


def _comment_from_resource(resource, video_id, reply_to_id=None):
    """Build a Comment from one comment resource dict (top-level or reply)."""
    snippet = resource["snippet"]
    return Comment(
        id=resource["id"],
        video_id=video_id,
        content=snippet["textDisplay"],
        author=snippet["authorDisplayName"],
        date=_parse_published_at(snippet["publishedAt"]),
        likes=snippet["likeCount"],
        reply_to_id=reply_to_id,
    )


# Build Comment objects (top-level comments and their replies) from API result items
def get_comments_from_response(items):
    """Convert comment-thread items into a flat list of Comment objects.

    Each thread contributes its top-level comment followed by the replies
    embedded in the item; replies carry the top-level comment's ID in
    ``reply_to_id``. Malformed threads or replies are skipped, and every skip
    is printed so that dropped data is visible instead of silently lost.

    NOTE(review): only the replies embedded in the item are read here. Whether
    the API embeds every reply of a thread should be confirmed against the
    commentThreads documentation.

    Args:
        items: Iterable of comment-thread dicts (the ``items`` of a response).

    Returns:
        List of Comment objects in thread order.
    """
    comments = []
    for item in items:
        try:
            video_id = item["snippet"]["videoId"]
            top = item["snippet"]["topLevelComment"]
            comments.append(_comment_from_resource(top, video_id))
        except Exception as e:
            # A thread without a usable top-level comment is skipped together
            # with its replies, which would otherwise have no parent ID.
            thread_id = item.get("id", "?") if isinstance(item, dict) else "?"
            print(f"[Avviso] Thread di commenti saltato (id={thread_id}): {e!r}")
            continue

        replies = (item.get("replies") or {}).get("comments") or []
        for reply in replies:
            try:
                comments.append(_comment_from_resource(reply, video_id, reply_to_id=top["id"]))
            except Exception as e:
                reply_id = reply.get("id", "?") if isinstance(reply, dict) else "?"
                print(f"[Avviso] Risposta saltata (id={reply_id}): {e!r}")
    return comments

# Fetch all comments of a single video, following pagination as needed
def get_comments_one_vid(video_id):
    """Download the comment threads of one video, page by page.

    Requests up to 100 threads per page and keeps following
    ``nextPageToken`` until the response no longer provides one. If a request
    fails, the error is printed (with the API key masked) and the comments
    gathered so far are returned.

    Args:
        video_id: ID of the video whose comments are fetched.

    Returns:
        List of the objects produced by ``get_comments_from_response`` for
        every page that was retrieved (possibly empty).
    """
    import re  # local import: only needed to scrub the API key from error text

    comments = []
    params = {
        "part": "snippet,replies",
        "videoId": video_id,
        "textFormat": "plainText",
        "maxResults": 100,
    }
    try:
        while True:
            response = youtube.commentThreads().list(**params).execute()
            comments.extend(get_comments_from_response(response["items"]))
            next_token = response.get("nextPageToken")
            if not next_token:
                break
            # The first request carries no pageToken; later ones reuse the
            # same parameters plus the token of the page to fetch.
            params["pageToken"] = next_token
    except Exception as e:
        # The error text embeds the request URL including the `key=`
        # parameter; mask the key before printing.
        detail = re.sub(r"([?&]key=)[^&\s]+", r"\1<REDACTED>", str(e))
        print(f"[Errore] Estrazione commenti video {video_id}: {detail}")
    return comments

# Write the extracted comments to a CSV file named after the channel
def save_comments_csv(comments, channel_name):
    """Store comments in ``comments_<channel_name>.csv`` (UTF-8, with header).

    Args:
        comments: Iterable of objects exposing ``id``, ``video_id``,
            ``content``, ``author``, ``date`` (with ``strftime``), ``likes``
            and ``reply_to_id``.
        channel_name: Channel name used to build the output file name.
    """
    header = ["Comment ID", "Video ID", "Content", "Author", "Date", "Likes", "Reply To ID"]
    with open(f"comments_{channel_name}.csv", "w", newline='', encoding="utf-8") as csvfile:
        out = csv.writer(csvfile)
        out.writerow(header)
        # One row per comment; the date is reduced to its calendar day.
        out.writerows(
            [c.id, c.video_id, c.content, c.author, c.date.strftime("%Y-%m-%d"), c.likes, c.reply_to_id]
            for c in comments
        )
    print(f"[OK] Commenti salvati in comments_{channel_name}.csv")

# Load a channel's saved video IDs, download their comments and save them
def get_comments_from_csv_file(channel):
    """Collect and store the comments of every video listed for a channel.

    Reads ``video_ids_<channel>_GARLASCO.csv`` (first column = video ID, first
    row = header), fetches the comments of each distinct video once and
    passes them to ``save_comments_csv``. A missing input file or any other
    failure is printed and not raised.

    Args:
        channel: Channel name used to build the input and output file names.
    """
    try:
        # newline='' lets the csv module handle line endings inside quoted
        # fields itself, as the csv documentation recommends.
        with open(f"video_ids_{channel}_GARLASCO.csv", "r", newline='', encoding="utf-8") as csvfile:
            reader = csv.reader(csvfile)
            next(reader, None)  # skip the header; tolerate an empty file
            # Ignore blank rows, and fetch each video only once even if the
            # file lists it several times (order of first appearance kept).
            video_ids = list(dict.fromkeys(row[0] for row in reader if row and row[0]))
        all_comments = []
        for vid in video_ids:
            all_comments.extend(get_comments_one_vid(vid))
        save_comments_csv(all_comments, channel)
    except FileNotFoundError:
        print(f"[File non trovato] video_ids_{channel}_GARLASCO.csv")
    except Exception as e:
        print(f"[Errore] Raccolta commenti per '{channel}': {e}")

In [48]:
# Extract and save the comments for each configured channel
for channel in channels:
    get_comments_from_csv_file(channel)

[File non trovato] video_ids_Quarto Grado_GARLASCO.csv
[OK] Commenti salvati in comments_Fanpage.it.csv
[OK] Commenti salvati in comments_La Repubblica.csv
[File non trovato] video_ids_Le Iene_GARLASCO.csv
[OK] Commenti salvati in comments_FABRIZIO CORONA.csv
