**Import Library**

In [10]:
import csv
import time
import traceback
import pandas as pd
from googleapiclient.discovery import build

**Definisi Awal**

Proses Scarping menggunakan YouTube Data API v3 di Google Cloud Console. Perlu [mengaktifkan](https://idwebhost.com/blog/youtube-api-key-adalah/) akun untuk mendapatkan API Key

In [None]:
API_KEY = 'api_key_gcp' #sesuaikan
MAX_COMMENTS = 20000

youtube = build('youtube', 'v3', developerKey=API_KEY)

video_sources = {
    '7lGiipi4czw': 'Kompas',
    'jiP96n4z4Lk': 'CNN Indonesia',
    '-WLpXmnBmxo': 'Najwa Shihab',
}

comments = []

**Fungsi get_comments**

Mengambil data komentar dengan struktur topLevelComment dan reply-nya

In [33]:
def get_comments(video_id, source_name):
    next_page_token = None
    last_report = 0

    while len(comments) < MAX_COMMENTS:
        try:
            request = youtube.commentThreads().list(
                part='snippet',
                videoId=video_id,
                textFormat='plainText',
                maxResults=100,
                pageToken=next_page_token
            )
            response = request.execute()

            for item in response['items']:
                top_comment_snippet = item['snippet']['topLevelComment']['snippet']
                author = top_comment_snippet.get('authorDisplayName', 'Anonim')
                comment_text = top_comment_snippet.get('textDisplay', '')
                comments.append((author, comment_text, source_name))

                # Ambil balasan komentar jika ada
                if item['snippet']['totalReplyCount'] > 0:
                    parent_id = item['snippet']['topLevelComment']['id']
                    reply_request = youtube.comments().list(
                        part='snippet',
                        parentId=parent_id,
                        textFormat='plainText',
                        maxResults=100
                    )
                    reply_response = reply_request.execute()

                    for reply in reply_response['items']:
                        reply_snippet = reply['snippet']
                        reply_author = reply_snippet.get('authorDisplayName', 'Anonim')
                        reply_text = reply_snippet.get('textDisplay', '')
                        comments.append((reply_author, reply_text, source_name))

                # Tampilkan progres setiap 1000 komentar
                if len(comments) // 1000 > last_report:
                    last_report = len(comments) // 1000
                    print(f"[INFO] Telah mengumpulkan {len(comments)} komentar...")

                if len(comments) >= MAX_COMMENTS:
                    break

            next_page_token = response.get('nextPageToken')
            if not next_page_token:
                break

            time.sleep(0.1)

        except Exception as e:
            print(f"[ERROR] Terjadi kesalahan: {e}")
            traceback.print_exc()
            print("Menunggu 5 detik sebelum mencoba lagi...")
            time.sleep(5)

**Jalankan Proses Scraping dan simpan hasil**

In [34]:
print("[START] Mengambil komentar dari YouTube...")
for vid, source in video_sources.items():
    print(f"[INFO] Mengambil komentar dari sumber: {source} (videoId: {vid})")
    get_comments(vid, source)
    if len(comments) >= MAX_COMMENTS:
        break

with open('komentar_prabowo_menjawab.csv', 'w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(['Nama Akun', 'Komentar', 'Sumber'])
    for author, comment, source in comments:
        writer.writerow([author, comment, source])

print(f"[DONE] {len(comments)} komentar berhasil disimpan ke 'komentar_prabowo_menjawab.csv'")

[START] Mengambil komentar dari YouTube...
[INFO] Mengambil komentar dari sumber: Kompas (videoId: 7lGiipi4czw)
[INFO] Telah mengumpulkan 1000 komentar...
[INFO] Telah mengumpulkan 2010 komentar...
[INFO] Mengambil komentar dari sumber: CNN Indonesia (videoId: jiP96n4z4Lk)
[INFO] Telah mengumpulkan 2261 komentar...
[INFO] Telah mengumpulkan 3003 komentar...
[INFO] Mengambil komentar dari sumber: Najwa Shihab (videoId: -WLpXmnBmxo)
[INFO] Telah mengumpulkan 3326 komentar...
[INFO] Telah mengumpulkan 4000 komentar...
[INFO] Telah mengumpulkan 5000 komentar...
[INFO] Telah mengumpulkan 6005 komentar...
[INFO] Telah mengumpulkan 7036 komentar...
[INFO] Telah mengumpulkan 8000 komentar...
[INFO] Telah mengumpulkan 9000 komentar...
[ERROR] Terjadi kesalahan: <HttpError 404 when requesting https://youtube.googleapis.com/youtube/v3/commentThreads?part=snippet&videoId=-WLpXmnBmxo&textFormat=plainText&maxResults=100&pageToken=Z2V0X25ld2VzdF9maXJzdC0tQ2dnSWdBUVZGN2ZST0JJRkNJa2dHQUFTQlFpSElCZ0FFZ1

Traceback (most recent call last):
  File "C:\Users\lenovo\AppData\Local\Temp\ipykernel_17288\854061239.py", line 14, in get_comments
    response = request.execute()
               ^^^^^^^^^^^^^^^^^
  File "c:\Users\lenovo\ProyekNLP\Lib\site-packages\googleapiclient\_helpers.py", line 130, in positional_wrapper
    return wrapped(*args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\lenovo\ProyekNLP\Lib\site-packages\googleapiclient\http.py", line 938, in execute
    raise HttpError(resp, content, uri=self.uri)
googleapiclient.errors.HttpError: <HttpError 404 when requesting https://youtube.googleapis.com/youtube/v3/commentThreads?part=snippet&videoId=-WLpXmnBmxo&textFormat=plainText&maxResults=100&pageToken=Z2V0X25ld2VzdF9maXJzdC0tQ2dnSWdBUVZGN2ZST0JJRkNJa2dHQUFTQlFpSElCZ0FFZ1VJaUNBWUFCSUZDSjBnR0FFU0JRaW9JQmdBSWc0S0RBamNxZE9fQmhEd2twU3JBdw%3D%3D&key=AIzaSyDl6w9rSAsR9jXsWcBQeWLBOKZSHo4eGGo&alt=json returned "The video identified by the <code><a href="/youtube/v3/docs/c

[INFO] Telah mengumpulkan 10000 komentar...
[INFO] Telah mengumpulkan 11005 komentar...
[INFO] Telah mengumpulkan 12000 komentar...
[INFO] Telah mengumpulkan 13000 komentar...
[INFO] Telah mengumpulkan 14006 komentar...
[INFO] Telah mengumpulkan 15040 komentar...
[INFO] Telah mengumpulkan 16004 komentar...
[DONE] 16838 komentar berhasil disimpan ke 'komentar_prabowo_menjawab.csv'


In [35]:
data = pd.read_csv('komentar_prabowo_menjawab.csv')
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16838 entries, 0 to 16837
Data columns (total 3 columns):
 #   Column     Non-Null Count  Dtype 
---  ------     --------------  ----- 
 0   Nama Akun  16835 non-null  object
 1   Komentar   16834 non-null  object
 2   Sumber     16838 non-null  object
dtypes: object(3)
memory usage: 394.8+ KB


In [36]:
data.head(10)

Unnamed: 0,Nama Akun,Komentar,Sumber
0,@HarianKompasCetak,Sahabat Kompas bisa mengakses bagian 2 dari wa...,Kompas
1,@eijisangajiadityabps5018,"Maap, akan ada sesi WAPRES MENJAWAB atau tidak...",Kompas
2,@rdmasokeh5253,SUKA TDK SUKA PRESIDEN BUKAN CORONG ANUS PENGH...,Kompas
3,@anyunsuice,Pertanyaan dr HAM plg gobl*k sama sprti cara k...,Kompas
4,@failnfall,coretax mana??,Kompas
5,@hiromichi_,"Maaf, ga percaya portal berita pelacur informa...",Kompas
6,@budayanaketut7256,😢😢😢😢...sangat salut sama bpk presiden,Kompas
7,@tonincuk9397,​@@eijisangajiadityabps5018 kebijakan di Presi...,Kompas
8,@Hanzzz07,​@@tonincuk9397Capres mana lagi ini bg?,Kompas
9,@Kipas_angin_maspiun,"Hilang sudah kredibilitas dan Integritas, bata...",Kompas
