In [1]:
!pip install google-api-python-client

Defaulting to user installation because normal site-packages is not writeable
Collecting google-api-python-client
  Downloading google_api_python_client-2.52.0-py2.py3-none-any.whl (8.7 MB)
     |████████████████████████████████| 8.7 MB 25.1 MB/s            
[?25hCollecting google-auth-httplib2>=0.1.0
  Downloading google_auth_httplib2-0.2.0-py2.py3-none-any.whl (9.3 kB)
Collecting google-api-core!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0,<3.0.0dev,>=1.31.5
  Downloading google_api_core-2.8.2-py3-none-any.whl (114 kB)
     |████████████████████████████████| 114 kB 106.8 MB/s            
Collecting uritemplate<5,>=3.0.1
  Downloading uritemplate-4.1.1-py2.py3-none-any.whl (10 kB)
Collecting google-auth<3.0.0dev,>=1.19.0
  Downloading google_auth-2.22.0-py2.py3-none-any.whl (181 kB)
     |████████████████████████████████| 181 kB 100.4 MB/s            
Collecting googleapis-common-protos<2.0dev,>=1.56.2
  Downloading googleapis_common_protos-1.56.3-py2.py3-none-any.whl (211 kB)
     |██████████████

## YouTube API로 데이터 수집

### 필드 목록
- video_id: 비디오 ID (고유 식별자)
- title: 비디오 제목
- published_at: 비디오 업로드 날짜 (CSV 컬럼명: publish_date)
- channel_title: 비디오가 업로드된 채널의 이름 (CSV 컬럼명: channel_name)
- comment: 댓글 내용
- like_count: 댓글의 좋아요 개수
- published_at: 댓글 작성 날짜 (각 댓글 항목 내부의 필드, CSV 컬럼명: comment_publish_date)

In [None]:
import csv
import time
import json
import pandas as pd
import os
from dotenv import load_dotenv
from googleapiclient.discovery import build

# Load variables from a local .env file (if any) into the process environment,
# then read the YouTube Data API key from it.
load_dotenv()
api_key = os.getenv('YOUTUBE_API_KEY')
if not api_key:
    # Fail fast so a missing key is reported here rather than surfacing later
    # as an opaque API error on the first request.
    raise RuntimeError(
        'YOUTUBE_API_KEY is not set; define it in the environment or in a .env file.'
    )

# Shared YouTube Data API v3 client used by every request in this notebook.
youtube = build('youtube', 'v3', developerKey=api_key)

### 갤럭시 S24 리뷰

In [2]:
# Search for up to 50 videos matching the query, ordered by view count and
# restricted to uploads after 2024-01-17T00:00:00Z.
search_query = '갤럭시 S24 리뷰'
search_params = {
    'q': search_query,
    'part': 'snippet',
    'type': 'video',
    'maxResults': 50,
    'order': 'viewCount',
    'publishedAfter': '2024-01-17T00:00:00Z',
}
request = youtube.search().list(**search_params)
search_response = request.execute()

# Keep only the per-video metadata that is stored alongside the comments.
video_data = [
    dict(
        video_id=item['id']['videoId'],
        title=item['snippet']['title'],
        published_at=item['snippet']['publishedAt'],
        channel_title=item['snippet']['channelTitle'],
    )
    for item in search_response['items']
]

def get_all_comments(video_id):
    """Collect every top-level comment of one video.

    Pages through ``commentThreads().list`` on the module-level ``youtube``
    client until a response carries no ``nextPageToken``.

    Args:
        video_id: ID of the video whose comment threads are requested.

    Returns:
        A list of dicts with the keys ``comment``, ``like_count`` and
        ``published_at``, in the order the API returned them. The list is
        empty when the video has comments disabled.

    Raises:
        googleapiclient.errors.HttpError: For any API failure other than the
            "comments disabled" 403 response.
    """
    # Local import keeps this cell self-contained; the package is already a
    # dependency of the notebook (see the `build` import).
    from googleapiclient.errors import HttpError

    comments = []
    next_page_token = None
    while True:
        request = youtube.commentThreads().list(
            part='snippet',
            videoId=video_id,
            textFormat='plainText',
            maxResults=100,  # largest page the API allows -> fewest requests/quota
            pageToken=next_page_token,
        )
        try:
            response = request.execute()
        except HttpError as err:
            body = err.content
            if isinstance(body, bytes):
                body = body.decode('utf-8', 'replace')
            # A video with comments turned off must not abort the whole
            # collection run; every other error is still propagated.
            if err.resp.status == 403 and 'commentsDisabled' in (body or ''):
                break
            raise

        for item in response.get('items', []):
            top_level = item['snippet']['topLevelComment']['snippet']
            comments.append({
                'comment': top_level['textDisplay'],
                'like_count': top_level['likeCount'],
                'published_at': top_level['publishedAt'],
            })

        next_page_token = response.get('nextPageToken')
        if not next_page_token:
            break

    return comments

# Gather the comments of every video found by the search, keyed by video ID.
all_comments = {}
for video in video_data:
    video_id = video['video_id']
    all_comments[video_id] = {
        'title': video['title'],
        'published_at': video['published_at'],
        'channel_title': video['channel_title'],
        'comments': get_all_comments(video_id),
    }

# Persist the raw dump as UTF-8 JSON, leaving non-ASCII text unescaped.
with open('youtube_s24_1.json', 'w', encoding='utf-8') as out_file:
    out_file.write(json.dumps(all_comments, ensure_ascii=False, indent=4))

print('저장 완료')

저장 완료


In [17]:
# Reload the saved comment dump and flatten it to one row per comment.
with open('youtube_s24_1.json', 'r', encoding='utf-8') as json_file:
    data = json.load(json_file)

csv_columns = [
    'video_id',
    'title',
    'publish_date',
    'channel_name',
    'comment',
    'like_count',
    'comment_publish_date',
]

# Video-level fields are repeated on every comment row of that video.
rows = [
    [
        video_id,
        video_info['title'],
        video_info['published_at'],
        video_info['channel_title'],
        comment_info['comment'],
        comment_info['like_count'],
        comment_info['published_at'],
    ]
    for video_id, video_info in data.items()
    for comment_info in video_info['comments']
]

df = pd.DataFrame(rows, columns=csv_columns)
df.to_csv('youtube_s24_1.csv', index=False, encoding='utf-8-sig')

print('CSV 파일 저장 완료')

CSV 파일 저장 완료


In [12]:
# Peek at the end of the flattened comment table as a sanity check.
# NOTE(review): the output saved with this cell shows column names such as
# 'Video ID' that differ from the ones assigned when `df` is built above --
# presumably stale; re-run after a kernel restart to confirm.
df.tail()

Unnamed: 0,Video ID,Title,Published At,Channel Title,Comment,Like Count,Comment Published At
25029,PkcH00u1uYE,[단독 공개] 갤럭시 A35 2주 실사용 장단점 리뷰,2024-06-20T11:53:54Z,티노,굿,0,2024-06-20T11:56:07Z
25030,PkcH00u1uYE,[단독 공개] 갤럭시 A35 2주 실사용 장단점 리뷰,2024-06-20T11:53:54Z,티노,궁금해서 기다리고 있었어요!,3,2024-06-20T11:55:37Z
25031,PkcH00u1uYE,[단독 공개] 갤럭시 A35 2주 실사용 장단점 리뷰,2024-06-20T11:53:54Z,티노,아이패드 혹시 언제...,1,2024-06-20T11:54:45Z
25032,PkcH00u1uYE,[단독 공개] 갤럭시 A35 2주 실사용 장단점 리뷰,2024-06-20T11:53:54Z,티노,❤,0,2024-06-20T11:54:24Z
25033,PkcH00u1uYE,[단독 공개] 갤럭시 A35 2주 실사용 장단점 리뷰,2024-06-20T11:53:54Z,티노,1,0,2024-06-20T11:54:17Z


### 갤럭시 S24 후기

In [3]:
# Search for up to 50 videos matching the query, ordered by view count and
# restricted to uploads after 2024-01-17T00:00:00Z.
search_query = '갤럭시 S24 후기'
search_params = {
    'q': search_query,
    'part': 'snippet',
    'type': 'video',
    'maxResults': 50,
    'order': 'viewCount',
    'publishedAfter': '2024-01-17T00:00:00Z',
}
request = youtube.search().list(**search_params)
search_response = request.execute()

# Keep only the per-video metadata that is stored alongside the comments.
video_data = [
    dict(
        video_id=item['id']['videoId'],
        title=item['snippet']['title'],
        published_at=item['snippet']['publishedAt'],
        channel_title=item['snippet']['channelTitle'],
    )
    for item in search_response['items']
]

def get_all_comments(video_id):
    """Collect every top-level comment of one video.

    Pages through ``commentThreads().list`` on the module-level ``youtube``
    client until a response carries no ``nextPageToken``.

    Args:
        video_id: ID of the video whose comment threads are requested.

    Returns:
        A list of dicts with the keys ``comment``, ``like_count`` and
        ``published_at``, in the order the API returned them. The list is
        empty when the video has comments disabled.

    Raises:
        googleapiclient.errors.HttpError: For any API failure other than the
            "comments disabled" 403 response.
    """
    # Local import keeps this cell self-contained; the package is already a
    # dependency of the notebook (see the `build` import).
    from googleapiclient.errors import HttpError

    comments = []
    next_page_token = None
    while True:
        request = youtube.commentThreads().list(
            part='snippet',
            videoId=video_id,
            textFormat='plainText',
            maxResults=100,  # largest page the API allows -> fewest requests/quota
            pageToken=next_page_token,
        )
        try:
            response = request.execute()
        except HttpError as err:
            body = err.content
            if isinstance(body, bytes):
                body = body.decode('utf-8', 'replace')
            # A video with comments turned off must not abort the whole
            # collection run; every other error is still propagated.
            if err.resp.status == 403 and 'commentsDisabled' in (body or ''):
                break
            raise

        for item in response.get('items', []):
            top_level = item['snippet']['topLevelComment']['snippet']
            comments.append({
                'comment': top_level['textDisplay'],
                'like_count': top_level['likeCount'],
                'published_at': top_level['publishedAt'],
            })

        next_page_token = response.get('nextPageToken')
        if not next_page_token:
            break

    return comments

# Gather the comments of every video found by the search, keyed by video ID.
all_comments = {}
for video in video_data:
    video_id = video['video_id']
    all_comments[video_id] = {
        'title': video['title'],
        'published_at': video['published_at'],
        'channel_title': video['channel_title'],
        'comments': get_all_comments(video_id),
    }

# Persist the raw dump as UTF-8 JSON, leaving non-ASCII text unescaped.
with open('youtube_s24_2.json', 'w', encoding='utf-8') as out_file:
    out_file.write(json.dumps(all_comments, ensure_ascii=False, indent=4))

print('저장 완료')

저장 완료


In [18]:
# Reload the saved comment dump and flatten it to one row per comment.
with open('youtube_s24_2.json', 'r', encoding='utf-8') as json_file:
    data = json.load(json_file)

csv_columns = [
    'video_id',
    'title',
    'publish_date',
    'channel_name',
    'comment',
    'like_count',
    'comment_publish_date',
]

# Video-level fields are repeated on every comment row of that video.
rows = [
    [
        video_id,
        video_info['title'],
        video_info['published_at'],
        video_info['channel_title'],
        comment_info['comment'],
        comment_info['like_count'],
        comment_info['published_at'],
    ]
    for video_id, video_info in data.items()
    for comment_info in video_info['comments']
]

df = pd.DataFrame(rows, columns=csv_columns)
df.to_csv('youtube_s24_2.csv', index=False, encoding='utf-8-sig')

print('CSV 파일 저장 완료')

CSV 파일 저장 완료


In [14]:
# Show the (rows, columns) size of the current `df`.
df.shape

(22085, 7)

In [15]:
# Peek at the first rows of the flattened comment table as a sanity check.
# NOTE(review): the output saved with this cell shows column names such as
# 'Video ID' that differ from the ones assigned when `df` is built above --
# presumably stale; re-run after a kernel restart to confirm.
df.head()

Unnamed: 0,Video ID,Title,Published At,Channel Title,Comment,Like Count,Comment Published At
0,BAytLn0NhpI,갤럭시 S24+ VS 갤럭시 S24 울트라 둘 중 무엇을 사야할까 고민된다면?,2024-02-05T11:30:05Z,ITSub잇섭,플러스 겁나 이쁘네,0,2025-01-20T04:33:14Z
1,BAytLn0NhpI,갤럭시 S24+ VS 갤럭시 S24 울트라 둘 중 무엇을 사야할까 고민된다면?,2024-02-05T11:30:05Z,ITSub잇섭,어쩜 이렇게 설명을 잘하지,0,2025-01-17T12:15:56Z
2,BAytLn0NhpI,갤럭시 S24+ VS 갤럭시 S24 울트라 둘 중 무엇을 사야할까 고민된다면?,2024-02-05T11:30:05Z,ITSub잇섭,S24플이랑 24울트라랑 고민ㅈㄴ되네,0,2025-01-17T10:58:47Z
3,BAytLn0NhpI,갤럭시 S24+ VS 갤럭시 S24 울트라 둘 중 무엇을 사야할까 고민된다면?,2024-02-05T11:30:05Z,ITSub잇섭,ㄱㅎ비용 ㅎㅅ이라돈없을것같아도 양ㄱ에서 ㅂ2억낙넉,0,2025-01-16T14:58:31Z
4,BAytLn0NhpI,갤럭시 S24+ VS 갤럭시 S24 울트라 둘 중 무엇을 사야할까 고민된다면?,2024-02-05T11:30:05Z,ITSub잇섭,8th 8F 8B,0,2025-01-09T07:46:38Z
