## YouTube API로 데이터 수집

### 필드 목록
- video_id: 비디오 ID (고유 식별자)
- title: 비디오 제목
- published_at: 비디오 업로드 날짜 (CSV 컬럼명: publish_date)
- channel_title: 비디오가 업로드된 채널의 이름 (CSV 컬럼명: channel_name)
- comment: 댓글 내용
- like_count: 댓글의 좋아요 개수
- published_at: 댓글 작성 날짜 (CSV 컬럼명: comment_publish_date)

In [8]:
import csv
import json
import os
import time

import pandas as pd
from googleapiclient.discovery import build

# Read the YouTube Data API key from the environment instead of embedding it
# in the notebook: a key committed to source is exposed to everyone who can
# read the file. NOTE(review): the key that was previously hard-coded here
# should be treated as leaked and revoked in the Google Cloud console.
# .strip() guards against stray whitespace around the value (the old literal
# carried a trailing space).
api_key = os.environ.get('YOUTUBE_API_KEY', '').strip()
if not api_key:
    raise RuntimeError(
        'Set the YOUTUBE_API_KEY environment variable to a YouTube Data API v3 key.'
    )

# Client for the YouTube Data API v3; used by every request below.
youtube = build('youtube', 'v3', developerKey=api_key)

### 아이폰 16 리뷰

In [11]:
# Query parameters shared by both search requests.
search_query = '아이폰 16 리뷰'
search_params = {
    'q': search_query,
    'part': 'snippet',
    'type': 'video',
    'order': 'viewCount',
    'publishedAfter': '2024-09-10T00:00:00Z',
}


def _to_video_record(item):
    """Return the fields kept for one search result item."""
    video_id = item['id']['videoId']
    snippet = item['snippet']
    return {
        'video_id': video_id,
        'title': snippet['title'],
        'published_at': snippet['publishedAt'],
        'channel_title': snippet['channelTitle'],
    }


# First request: fetch up to 50 results.
request = youtube.search().list(maxResults=50, **search_params)
search_response = request.execute()

# Convert every result, then drop the first video.
video_data = [_to_video_record(item) for item in search_response['items']][1:]

next_page_token = search_response.get("nextPageToken")

if next_page_token:
    # Fetch a single extra result from the next page and append it to the
    # list (the first video was dropped above).
    request = youtube.search().list(
        maxResults=1,
        pageToken=next_page_token,
        **search_params,
    )
    next_response = request.execute()

    additional_video = [_to_video_record(item) for item in next_response['items']]
    video_data.extend(additional_video)

def get_all_comments(video_id):
    """Fetch every top-level comment of a video via ``commentThreads.list``.

    Pages through the results with ``nextPageToken`` until no further page is
    reported, sleeping 2 seconds between page requests. Uses the module-level
    ``youtube`` API client.

    Args:
        video_id: ID of the YouTube video whose comment threads are listed.

    Returns:
        A list of dicts, one per top-level comment, with the keys
        ``comment`` (plain-text body), ``like_count`` and ``published_at``.
        Only the top-level comment of each thread is read; replies are not
        collected.
    """
    comments = []
    next_page_token = None
    while True:
        request = youtube.commentThreads().list(
            part='snippet',
            videoId=video_id,
            textFormat='plainText',
            # Ask for the largest page the API allows (default is 20) so the
            # same comments arrive in far fewer requests and sleeps.
            maxResults=100,
            pageToken=next_page_token
        )
        response = request.execute()

        # Tolerate a response without an 'items' key instead of raising.
        for item in response.get('items', []):
            top_level = item['snippet']['topLevelComment']['snippet']
            comments.append({
                'comment': top_level['textDisplay'],
                'like_count': top_level['likeCount'],
                'published_at': top_level['publishedAt'],
            })

        next_page_token = response.get('nextPageToken')
        if not next_page_token:
            break
        # Pause between pages; presumably to stay gentle on API rate limits.
        time.sleep(2)

    return comments

# Collect the comments of every video, keyed by video ID, together with the
# video metadata gathered by the search step.
all_comments = {
    video['video_id']: {
        'title': video['title'],
        'published_at': video['published_at'],
        'channel_title': video['channel_title'],
        'comments': get_all_comments(video['video_id']),
    }
    for video in video_data
}

# Persist the result as indented JSON; ensure_ascii=False keeps non-ASCII
# text readable in the file instead of \u escapes.
with open('youtube_i16_1.json', mode='w', encoding='utf-8') as json_file:
    json.dump(all_comments, json_file, ensure_ascii=False, indent=4)

print('저장 완료')

저장 완료


In [17]:
# Flatten the per-video structure into one row per comment.
csv_data = []
for video_id, video_info in all_comments.items():
    # Columns that repeat on every row belonging to this video.
    video_fields = {
        'video_id': video_id,
        'title': video_info['title'],
        'publish_date': video_info['published_at'],
        'channel_name': video_info['channel_title'],
    }
    csv_data.extend(
        {
            **video_fields,
            'comment': entry['comment'],
            'like_count': entry['like_count'],
            'comment_publish_date': entry['published_at'],
        }
        for entry in video_info['comments']
    )

# Save as a CSV file.
csv_file = 'youtube_i16_1.csv'
csv_columns = [
    'video_id',
    'title',
    'publish_date',
    'channel_name',
    'comment',
    'like_count',
    'comment_publish_date',
]

# newline='' leaves line endings to the csv module; 'utf-8-sig' writes a
# byte-order mark at the start of the file.
with open(csv_file, mode='w', newline='', encoding='utf-8-sig') as out_file:
    row_writer = csv.DictWriter(out_file, fieldnames=csv_columns)
    row_writer.writeheader()
    row_writer.writerows(csv_data)

In [18]:
# Load the exported CSV back into a DataFrame and preview its first five rows.
df = pd.read_csv(filepath_or_buffer='youtube_i16_1.csv')
df.head(n=5)

Unnamed: 0,video_id,title,publish_date,channel_name,comment,like_count,comment_publish_date
0,cdiVvaq6UO4,무조건 아이폰 프로만 고집했던 사람들이.. 슬슬 이젠 일반형을 사는 이유?,2024-09-15T11:00:36Z,ITSub잇섭,9:15 똥믈리에 PD님이랑 두명 너무 분워기가 다른 거 아닙니까,0,2025-01-31T11:02:31Z
1,cdiVvaq6UO4,무조건 아이폰 프로만 고집했던 사람들이.. 슬슬 이젠 일반형을 사는 이유?,2024-09-15T11:00:36Z,ITSub잇섭,"갤럭시를 쓰면 생활,삶의 질이 좋아지고 아이폰을 쓰면 기분이 좋아진다는 말이 진리....",0,2025-01-30T07:40:59Z
2,cdiVvaq6UO4,무조건 아이폰 프로만 고집했던 사람들이.. 슬슬 이젠 일반형을 사는 이유?,2024-09-15T11:00:36Z,ITSub잇섭,잇섭 프로 맥스 ??,0,2025-01-30T00:48:25Z
3,cdiVvaq6UO4,무조건 아이폰 프로만 고집했던 사람들이.. 슬슬 이젠 일반형을 사는 이유?,2024-09-15T11:00:36Z,ITSub잇섭,패드는 어차피 영상 보고 이러는게 주라 에어도 쌉가능인데 ㅈㄴ 마니 사용하는 폰은 ...,0,2025-01-22T15:30:34Z
4,cdiVvaq6UO4,무조건 아이폰 프로만 고집했던 사람들이.. 슬슬 이젠 일반형을 사는 이유?,2024-09-15T11:00:36Z,ITSub잇섭,그래서 뭐삿음?,0,2025-01-22T13:05:29Z


### 아이폰 16 후기

In [21]:
# Query parameters shared by both search requests.
search_query = '아이폰 16 후기'
search_params = {
    'q': search_query,
    'part': 'snippet',
    'type': 'video',
    'order': 'viewCount',
    'publishedAfter': '2024-09-10T00:00:00Z',
}


def _to_video_record(item):
    """Return the fields kept for one search result item."""
    video_id = item['id']['videoId']
    snippet = item['snippet']
    return {
        'video_id': video_id,
        'title': snippet['title'],
        'published_at': snippet['publishedAt'],
        'channel_title': snippet['channelTitle'],
    }


# First request: fetch up to 50 results.
request = youtube.search().list(maxResults=50, **search_params)
search_response = request.execute()

# Convert every result, then drop the first video.
video_data = [_to_video_record(item) for item in search_response['items']][1:]
next_page_token = search_response.get("nextPageToken")

if next_page_token:
    # Fetch a single extra result from the next page and append it to the
    # list (the first video was dropped above).
    request = youtube.search().list(
        maxResults=1,
        pageToken=next_page_token,
        **search_params,
    )
    next_response = request.execute()

    additional_video = [_to_video_record(item) for item in next_response['items']]
    video_data.extend(additional_video)

def get_all_comments(video_id):
    """Fetch every top-level comment of a video via ``commentThreads.list``.

    Pages through the results with ``nextPageToken`` until no further page is
    reported, sleeping 2 seconds between page requests. Uses the module-level
    ``youtube`` API client.

    Args:
        video_id: ID of the YouTube video whose comment threads are listed.

    Returns:
        A list of dicts, one per top-level comment, with the keys
        ``comment`` (plain-text body), ``like_count`` and ``published_at``.
        Only the top-level comment of each thread is read; replies are not
        collected.
    """
    comments = []
    next_page_token = None
    while True:
        request = youtube.commentThreads().list(
            part='snippet',
            videoId=video_id,
            textFormat='plainText',
            # Ask for the largest page the API allows (default is 20) so the
            # same comments arrive in far fewer requests and sleeps.
            maxResults=100,
            pageToken=next_page_token
        )
        response = request.execute()

        # Tolerate a response without an 'items' key instead of raising.
        for item in response.get('items', []):
            top_level = item['snippet']['topLevelComment']['snippet']
            comments.append({
                'comment': top_level['textDisplay'],
                'like_count': top_level['likeCount'],
                'published_at': top_level['publishedAt'],
            })

        next_page_token = response.get('nextPageToken')
        if not next_page_token:
            break
        # Pause between pages; presumably to stay gentle on API rate limits.
        time.sleep(2)

    return comments

# Collect the comments of every video, keyed by video ID, together with the
# video metadata gathered by the search step.
all_comments = {
    video['video_id']: {
        'title': video['title'],
        'published_at': video['published_at'],
        'channel_title': video['channel_title'],
        'comments': get_all_comments(video['video_id']),
    }
    for video in video_data
}

# Persist the result as indented JSON; ensure_ascii=False keeps non-ASCII
# text readable in the file instead of \u escapes.
with open('youtube_i16_2.json', mode='w', encoding='utf-8') as json_file:
    json.dump(all_comments, json_file, ensure_ascii=False, indent=4)

print('저장 완료')

저장 완료


In [None]:
# Flatten the per-video structure into one row per comment.
csv_data = []
for video_id, video_info in all_comments.items():
    # Columns that repeat on every row belonging to this video.
    video_fields = {
        'video_id': video_id,
        'title': video_info['title'],
        'publish_date': video_info['published_at'],
        'channel_name': video_info['channel_title'],
    }
    csv_data.extend(
        {
            **video_fields,
            'comment': entry['comment'],
            'like_count': entry['like_count'],
            'comment_publish_date': entry['published_at'],
        }
        for entry in video_info['comments']
    )

# Save as a CSV file.
csv_file = 'youtube_i16_2.csv'
csv_columns = [
    'video_id',
    'title',
    'publish_date',
    'channel_name',
    'comment',
    'like_count',
    'comment_publish_date',
]

# newline='' leaves line endings to the csv module; 'utf-8-sig' writes a
# byte-order mark at the start of the file.
with open(csv_file, mode='w', newline='', encoding='utf-8-sig') as out_file:
    row_writer = csv.DictWriter(out_file, fieldnames=csv_columns)
    row_writer.writeheader()
    row_writer.writerows(csv_data)