### YouTube Data API를 위한 환경설정

In [None]:
!pip install google-api-python-client pandas


In [None]:
from googleapiclient.discovery import build
import pandas as pd
from datetime import datetime, timedelta


In [None]:
# Read YOUTUBE_API_KEY from the process environment (None when it is unset).
# NOTE(review): os.getenv does not parse a .env file by itself; the variable
# must already be exported or loaded by some other tool - confirm the setup.
import os
api_key = os.getenv('YOUTUBE_API_KEY')


### initialize_youtube: 
Initializes the YouTube API client.

In [None]:
def initialize_youtube(api_key):
    """Create a client for the YouTube Data API v3.

    Args:
        api_key: Developer key forwarded to ``build`` as ``developerKey``.

    Returns:
        The service object returned by ``build('youtube', 'v3', ...)``.
    """
    service_name = 'youtube'
    api_version = 'v3'
    client = build(service_name, api_version, developerKey=api_key)
    return client



### search_channels: 
Searches for channels based on the provided keyword and returns a list of channel IDs and titles.

In [None]:
def search_channels(youtube, query, max_results=10):
    """Search for channels matching a keyword.

    Args:
        youtube: API client exposing ``search().list(...)``.
        query: Search keyword sent as the ``q`` parameter.
        max_results: Value sent as ``maxResults``.

    Returns:
        A list of dicts, one per returned item, each holding the item's
        snippet ``channelId`` and its snippet ``title`` (as ``channelTitle``).
    """
    search_response = youtube.search().list(
        q=query,
        part="snippet",
        type="channel",
        maxResults=max_results,
    ).execute()

    return [
        {
            'channelId': result['snippet']['channelId'],
            'channelTitle': result['snippet']['title'],
        }
        for result in search_response['items']
    ]

### get_channel_stats: 
Fetches the statistics of a specific channel, including subscriber count.

In [None]:
def get_channel_stats(youtube, channel_id):
    """Fetch the ``statistics`` part of a single channel.

    Args:
        youtube: API client exposing ``channels().list(...)``.
        channel_id: ID of the channel to look up.

    Returns:
        The ``statistics`` dict of the first returned item. An empty dict is
        returned when the response contains no items (or the item carries no
        statistics), so callers can keep using ``.get(key, default)`` instead
        of crashing on an unknown or unavailable channel.
    """
    response = youtube.channels().list(
        part="statistics",
        id=channel_id,
    ).execute()

    # The response may omit 'items' or return it empty when nothing matches.
    items = response.get('items') or []
    if not items:
        return {}
    return items[0].get('statistics', {})

### get_video_stats: 
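Retrieves video statistics (views, likes, and dislikes where the API still returns them) for videos posted within a specified date range. YouTube made dislike counts private in December 2021, so `dislikeCount` is normally reported as `N/A`.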



In [None]:
def get_video_stats(youtube, channel_id, published_after, published_before):
    """Collect statistics for a channel's videos published in a date range.

    Only the first page of search results (at most 50 videos) is used.
    Statistics for all of those videos are fetched with a single
    ``videos().list`` request instead of one request per video, which keeps
    the number of API calls constant.

    Args:
        youtube: API client exposing ``search().list`` and ``videos().list``.
        channel_id: ID of the channel whose videos are searched.
        published_after: Lower bound sent as ``publishedAfter``.
        published_before: Upper bound sent as ``publishedBefore``.

    Returns:
        A list of dicts with the keys ``videoId``, ``title``, ``publishedAt``,
        ``viewCount``, ``likeCount`` and ``dislikeCount``. A statistic that is
        not present in the API response is reported as ``'N/A'``.
    """
    search_response = youtube.search().list(
        part="id,snippet",
        channelId=channel_id,
        publishedAfter=published_after,
        publishedBefore=published_before,
        maxResults=50,
        type="video",
    ).execute()

    items = search_response.get('items') or []
    if not items:
        # Nothing to look up; avoid sending a videos().list call with no ids.
        return []

    # videos().list accepts a comma-separated list of ids, so one request
    # covers every video from the (at most 50 item) search page.
    video_ids = [item['id']['videoId'] for item in items]
    details_response = youtube.videos().list(
        part="statistics",
        id=",".join(video_ids),
    ).execute()
    stats_by_id = {
        video['id']: video.get('statistics', {})
        for video in details_response.get('items') or []
    }

    video_stats = []
    for item in items:
        video_id = item['id']['videoId']
        # Videos missing from the details response fall back to 'N/A' values.
        details = stats_by_id.get(video_id, {})
        video_stats.append({
            'videoId': video_id,
            'title': item['snippet']['title'],
            'publishedAt': item['snippet']['publishedAt'],
            'viewCount': details.get('viewCount', 'N/A'),
            'likeCount': details.get('likeCount', 'N/A'),
            'dislikeCount': details.get('dislikeCount', 'N/A'),
        })
    return video_stats


### get_video_details: 
Fetches detailed statistics for a specific video.



In [None]:
def get_video_details(youtube, video_id):
    """Fetch the ``statistics`` part of a single video.

    Args:
        youtube: API client exposing ``videos().list(...)``.
        video_id: ID of the video to look up.

    Returns:
        The ``statistics`` dict of the first returned item, or an empty dict
        when the response contains no items (for example when the video is
        no longer available), so callers using ``.get(key, default)`` keep
        working instead of failing with an IndexError/KeyError.
    """
    response = youtube.videos().list(
        part="statistics",
        id=video_id,
    ).execute()

    # The response may omit 'items' or return it empty when nothing matches.
    items = response.get('items') or []
    if not items:
        return {}
    return items[0].get('statistics', {})

### get_video_comments: 
Collects the top-level comments on a given video, following pagination until every page has been fetched. Replies to comments are not included.



In [None]:
def get_video_comments(youtube, video_id):
    """Collect every top-level comment of a video, page by page.

    Each page is requested exactly once; the next request is obtained from
    ``commentThreads().list_next`` and the loop stops when it returns
    ``None``.

    Args:
        youtube: API client exposing ``commentThreads().list`` and
            ``commentThreads().list_next``.
        video_id: ID of the video whose comment threads are read.

    Returns:
        A list of dicts with the keys ``videoId``, ``commentId``,
        ``authorDisplayName``, ``textOriginal``, ``likeCount`` and
        ``publishedAt``, one per top-level comment. Replies are not included.
    """
    comments = []
    request = youtube.commentThreads().list(
        part="snippet",
        videoId=video_id,
        maxResults=100,
        textFormat="plainText",
    )
    # Execute inside the loop only: running the first request before the
    # loop as well would fetch the first page twice and waste quota.
    while request is not None:
        response = request.execute()
        for item in response.get('items', []):
            top_comment = item['snippet']['topLevelComment']['snippet']
            comments.append({
                'videoId': video_id,
                'commentId': item['id'],
                'authorDisplayName': top_comment['authorDisplayName'],
                'textOriginal': top_comment['textOriginal'],
                'likeCount': top_comment['likeCount'],
                'publishedAt': top_comment['publishedAt'],
            })
        request = youtube.commentThreads().list_next(request, response)
    return comments

### main: 
Orchestrates the data collection and saves it to two CSV files (one for video stats and one for comments).

In [None]:
def main(api_key, query, start_date, end_date):
    """Collect channel, video and comment data and write it to two CSV files.

    For every channel found for ``query`` the channel statistics are printed,
    the videos published between ``start_date`` and ``end_date`` are listed,
    and the comments of each of those videos are gathered. Video rows go to
    ``youtube_channel_stats.csv`` and comment rows to
    ``youtube_video_comments.csv``.

    Args:
        api_key: Key handed to ``initialize_youtube``.
        query: Keyword handed to ``search_channels``.
        start_date: Lower publication bound handed to ``get_video_stats``.
        end_date: Upper publication bound handed to ``get_video_stats``.
    """
    client = initialize_youtube(api_key)

    found_channels = search_channels(client, query)
    print("Channels Found:", found_channels)

    # Columns copied from each video record, in output order.
    video_fields = (
        'videoId',
        'title',
        'publishedAt',
        'viewCount',
        'likeCount',
        'dislikeCount',
    )
    stat_rows = []
    comment_rows = []

    for found in found_channels:
        stats = get_channel_stats(client, found['channelId'])
        print(f"Stats for {found['channelTitle']}: {stats}")

        videos = get_video_stats(client, found['channelId'], start_date, end_date)

        for video in videos:
            row = {
                'channelTitle': found['channelTitle'],
                'channelId': found['channelId'],
                'subscribers': stats.get('subscriberCount', 'N/A'),
            }
            row.update((field, video[field]) for field in video_fields)
            stat_rows.append(row)

            comment_rows.extend(get_video_comments(client, video['videoId']))

    pd.DataFrame(stat_rows).to_csv('youtube_channel_stats.csv', index=False)
    pd.DataFrame(comment_rows).to_csv('youtube_video_comments.csv', index=False)

    print("Data saved to youtube_channel_stats.csv and youtube_video_comments.csv")


### Usage Notes:
 - Replace `YOUR_API_KEY` with your actual YouTube Data API key.
 - Adjust the `start_date` and `end_date` to your specific date range needs.
 - The script fetches at most 50 videos per channel because it requests only the first page of search results (`maxResults=50`, the API's per-page maximum). Pagination handling for videos can be added for more extensive data collection.
### Precautions:
 - **API Quota:** YouTube API calls consume quota, so monitor your usage in the Google Developer Console.
 - **Rate Limits:** Be aware of rate limits to avoid being temporarily blocked from making further API calls.
 - **Data Privacy:** Ensure compliance with YouTube's terms of service and data privacy policies.



In [None]:

if __name__ == "__main__":
    import os

    # Prefer the key from the YOUTUBE_API_KEY environment variable so the
    # secret does not have to be hard-coded here; the placeholder is only a
    # fallback and must be replaced when the variable is not set.
    api_key = os.getenv('YOUTUBE_API_KEY') or 'YOUR_API_KEY'
    query = '주식정보제공채널'
    start_date = '2022-01-01T00:00:00Z'  # Example start date
    end_date = '2022-12-31T23:59:59Z'    # Example end date
    main(api_key, query, start_date, end_date)