<a href="https://colab.research.google.com/github/chayaphon/DADS5001/blob/main/Project/Sources/Back-End/getdata.ipynb#scrollTo=IzFf1CXgipHb" target="_blank"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Import required libraries

In [1]:
import os

import pandas as pd
import requests

## Declare functions for the YouTube API
### Used to retrieve data from YouTube, such as channel details and video details.

In [2]:
# API key sent as the 'key' parameter of every request below.
# It is read from the YOUTUBE_API_KEY environment variable so the secret does
# not have to be saved inside the notebook; when the variable is not set the
# value falls back to the original empty string.
api_key = os.environ.get("YOUTUBE_API_KEY", "")
# Base URL; each function appends its endpoint name ("search", "channels", ...).
api_url = "https://www.googleapis.com/youtube/v3/"

def find_channel_id(channel_name):
    """Return the ID of the first channel matching a search query.

    Sends a request to the ``search`` endpoint restricted to ``type=channel``
    and takes the ``channelId`` of the first returned item.

    Args:
        channel_name: Text used as the search query ``q``.

    Returns:
        The channel ID of the first search result, or ``None`` when the
        response status is not 200 or the response contains no items.
    """
    url = api_url + "search"
    params = {
        'part': 'snippet',
        'q': channel_name,
        'type': 'channel',
        'key': api_key
    }
    response = requests.get(url, params=params)
    if response.status_code != 200:
        return None

    items = response.json().get('items') or []
    if not items:
        # No hit for this query: indexing items[0] would raise instead of
        # letting the caller skip the channel.
        return None
    return items[0]['id']['channelId']

def get_channel_details(channel_id):
    """Fetch statistics and the uploads playlist ID for one channel.

    Sends a request to the ``channels`` endpoint asking for the
    ``statistics`` and ``contentDetails`` parts.

    Args:
        channel_id: Value passed as the ``id`` request parameter.

    Returns:
        A dict with the keys ``channel_id``, ``playlists_id`` (the channel's
        ``uploads`` playlist), ``subscriber_count``, ``video_count`` and
        ``view_count``; or ``None`` when the response status is not 200 or
        the response contains no items.
    """
    url = api_url + "channels"
    params = {
        # Sent as one comma-separated value, the form the API documents for 'part'.
        'part': 'statistics,contentDetails',
        'id': channel_id,
        'key': api_key
    }
    response = requests.get(url, params=params)
    if response.status_code != 200:
        return None

    items = response.json().get('items') or []
    if not items:
        # Nothing returned for this ID: report "no data" rather than raising.
        return None

    channel = items[0]
    statistics = channel['statistics']
    return {
        'channel_id': channel['id'],
        'playlists_id': channel['contentDetails']['relatedPlaylists']['uploads'],
        'subscriber_count': statistics['subscriberCount'],
        'video_count': statistics['videoCount'],
        'view_count': statistics['viewCount']
    }

def get_video_statistics(video_id):
    """Fetch title, publish time, like count and view count for one video.

    Sends a request to the ``videos`` endpoint asking for the ``snippet`` and
    ``statistics`` parts.

    Args:
        video_id: Value passed as the ``id`` request parameter.

    Returns:
        A dict with the keys ``title``, ``publishedAt``, ``likes`` and
        ``views`` (``likes``/``views`` default to 0 when the statistic is
        missing from the response); or ``None`` when the response status is
        not 200 or the response contains no items.
    """
    url = api_url + "videos"
    params = {
        'part': 'snippet,statistics',
        'id': video_id,
        'key': api_key
    }
    response = requests.get(url, params=params)
    if response.status_code != 200:
        return None

    items = response.json().get('items') or []
    if not items:
        # The API returned no entry for this ID; indexing items[0] would
        # raise and abort the caller's loop over a whole playlist.
        return None

    video_data = items[0]
    statistics = video_data['statistics']
    return {
        'title': video_data['snippet']['title'],
        'publishedAt': video_data['snippet']['publishedAt'],
        'likes': statistics.get('likeCount', 0),
        'views': statistics.get('viewCount', 0)
    }
    
def get_videos_details(playlist_id):
    """Collect per-video details for every item of a playlist.

    Pages through the ``playlistItems`` endpoint 50 items at a time, following
    ``nextPageToken`` until the response no longer carries one, and calls
    ``get_video_statistics`` for each video ID found.

    Args:
        playlist_id: Value passed as the ``playlistId`` request parameter.

    Returns:
        A list of the dicts returned by ``get_video_statistics``. Videos whose
        lookup returned ``None`` are left out. If a page request does not
        return HTTP 200, paging stops and the videos collected so far are
        returned.
    """
    url = api_url + "playlistItems"
    videos = []
    next_page_token = None
    while True:
        params = {
            'part': 'snippet',
            'playlistId': playlist_id,
            'maxResults': 50,
            'pageToken': next_page_token,
            'key': api_key
        }
        response = requests.get(url, params=params)
        if response.status_code != 200:
            # Without this exit the loop would re-send the same failing
            # request forever, because next_page_token never changes.
            break

        data = response.json()
        for item in data.get('items', []):
            video_id = item['snippet']['resourceId']['videoId']
            video_details = get_video_statistics(video_id)
            if video_details is not None:
                # None marks a failed lookup; keeping it would put a
                # non-dict entry into the list handed to pandas.
                videos.append(video_details)

        next_page_token = data.get('nextPageToken')
        if not next_page_token:
            break
    return videos

## Specify the list of channel names
### Add the names of the channels whose data you would like to see to the list below.

In [3]:
# Channel names to look up; each entry is used as a search query.
channel_list = [
    'Go Went Go',
    'Pigkaploy',
    'asapapailong',
    'YES I GO',
]

## Load Header Data for each channel

In [4]:
# Column order of the header table: the channel name followed by the keys
# returned by get_channel_details. Passing it explicitly keeps the columns
# present even when no channel could be resolved.
header_columns = ['channel_name', 'channel_id', 'playlists_id',
                  'subscriber_count', 'video_count', 'view_count']

# Gather one record per channel and build the frame once, instead of
# re-concatenating a growing DataFrame on every iteration.
header_records = []

for channel in channel_list:
    channel_id = find_channel_id(channel)
    if not channel_id:
        # Search failed or found nothing: skip this channel.
        continue
    data = get_channel_details(channel_id)
    if not data:
        continue
    header_records.append({'channel_name': channel, **data})

df_header = pd.DataFrame(header_records, columns=header_columns)

df_header

Unnamed: 0,channel_name,channel_id,playlists_id,subscriber_count,video_count,view_count
0,Go Went Go,UCqs3pijNrYHZCdl_HGfgTQw,UUqs3pijNrYHZCdl_HGfgTQw,1340000,502,231763874
1,Pigkaploy,UCT8UgpcpqFV6AgOfs_52QmA,UUT8UgpcpqFV6AgOfs_52QmA,1340000,323,126523458
2,asapapailong,UCh4h-Jc39XBBvF973COet0Q,UUh4h-Jc39XBBvF973COet0Q,822000,552,96091701
3,YES I GO,UCtx3Ac_gLDL5V_OTmgNzHpQ,UUtx3Ac_gLDL5V_OTmgNzHpQ,1330000,523,161564249


## Load Video detail Data for each channel.

In [5]:
# Column order of the detail table: the owning channel followed by the keys
# returned for each video. Passing it explicitly keeps the columns present
# even when no video was collected.
detail_columns = ['channel_id', 'title', 'publishedAt', 'likes', 'views']

# Gather one record per video and build the frame once, instead of
# re-concatenating a growing DataFrame for every channel.
detail_records = []

for playlist_id, channel_id in zip(df_header['playlists_id'], df_header['channel_id']):
    for video in get_videos_details(playlist_id):
        if not video:
            # Guard against entries without data (a failed video lookup).
            continue
        detail_records.append({'channel_id': channel_id, **video})

df_detail = pd.DataFrame(detail_records, columns=detail_columns)

df_detail

Unnamed: 0,channel_id,title,publishedAt,likes,views
0,UCqs3pijNrYHZCdl_HGfgTQw,Karakoram Highway ทางหลวงที่มีวิวสวยที่สุดในโล...,2024-05-04T05:00:15Z,1408,51377
1,UCqs3pijNrYHZCdl_HGfgTQw,เกาะมันนอก นอนบนเกาะแบบส่วนตัว และล่องเรือกลาง...,2024-05-01T11:30:17Z,2457,170361
2,UCqs3pijNrYHZCdl_HGfgTQw,9 วัน 8 คืน Road Trip สวิตเซอร์แลนด์ และพาทั้ง...,2024-04-29T11:30:29Z,1652,75411
3,UCqs3pijNrYHZCdl_HGfgTQw,ขับรถเที่ยวรอบเกาะ Okinawa 18 จุด 3 วัน 2 คืน ...,2024-04-27T05:00:30Z,4121,311630
4,UCqs3pijNrYHZCdl_HGfgTQw,สำรวจที่มาของรังนก วิธีการเก็บ ไปจนถึงการผลิต ...,2024-04-24T11:30:06Z,3125,206571
...,...,...,...,...,...
1898,UCtx3Ac_gLDL5V_OTmgNzHpQ,ตออีฟ! ซาอุไม่ได้มีแค่ Makkah/Madinah,2018-07-28T07:02:40Z,3311,116869
1899,UCtx3Ac_gLDL5V_OTmgNzHpQ,เดินงาน Halal Expo Hatyai 2018,2018-07-22T10:00:02Z,818,35007
1900,UCtx3Ac_gLDL5V_OTmgNzHpQ,Yes I Go trailer,2018-07-20T10:16:20Z,463,28953
1901,UCtx3Ac_gLDL5V_OTmgNzHpQ,ข้าวยำอบกรอบ รีวิว!,2018-07-17T13:22:11Z,924,30374


## Merge Header and Detail into a single DataFrame

In [6]:
# Left join on channel_id: every header row is kept and repeated once per
# matching video row from the detail table.
df_merged = df_header.merge(df_detail, how='left', on='channel_id')
df_merged

Unnamed: 0,channel_name,channel_id,playlists_id,subscriber_count,video_count,view_count,title,publishedAt,likes,views
0,Go Went Go,UCqs3pijNrYHZCdl_HGfgTQw,UUqs3pijNrYHZCdl_HGfgTQw,1340000,502,231763874,Karakoram Highway ทางหลวงที่มีวิวสวยที่สุดในโล...,2024-05-04T05:00:15Z,1408,51377
1,Go Went Go,UCqs3pijNrYHZCdl_HGfgTQw,UUqs3pijNrYHZCdl_HGfgTQw,1340000,502,231763874,เกาะมันนอก นอนบนเกาะแบบส่วนตัว และล่องเรือกลาง...,2024-05-01T11:30:17Z,2457,170361
2,Go Went Go,UCqs3pijNrYHZCdl_HGfgTQw,UUqs3pijNrYHZCdl_HGfgTQw,1340000,502,231763874,9 วัน 8 คืน Road Trip สวิตเซอร์แลนด์ และพาทั้ง...,2024-04-29T11:30:29Z,1652,75411
3,Go Went Go,UCqs3pijNrYHZCdl_HGfgTQw,UUqs3pijNrYHZCdl_HGfgTQw,1340000,502,231763874,ขับรถเที่ยวรอบเกาะ Okinawa 18 จุด 3 วัน 2 คืน ...,2024-04-27T05:00:30Z,4121,311630
4,Go Went Go,UCqs3pijNrYHZCdl_HGfgTQw,UUqs3pijNrYHZCdl_HGfgTQw,1340000,502,231763874,สำรวจที่มาของรังนก วิธีการเก็บ ไปจนถึงการผลิต ...,2024-04-24T11:30:06Z,3125,206571
...,...,...,...,...,...,...,...,...,...,...
1898,YES I GO,UCtx3Ac_gLDL5V_OTmgNzHpQ,UUtx3Ac_gLDL5V_OTmgNzHpQ,1330000,523,161564249,ตออีฟ! ซาอุไม่ได้มีแค่ Makkah/Madinah,2018-07-28T07:02:40Z,3311,116869
1899,YES I GO,UCtx3Ac_gLDL5V_OTmgNzHpQ,UUtx3Ac_gLDL5V_OTmgNzHpQ,1330000,523,161564249,เดินงาน Halal Expo Hatyai 2018,2018-07-22T10:00:02Z,818,35007
1900,YES I GO,UCtx3Ac_gLDL5V_OTmgNzHpQ,UUtx3Ac_gLDL5V_OTmgNzHpQ,1330000,523,161564249,Yes I Go trailer,2018-07-20T10:16:20Z,463,28953
1901,YES I GO,UCtx3Ac_gLDL5V_OTmgNzHpQ,UUtx3Ac_gLDL5V_OTmgNzHpQ,1330000,523,161564249,ข้าวยำอบกรอบ รีวิว!,2018-07-17T13:22:11Z,924,30374


## Export data to CSV

In [7]:
# Create the output folder first: to_csv does not create missing directories,
# so the export would fail on a fresh checkout or runtime without ../Data.
os.makedirs('../Data', exist_ok=True)
df_merged.to_csv('../Data/data.csv', index=False)