In [1]:
import json
import requests
import boto3
from boto3.dynamodb.conditions import Key, Attr
dynamodb = boto3.resource('dynamodb')

In [2]:
API_KEY = "AIzaSyBnkX_Knaws7tumWbxSEGVf491NZedrw88"

In [3]:
def get_query_string(query):
  query = { **query, "key": API_KEY }
  container = []
  for i in query.items():
    container.append(f"{i[0]}={i[1]}")
  query_string = f"?{'&'.join(container)}"
  return query_string

In [4]:
def get_lists_all(base_url, query):
  query_string = get_query_string(query)
  url = f"{base_url}{query_string}"
  res = requests.get(url)
  if not res.ok:
    print(res.status_code)
    return []
  data = json.loads(res.content)
  lists = [
    {
      "title": x['snippet']['title'],
      "id": x['id']
    } for x in data['items']
  ]
  if "nextPageToken" in data:
    next_query = { **query, 'pageToken': data["nextPageToken"] }
    return [*lists, *get_lists_all(base_url, next_query)]
  return lists

In [5]:
def get_playlists():
  base_url = 'https://www.googleapis.com/youtube/v3/playlists'
  query = {
    "part": "snippet",
    "channelId": "UCU5LUUBNDzGOeAri5Wk8w5Q",
    "maxResults": 50
  }  
  lists = get_lists_all(base_url, query)
  lists = [x for x in lists if x['title'].startswith('BGM')]
  lists = sorted(lists, key=lambda x: x['title'])
  return lists

playlists = get_playlists()

In [6]:
def get_list_items_all(base_url, query):
  query_string = get_query_string(query)
  url = f"{base_url}{query_string}"
  res = requests.get(url)
  if not res.ok:
    print(res.status_code)
    return []
  data = json.loads(res.content)
  list_items = [
    {
      "title": x['snippet']['title'],
      "id": x['snippet']['resourceId']['videoId'],
      "channel": x['snippet']['videoOwnerChannelTitle'] if 'videoOwnerChannelTitle' in x['snippet'] else 'unavailable',
      "channel_id": x['snippet']['videoOwnerChannelId'] if 'videoOwnerChannelTitle' in x['snippet'] else 'unavailable',
      "from": "list",
      "active": True if 'videoOwnerChannelTitle' in x['snippet'] else False
    } for x in data['items']
  ]
  if "nextPageToken" in data:
    next_query = { **query, 'pageToken': data["nextPageToken"] }
    return [*list_items, *get_list_items_all(base_url, next_query)]
  return list_items

In [7]:
def get_playlist_items(playlist_id):
  base_url = 'https://www.googleapis.com/youtube/v3/playlistItems'
  query = {
    "part": "snippet",
    "playlistId": playlist_id,
    "maxResults": 50
  }
  list_items = get_list_items_all(base_url, query)
  return list_items

In [11]:
lists_items = [get_playlist_items(x['id']) for x in playlists]

In [12]:
flatten_items = sum(lists_items, [])
total_items = [dict(t) for t in {tuple(d.items()) for d in flatten_items}]

In [13]:
table_total = dynamodb.Table('total')
with table_total.batch_writer() as batch:
  for item in total_items:
    batch.put_item(
      Item=item
    )

In [14]:
res_total = table_total.scan()['Items']
print(len(res_total))

1718


In [19]:
table_monthly = dynamodb.Table('monthly')

In [20]:
with table_monthly.batch_writer() as batch:
  for idx, items in enumerate(lists_items):  
    row = {
      **playlists[idx],
      "items": items
    }
    batch.put_item(
      Item=row
    )

In [39]:
res_monthly = table_monthly.scan(
  FilterExpression=Attr('title').eq('BGM 2023 02')
)['Items'][0]
res_monthly['id']

'PLj63tGIo1sSoJptRycAjrMxNVLatBejpp'

In [40]:
items = res_monthly['items']

In [41]:
items[3]

[{'channel': 'Author wind - Topic',
  'active': True,
  'from': 'list',
  'id': 'b12-WUgXAzg',
  'title': 'Fairy Tale',
  'channel_id': 'UCeieSudBwInJNTbW0ylHfGg'}]

In [22]:
item_index = items.index(
              {
                "title": 'Fairy Tale',
                "id": 'b12-WUgXAzg',
                "channel": 'Author wind - Topic',
                "channel_id": 'UCeieSudBwInJNTbW0ylHfGg',
                "from": 'list',
                "active": True
              }
            )

In [23]:
item_index

3

In [24]:
items.pop(item_index)

{'channel': 'Author wind - Topic',
 'active': True,
 'from': 'list',
 'id': 'b12-WUgXAzg',
 'title': 'Fairy Tale',
 'channel_id': 'UCeieSudBwInJNTbW0ylHfGg'}

In [25]:
new_item = [
  {
    "title": 'Fairy Tale',
    "id": 'b12-WUgXAzg',
    "channel": 'Author wind - Topic',
    "channel_id": 'UCeieSudBwInJNTbW0ylHfGg',
    "from": 'list',
    "active": False
  }
]

In [26]:
items.insert(item_index, new_item)

In [28]:
items[3]

[{'title': 'Fairy Tale',
  'id': 'b12-WUgXAzg',
  'channel': 'Author wind - Topic',
  'channel_id': 'UCeieSudBwInJNTbW0ylHfGg',
  'from': 'list',
  'active': False}]

In [35]:
table_monthly.put_item(
  Item={
    **res_monthly,
    'items': items
  }
)

{'ResponseMetadata': {'RequestId': 'I0CSPBC2BM5QBVMBR0BM7VNCU3VV4KQNSO5AEMVJF66Q9ASUAAJG',
  'HTTPStatusCode': 200,
  'HTTPHeaders': {'server': 'Server',
   'date': 'Mon, 06 Feb 2023 09:31:10 GMT',
   'content-type': 'application/x-amz-json-1.0',
   'content-length': '2',
   'connection': 'keep-alive',
   'x-amzn-requestid': 'I0CSPBC2BM5QBVMBR0BM7VNCU3VV4KQNSO5AEMVJF66Q9ASUAAJG',
   'x-amz-crc32': '2745614147'},
  'RetryAttempts': 0}}

In [None]:
res_total = table_total.scan()['Items']
print(res_total)

In [None]:
total_id_list = [x for x in res_total]