# Download Video Info for The Needle Drop

https://www.youtube.com/c/theneedledrop

In [1]:
# Execute the shared bootstrap notebook before anything else in this notebook runs.
# NOTE(review): CONFIG (read when the YouTube client is built below) is not defined
# anywhere in this notebook, so it is presumably created by bootstrap.ipynb — confirm.
%run bootstrap.ipynb

In [2]:
import json
import itertools

import sqlite3

import pandas as pd

from IPython.display import display, JSON, HTML

from castle.music_meta.youtube import YouTubeClient, YouTubeVideoRepo, YouTubeVideoInfoJob

## Config

In [3]:
# Channel username for https://www.youtube.com/c/theneedledrop; it is passed to the
# video-info job, the transcript download, and the repository query in later cells.
username = "theneedledrop"

## Setup DB Connection

In [4]:
# Open the SQLite database that is handed to YouTubeVideoRepo below.
# sqlite3.connect() is the documented factory for creating connections (it returns a
# sqlite3.Connection, same as before). The path is relative, so it resolves against
# the kernel's current working directory.
db = sqlite3.connect("var/data/music.db")

## Setup Objects

In [5]:
# API client, built from the base URL and API key stored under CONFIG["youtube"].
yt_client = YouTubeClient(CONFIG["youtube"]["base-url"], CONFIG["youtube"]["api-key"])

# Repository object constructed over the SQLite connection `db`.
yt_repo = YouTubeVideoRepo(db)

# Job object that is given the repository, the client, and the channel username.
yt_job = YouTubeVideoInfoJob(yt_repo, yt_client, username)

## Download Video Info

In [6]:
# Download video info records for the configured channel via the job.
# NOTE(review): the logged "starting from" timestamp equals the newest
# videoPublishedAt in the read-back table further down, so this looks like an
# incremental fetch from the latest stored video — confirm in YouTubeVideoInfoJob.
yt_job.download_latest_video_info()

2021-11-07 10:23:00,651 - root - INFO - Downloading video info: theneedledrop - starting from 2021-11-06 05:13:32+00:00.
2021-11-07 10:23:01,193 - root - INFO - Downloaded 0 video info records.


## Download Video Transcripts

In [10]:
# Download transcripts for the channel's videos; progress is reported through the
# root logger (see the output below).
# NOTE(review): username is passed again here even though yt_job was already
# constructed with it — confirm whether the argument is required.
yt_job.download_transcripts(username)

2021-11-07 11:10:11,294 - root - INFO - There are 272 videos to download.
2021-11-07 11:11:10,413 - root - INFO - Downloaded 100 transcripts.
2021-11-07 11:12:11,402 - root - INFO - Downloaded 200 transcripts.


## Test Reading Back From DB

In [13]:
# Read the channel's video info (with transcripts) back from the repository and
# order it newest-first by publish time; the bare name on the last line displays it.
video_info = yt_repo.get_video_info_w_transcript_for_username(username).sort_values(
    by="videoPublishedAt",
    ascending=False,
)
video_info

Unnamed: 0,channelTitle,videoId,videoPublishedAt,title,description,transcript
0,theneedledrop,3VOAXpSR9V0,2021-11-06 05:13:32+00:00,Marissa Nadler - The Path of the Clouds ALBUM ...,Listen: https://marissanadler.bandcamp.com/alb...,[{'text': 'hi everyone lost any boy tano here ...
1,theneedledrop,EZc4QCJmKdQ,2021-11-05 02:21:08+00:00,The War on Drugs - I Don't Live Here Anymore A...,Listen: https://www.youtube.com/watch?v=B3OEof...,[{'text': 'hi everyone don't think liv tano he...
2,theneedledrop,eBgdS1yHwRk,2021-11-04 03:24:04+00:00,Mastodon - Hushed and Grim ALBUM REVIEW,Listen: https://www.youtube.com/watch?v=haT5ar...,[{'text': 'hi everyone hush danny grimtano her...
3,theneedledrop,9ULbbP8yvY0,2021-11-03 02:18:22+00:00,"Reading Your Comments: Ed Sheeran, Lana Del Re...",Reacting to comments from these reviews:\n\nEd...,"[{'text': 'hi everyone anthony fantano here', ..."
4,theneedledrop,aae1g4R9KJA,2021-11-02 02:11:18+00:00,Ed Sheeran - = ALBUM REVIEW,Listen: https://www.youtube.com/watch?v=orJSJG...,[{'text': 'hey everyone don't respond to weird...
...,...,...,...,...,...,...
3658,theneedledrop,SFLvAe2k0Z0,2010-03-09 20:17:13+00:00,High On Fire- Snakes for the Divine Review,http://bit.ly/99ixTF\nhttp://bit.ly/9MXJlT\n\n...,"[{'text': 'I on fire snakes hi everyone', 'sta..."
3659,theneedledrop,ftt175Djodk,2010-03-08 22:59:59+00:00,"The Knife- Tomorrow, In a Year Album Review",3/10 http://bit.ly/6AHNzn\n\nSong that sounds ...,[{'text': 'goodnight tomorrow any year my need...
3660,theneedledrop,A9RLE74OXUY,2010-03-03 03:25:15+00:00,Gorillaz- Plastic Beach Review,7/10 http://bit.ly/9k9Chc\r\n\r\nGorillaz late...,"[{'text': 'you', 'start': 0.0, 'duration': 2.0..."
3661,theneedledrop,wbLFXN1yy5Q,2009-12-16 15:56:03+00:00,SLIPKNOT TRACK REVIEW,5/5/5/5/55/5555.5.5.555.5.5..5\n\nSLIPSLOT IS ...,[{'text': 'it's track review hi my needle drop...


In [14]:
# Fraction of rows whose transcript column is non-null.
video_info["transcript"].notna().mean()

0.9257439257439257