In [1]:
import requests
import urllib.parse
import json
import webbrowser

In [2]:
# Location of the OAuth client secrets file.
# The credentials are obtained from https://console.developers.google.com,
# by creating an application with YouTube API access and an "Installed application" token.
# You'll probably have to create your own.
credentials_path = "client_secrets.json"

# Parse the JSON document describing the client
with open(credentials_path, "r") as secrets_file:
    credentials = json.loads(secrets_file.read())

In [3]:
# Get OAuth2 token, step 1: ask the user to authorize the application.
# The same redirect URI is sent again later, with the token request.
oob_redirect = "urn:ietf:wg:oauth:2.0:oob"
oauth_entrypoint = "https://accounts.google.com/o/oauth2/auth"
parameters = dict(
    client_id=credentials["installed"]["client_id"],
    redirect_uri=oob_redirect,
    response_type="code",
    scope="https://www.googleapis.com/auth/youtube.force-ssl",
)
authorization_url = "{}?{}".format(oauth_entrypoint, urllib.parse.urlencode(parameters))
# Binding the result to a throwaway name keeps the cell from displaying it
__ = webbrowser.open_new_tab(authorization_url)

In [4]:
# Paste here the code obtained after authorizing the application in the browser.
# The value is sent as the "code" parameter of the token request below.
token = "{{Please copy past the token here.}}"

In [5]:
# Get OAuth2 token, step 2: exchange the pasted authorization code for an access token
oauth_entrypoint2 = "https://accounts.google.com/o/oauth2/token"
parameters = {
    "code": token,
    "client_id": credentials["installed"]["client_id"],
    "client_secret": credentials["installed"]["client_secret"],
    "grant_type": "authorization_code",
    "redirect_uri": oob_redirect,
}
response = requests.post(oauth_entrypoint2, data=parameters)
# Fail here on an HTTP error (e.g. a wrong or already used code) rather than
# letting the error payload be handled as if it were a valid token.
response.raise_for_status()

In [6]:
# Save the user token to a file so we don't have to do this all over again
# (at least until the token expires, which is after an hour)

user_secrets = response.json()
with open("user_secrets.json", "w") as secrets_file:
    # Sorted keys and a 4-space indent keep the file stable and readable
    secrets_file.write(
        json.dumps(user_secrets, sort_keys=True, indent=4, separators=(',', ': '))
    )

In [7]:
# If you already have a token, you can start here !
import requests
import urllib.parse
import json
import webbrowser
# Reload the token saved by the authorization steps
with open("user_secrets.json", "r") as secrets_file:
    user_secrets = json.loads(secrets_file.read())

In [8]:
# You need a video that belongs to you, otherwise you
# don't have the right to download the subtitles using the API.
# The ID is substituted into the caption-listing URL template.
video_id = "SdV-oKRQXVY"  # the ID can be read from the video's URL

In [9]:
# Preparing the YouTube API calls
# OAuth access token read from the saved user secrets; GoogleAuth sends it as the Bearer credential
secret_key = user_secrets["access_token"]
# URL templates: the {video_id} / {caption_id} placeholders are filled in by request()
caption_list_entrypoint = "https://www.googleapis.com/youtube/v3/captions?part=snippet&videoId={video_id}"
# tfmt=srt presumably selects the SRT output format -- confirm against the API documentation
caption_download_entrypoint = "https://www.googleapis.com/youtube/v3/captions/{caption_id}?tfmt=srt"

In [10]:
# A request function for the Youtube API
class GoogleAuth(requests.auth.AuthBase):
    """Auth hook that sets the ``Authorization`` header of the request it receives.

    The bearer token is read from the module-level ``secret_key`` each time
    the hook is called.
    """

    def __call__(self, request):
        bearer = "Bearer {}".format(secret_key)
        request.headers.update({"Authorization": bearer})
        return request

def request(url, **kwargs):
    """Send an authenticated GET request to a templated URL.

    ``url`` is a ``str.format`` template; every keyword argument is
    percent-quoted and substituted into the placeholder of the same name.
    The request is authenticated with ``GoogleAuth`` and the response
    object is returned as is.
    """
    quoted = {name: urllib.parse.quote(raw) for name, raw in kwargs.items()}
    return requests.get(url.format(**quoted), auth=GoogleAuth())

In [11]:
# This returns the list of all the subtitle tracks of the video,
# decoded from the JSON body of the reply
response = request(caption_list_entrypoint, video_id=video_id).json()

In [12]:
# This finds the (first) automatic subtitle track ID.
# The track kind is compared case-insensitively, and a missing track is
# reported with an explicit error instead of a bare StopIteration.
caption_id = next(
    (
        element["id"]
        for element in response["items"]
        if element["snippet"]["trackKind"].upper() == "ASR"
    ),
    None,
)
if caption_id is None:
    raise LookupError(
        "No automatic (ASR) caption track found for video {}".format(video_id)
    )

In [13]:
# This returns the content of the subtitles in SRT format.
# Note that `response` now holds the HTTP response object, not the decoded track list.
response = request(caption_download_entrypoint, caption_id=caption_id)

In [14]:
# Let's save the srt for later use!

# Stop on an HTTP error so that an API error payload is not saved as subtitles
response.raise_for_status()
# Write with an explicit UTF-8 encoding: the platform default encoding may be
# unable to represent some characters of the decoded text
with open("subtitles.srt", "w", encoding="utf-8") as f:
    f.write(response.content.decode("utf-8"))

In [15]:
# From pip
import pysrt

In [16]:
# Load the saved subtitle file with pysrt; the fragments are iterated over further down
srt = pysrt.open("subtitles.srt")

In [47]:
# This is my own transcription of the video,
# split on whitespace into a list of words (punctuation stays attached to the words)
real_text = """Okay, so I'm trying Google API for now, and I need a video that contains some text.
Thank you very much.""".split()

In [48]:
# There's probably MUCH improvement to do to this. (using re ?)
def tidy_text(text):
    """Return ``text`` lower-cased, with each of ``. , ; ? !`` replaced by a space."""
    blanked = str.maketrans(dict.fromkeys(".,;?!", " "))
    return text.lower().translate(blanked)

In [50]:
# Word index in real_text around which the matching loop starts searching
current_word = 0
# The search window extends this many words on each side of the current position
search_words = 30

In [57]:
# Using difflib, we find the most probable start position of each srt
# fragment in the official transcript (we move forward in the text and
# search every time at +- search_words words around the previous match)
import difflib

start_of_fragments = []
# Work on a local cursor so that re-running this cell starts over from
# current_word instead of from wherever the previous run ended
position = current_word
for fragment in srt:
    # Normalise case, punctuation and spacing on both sides before comparing
    srt_text = " ".join(tidy_text(fragment.text).split())
    # Compare against windows about as long as the fragment: matching against
    # the whole remaining transcript would drown the similarity score
    window = max(len(srt_text.split()), 1)
    search_range = range(max(position - search_words, 0), min(position + search_words, len(real_text)))
    possibilities = {}
    for i in search_range:
        candidate = " ".join(tidy_text(" ".join(real_text[i:i + window])).split())
        # When two windows read the same, keep the earliest start
        possibilities.setdefault(candidate, i)
    # cutoff=0.0 always yields the best candidate as long as there is one
    closest = difflib.get_close_matches(srt_text, possibilities.keys(), 1, 0.0)
    # At this point, we could try to save that value, but there's no assurance that
    # we won't be missing a word.
    # Better to match the start position of each fragment than the fragments themselves,
    # so that we're sure that they're consecutive.
    # An empty search range leaves nothing to match: stay at the current position
    shift = possibilities[closest[0]] if closest else min(position, len(real_text))
    start_of_fragments.append(shift)
    # shift is an absolute word index, so it replaces the cursor (adding it would overshoot)
    position = shift

In [59]:
def double_iter_then_none(iterable):
    """Yield each element paired with its successor; the last one is paired with ``None``.

    An empty iterable yields nothing.

    >>> list(double_iter_then_none([1, 2, 3]))
    [(1, 2), (2, 3), (3, None)]
    >>> list(double_iter_then_none([1]))
    [(1, None)]
    >>> list(double_iter_then_none([]))
    []
    """
    it = iter(iterable)
    try:
        current = next(it)
    except StopIteration:
        # A StopIteration escaping a generator body becomes a RuntimeError
        # (PEP 479), so an empty input has to be handled explicitly.
        return
    for following in it:
        yield (current, following)
        current = following
    yield (current, None)

# We compute the fragments based on the start positions found:
# each one runs from its own start up to the start of the next one
new_fragements = [
    " ".join(real_text[begin:stop])
    for begin, stop in double_iter_then_none(start_of_fragments)
]

In [60]:
# Swap the text of each fragment for the matching slice of the transcript
for subtitle, replacement in zip(srt, new_fragements):
    subtitle.text = replacement

# Write the result to a new SRT file
srt.save("proper.srt", encoding="utf-8")

In [None]:
# YAY