forked from KA-Advocates/KATranslationCheck
-
Notifications
You must be signed in to change notification settings - Fork 0
/
VideoTranslations.py
executable file
·60 lines (53 loc) · 1.77 KB
/
VideoTranslations.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
#!/usr/bin/env python3
"""
Dubbed video mapping code
"""
import csv
import functools
import json
import os
import re
from collections import defaultdict
from multiprocessing import Pool

import requests
from ansicolor import black, red

from Languages import findAllLanguages

try:
    from StringIO import StringIO
except ImportError:
    from io import StringIO
def fetchVideoTranslationsCSV(lang):
    """
    Download and parse the Khan Academy video translation CSV for one language.

    Requests "<lang>_all_videos.csv" from www.khanacademy.org and keeps every
    row whose translated video ID (column index 6) is non-empty.

    Parameters:
        lang: Language code inserted into the CSV URL.

    Returns:
        A list of (slug, url, orig_url) tuples. slug is taken from column
        index 8; url and orig_url are YouTube watch URLs built from the
        translated (column 6) and original (column 5) video IDs.
        The list is empty if the server answered with an HTTP error status.
    """
    response = requests.get("https://www.khanacademy.org/translations/videos/{0}_all_videos.csv".format(lang))
    # An error response carries an error page, not CSV. Parsing it would at
    # best yield nothing and at worst insert junk entries into the video map.
    if not response.ok:
        print(red("Failed to fetch videomap for {0}: HTTP {1}".format(
            lang, response.status_code), bold=True))
        return []
    url_tpl = "https://www.youtube.com/watch?v={0}"
    result = []
    for row in csv.reader(StringIO(response.text)):
        # Skip rows too short to contain the slug column (index 8)
        if len(row) < 9:
            continue
        slug, orig_vid, vid = row[8], row[5], row[6]
        # Ignore non translated videos
        if not vid:
            continue
        result.append((slug, url_tpl.format(vid), url_tpl.format(orig_vid)))
    return result
def updateVideoMap(args):
    """
    Generates cache/VideoMap.json from Khan Academy data

    Fetches the video translation CSV of every language returned by
    findAllLanguages() using a pool of worker processes, then merges the
    results into one mapping of the form
        {slug: {language: url, ..., "en": original url}}
    and writes that mapping as JSON.

    Parameters:
        args: Not used by this function.
    """
    # sorted() already accepts any iterable and returns a new list
    languages = sorted(findAllLanguages())
    print(black("Fetching language videomaps", bold=True))
    # The context manager shuts the worker processes down once map() has
    # returned all results, so the pool is not leaked.
    with Pool(48) as pool:
        langresults = pool.map(fetchVideoTranslationsCSV, languages)
    videoMap = defaultdict(dict)
    # pool.map() preserves input order, so results line up with languages
    for language, langresult in zip(languages, langresults):
        # Insert results into main language map
        for slug, url, orig_url in langresult:
            videoMap[slug][language] = url
            videoMap[slug]["en"] = orig_url
    print(black("Writing JSON videomap...", bold=True))
    with open(os.path.join("cache", "VideoMap.json"), "w") as outfile:
        json.dump(videoMap, outfile)
# Script entry point: rebuild the video map when run directly.
# updateVideoMap() takes an args parameter; None is passed here.
if __name__ == "__main__":
    updateVideoMap(args=None)