forked from KA-Advocates/KATranslationCheck
-
Notifications
You must be signed in to change notification settings - Fork 0
/
LintReport.py
executable file
·76 lines (66 loc) · 2.47 KB
/
LintReport.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Official Khan Academy Lint reader
"""
import atexit
import csv
import html
import os
import re
import shelve
import time
from collections import namedtuple
from html.parser import HTMLParser

import requests
from ansicolor import black

from UpdateAllFiles import downloadCrowdinById, getCrowdinSession
# One lint finding. readLintCSV() fills date, url, crid, text and errmsg and
# leaves the remaining four fields as None; readAndMapLintEntries() yields
# entries with msgid, msgstr, comment and filename filled in as well.
LintEntry = namedtuple(
    "LintEntry",
    "date url crid text errmsg msgid msgstr comment filename",
)

# Shelf used by downloadCrowdinByIdCached(); stays None until the first call.
cache = None
def downloadCrowdinByIdCached(session, crid, lang):
    """
    Call downloadCrowdinById(), remembering its result in an on-disk shelf.

    The shelf at /tmp/katc-cache is opened on the first call, stored in the
    module-level ``cache`` and closed at interpreter exit. Entries are keyed
    by "<lang>-<crid>" and never expire.

    :param session: Passed through unchanged to downloadCrowdinById()
    :param crid: ID passed to downloadCrowdinById(); part of the cache key
    :param lang: Language code passed to downloadCrowdinById(); part of the cache key
    :return: The cached or freshly downloaded result of downloadCrowdinById()
    """
    global cache
    if cache is None:
        # NOTE(review): shelve unpickles whatever is stored at this fixed /tmp
        # path; confirm that location is trusted on the machines running this.
        cache = shelve.open("/tmp/katc-cache")
        # The shelve docs say not to rely on implicit closing, so make sure
        # pending writes reach the disk when the interpreter shuts down.
        atexit.register(cache.close)
    cacheKey = "{0}-{1}".format(lang, crid)
    try:
        return cache[cacheKey]
    except KeyError:
        pass  # Not cached yet: fall through to the download
    # TODO cache expiration
    cdata = downloadCrowdinById(session, crid, lang)
    cache[cacheKey] = cdata
    return cdata
class NoResultException(Exception):
    """Exception type for "no result" conditions; nothing in this module raises it."""
def readLintCSV(filename):
    """
    Read a KA lint file.

    The file is parsed as comma-separated UTF-8 text. The first four columns
    of each row become the date, url, text and errmsg fields; crid is the
    part of the URL after the last "#" (the whole URL if it contains none).
    Blank lines are skipped.

    :param filename: Path of the lint CSV file
    :return: List of LintEntry with msgid, msgstr, comment and filename set to None
    :raises IndexError: If a non-blank row has fewer than four columns
    """
    # newline="" is what the csv module requires so that line breaks inside
    # quoted fields are handled by the parser and not by the file object.
    with open(filename, encoding="utf-8", newline="") as lintin:
        reader = csv.reader(lintin, delimiter=',')
        return [
            LintEntry(row[0], row[1], row[1].rpartition("#")[2],
                      row[2], row[3], None, None, None, None)
            for row in reader
            if row  # A blank line parses to [], which has no columns to read
        ]
def readAndMapLintEntries(filename, lang="de"):
    """
    Enrich a list of lint entries with msgid and msgstr information.

    Generator: reads the lint CSV via readLintCSV() and yields one LintEntry
    per row with msgid, msgstr, comment and filename filled in. Entries whose
    crid is a translate.khanacademy.org URL get placeholder values and use
    the entry text as comment; all others are looked up through
    downloadCrowdinByIdCached(). A progress line is printed every 100 entries.

    :param filename: Path of the lint CSV file
    :param lang: Language code used for the Crowdin lookup
    :return: Generator of fully populated LintEntry tuples
    """
    session = getCrowdinSession(domain="https://crowdin.com")
    for cnt, entry in enumerate(readLintCSV(filename), start=1):
        if entry.crid.startswith("https://translate.khanacademy.org"):
            # The crid is a KA Translate URL, not an ID: nothing to look up
            msgid = "[KA Translate link]"
            msgstr = "[KA Translate link]"
            comment = entry.text
            sourceFile = "[KA Translate link]"
        else:
            msgid, msgstr, comment, sourceFile = downloadCrowdinByIdCached(
                session, entry.crid, lang)
        # Replace spaces and tabs by visible marker characters
        msgid = msgid.replace(" ", "⸱").replace("\t", "→")
        msgstr = msgstr.replace(" ", "⸱").replace("\t", "→")
        # html.unescape() replaces HTMLParser.unescape(), removed in Python 3.9
        comment = html.unescape(comment).replace("<a href=", "<a target=\"_blank\" href=")
        yield LintEntry(entry.date, entry.url, entry.crid, entry.text,
                        entry.errmsg, msgid, msgstr, comment, sourceFile)
        if cnt % 100 == 0:
            print("Mapped {0} lint entries".format(cnt))
if __name__ == "__main__":
    # The former entry point called getLatestLintPostURLForLanguage(), which
    # is neither defined nor imported in this file and so raised NameError.
    # Print the parsed entries of a lint CSV given on the command line instead.
    import sys
    if len(sys.argv) != 2:
        sys.exit("Usage: LintReport.py <lint CSV file>")
    for lintEntry in readLintCSV(sys.argv[1]):
        print(lintEntry)