In [None]:
from cronjob import get_repositories, prod_domain, zendesk_cronjob
from crowdin import delete_code_translations, get_orphaned_files_as_data_frame
from crowdin_sync import get_branch_files, get_repository_state, update_repository
from datetime import datetime
from file_manager import get_eligible_files, get_local_file
import git
import json
import os
import pandas as pd
import re
from repository import get_subrepositories, initial_dir
from zendesk import download_zendesk_articles, update_zendesk_translation

In [None]:
# Load repositories.csv into a DataFrame and display it; the comment='#'
# option makes pandas skip '#'-commented content in the file.
repositories_df = pd.read_csv(
    'repositories.csv',
    comment='#',
)

repositories_df

In [None]:
def get_update_result(index):
    """Pick one repository by position and fetch its state.

    Args:
        index: Position within the list returned by get_repositories(False).

    Returns:
        A (repository, state) tuple, where state is whatever
        get_repository_state returns for that repository.
    """
    selected = get_repositories(False)[index]
    state = get_repository_state(selected)
    return selected, state

In [None]:
# Display the entry at position 5 of the list returned by get_repositories(False).
# NOTE(review): the index is positional, so it presumably shifts whenever the
# repository list changes -- confirm it still points at the intended entry.
get_repositories(False)[5]

# List categories, sections, and articles

In [None]:
# Parse the categories JSON file for prod_domain into `categories`.
with open('categories_%s.json' % prod_domain) as f:
    categories = json.loads(f.read())

In [None]:
# Tabulate id and name for the single category whose id is 360004183251.
pd.DataFrame([
    {'category_id': entry['id'], 'name': entry['name']}
    for entry in categories.values()
    if entry['id'] == 360004183251
])

In [None]:
# Parse the sections JSON file for prod_domain into `sections`.
with open('sections_%s.json' % prod_domain) as f:
    sections = json.loads(f.read())

In [None]:
# Tabulate id and name for every section that belongs to category 360004183251.
pd.DataFrame([
    {'section_id': entry['id'], 'name': entry['name']}
    for entry in sections.values()
    if entry['category_id'] == 360004183251
])

In [None]:
# Parse the all-articles JSON file for prod_domain into `all_articles`.
with open('all_articles_%s.json' % prod_domain) as f:
    all_articles = json.loads(f.read())

In [None]:
# Tabulate id and title for every article whose section_id is in the list
# below (currently a single section; add ids to the list to widen the view).
pd.DataFrame([
    {'article_id': entry['id'], 'title': entry['title']}
    for entry in all_articles.values()
    if entry['section_id'] in [360010296132]
])

# Test updating ignore list

In [None]:
from file_manager import get_eligible_files

def _to_source_path(translated_path, language):
    """Map a translated file path to its 'en' counterpart.

    A leading '<language>/' folder is swapped for 'en/'; otherwise every
    '/<language>/' path segment is replaced with '/en/'. The prefix length is
    derived from the language code, so codes longer than two characters
    (e.g. 'zh-CN') are handled as well.
    """
    root_prefix = '%s/' % language

    if translated_path.startswith(root_prefix):
        return 'en/' + translated_path[len(root_prefix):]

    return translated_path.replace('/%s/' % language, '/en/')


def _ignore_keys(repository_name, file_path):
    """Return the '<repository>/<file>' keys to ignore for one file.

    A '.md' file also yields its '.markdown' twin so both spellings of the
    extension end up in the ignore list.
    """
    keys = ['%s/%s' % (repository_name, file_path)]

    if file_path.endswith('.md'):
        keys.append('%s/%s.markdown' % (repository_name, file_path[:-3]))

    return keys


def add_ignore_files(repository, folder, language):
    """Add the source files behind a translated folder to ignore.csv.

    The files reported by git.ls_files(folder) inside the repository's git
    root are filtered through get_eligible_files, mapped to their 'en' source
    paths (unless language is already 'en'), merged with the existing rows of
    '<initial_dir>/ignore.csv', de-duplicated, sorted by (repository, file)
    and written back to the same file.

    Args:
        repository: Object exposing github.git_root and github.upstream.
        folder: Folder path handed to git.ls_files.
        language: Language code of the translated folder (e.g. 'ja').

    Returns:
        None. The result is the rewritten ignore.csv.
    """
    os.chdir(repository.github.git_root)

    try:
        translated_files = get_eligible_files(repository, git.ls_files(folder), language)
    finally:
        # Always return to the notebook's directory, even when the listing
        # fails; otherwise later relative paths resolve inside the repository.
        os.chdir(initial_dir)

    if language != 'en':
        source_files = [_to_source_path(path, language) for path in translated_files]
    else:
        source_files = translated_files

    ignore_path = '%s/ignore.csv' % initial_dir
    entries = set()

    # Existing rows first (positional columns: repository, file) ...
    for record in pd.read_csv(ignore_path).to_records(index=False):
        entries.update(_ignore_keys(record[0], record[1]))

    # ... then the newly discovered source files for the upstream repository.
    for source_file in source_files:
        entries.update(_ignore_keys(repository.github.upstream, source_file))

    rows = []

    for entry in entries:
        # Keys look like '<remote>/<repo_name>/<file path>'.
        remote, repo_name, file_path = entry.split('/', 2)
        rows.append({'repository': remote + '/' + repo_name, 'file': file_path})

    rows.sort(key=lambda row: (row['repository'], row['file']))

    # Explicit columns keep the header in place even when there are no rows,
    # so the file stays readable by the next pd.read_csv call.
    df = pd.DataFrame(rows, columns=['repository', 'file'])
    df.to_csv(ignore_path, index=False)

In [None]:
# Fetch the repository at position 4 together with its state, then guard
# against the positional index pointing at a different fork than expected.
repository, update_result = get_update_result(4)

assert repository.github.origin == 'holatuwol/liferay-docs'

In [None]:
# Split the repository state returned by get_update_result into its three parts.
new_files, all_files, file_info = update_result

In [None]:
# Add each listed tutorial folder of the 'ja' translation to the ignore list.
# The folders are processed one at a time, in the order given, and every call
# rewrites ignore.csv with the merged result.
ignored_language = 'ja'

ignored_tutorial_folders = [
    '01-introduction-to-liferay-development',
    '100-tooling',
    '120-customizing',
    '140-application-security',
    '150-web-services',
    '160-search',
    '185-liferay-js-apis',
    '210-front-end-taglibs',
    '240-product-navigation',
    '280-workflow',
    '300-internationalization',
    '310-application-display-templates',
    '380-troubleshooting',
]

for tutorial_folder in ignored_tutorial_folders:
    add_ignore_files(
        repository,
        '%s/develop/tutorials/articles/%s' % (ignored_language, tutorial_folder),
        ignored_language)

In [None]:
# NOTE(review): this is the same call as the first statement of the
# "Remove code blocks" section that follows, but without the origin check
# made there; it looks redundant -- confirm whether this cell can be dropped.
repository, update_result = get_update_result(7)

# Remove code blocks

In [None]:
# Fetch the repository at position 7 together with its state, then guard
# against the positional index pointing at a different fork than expected.
repository, update_result = get_update_result(7)

assert repository.github.origin == 'holatuwol/liferay-learn'

In [None]:
# Split the repository state returned by get_update_result into its three parts.
new_files, all_files, file_info = update_result

In [None]:
# Call delete_code_translations once for every entry of all_files, passing the
# shared file_info along each time.
# NOTE(review): going by its name this presumably removes translations of code
# blocks on the Crowdin side -- confirm before re-running, as it may not be reversible.
for file in all_files:
    delete_code_translations(repository, file, file_info)

# Check orphaned file statistics

In [None]:
# Fetch the repository at position 3 together with its state, then guard
# against the positional index pointing at a different fork than expected.
# NOTE(review): position 7 is asserted to have this same origin in the
# "Remove code blocks" section; presumably two entries share the fork -- confirm.
repository, update_result = get_update_result(3)

assert repository.github.origin == 'holatuwol/liferay-learn'

In [None]:
# Build the orphaned-files table for the selected repository from its state.
# The cells below read the 'id1', 'id2' and 'translated1' columns of this frame.
df = get_orphaned_files_as_data_frame(repository, update_result)

In [None]:
def get_crowdin_link(file_id):
    """Return an HTML anchor for a Crowdin file id.

    The link targets the en-ja translation view of the liferay-documentation
    project and uses the file id itself as the link text.
    """
    link_template = '<a href="https://crowdin.com/translate/liferay-documentation/%s/en-ja">%s</a>'
    return link_template % (file_id, file_id)

In [None]:
# Display the full orphaned-files table with the id1/id2 columns rendered
# as Crowdin links.
df.style.format(dict.fromkeys(('id1', 'id2'), get_crowdin_link))

In [None]:
# Same view, restricted to rows whose translated1 value is greater than zero.
df.loc[df['translated1'] > 0].style.format(
    dict.fromkeys(('id1', 'id2'), get_crowdin_link)
)