
Commit

Merge pull request #46 from averkij/t/master/unused-confirm
Add "hide unused lines" switches and storage for these flags.
averkij committed Mar 18, 2021
2 parents 623e178 + 421b7fa commit 6543e74
Showing 11 changed files with 212 additions and 1,421 deletions.
2 changes: 2 additions & 0 deletions .gitignore
@@ -20,3 +20,5 @@ be/models/labse.bin
be/models/sentence_transformers_xlm_100.bin
be/models/sentence_transformers-v2.bin
docs/source/_build
be/data
be/static/img
Binary file removed be/data/asdasd2/user.db
Binary file removed be/data/serg/db/ru/zh/chekhov_6_ru.txt.db
1,340 changes: 0 additions & 1,340 deletions be/data/serg/proxy/zh/chekhov_6_zh.txt

This file was deleted.

109 changes: 81 additions & 28 deletions be/helper.py
@@ -62,10 +62,10 @@ def get_sim_grades(processing_file):
return docs["sim_grades"]


def clean_img_user_foler(username, file):
def clean_img_user_foler(username, align_guid):
"""Clean user folder with images"""
imgs = get_files_list_with_path(os.path.join(
con.STATIC_FOLDER, con.IMG_FOLDER, username), mask=f"{os.path.basename(file)}.best_*.png")
con.STATIC_FOLDER, con.IMG_FOLDER, username), mask=f"{align_guid}.best_*.png")
for img in imgs:
if os.path.isfile(img):
os.remove(img)
@@ -108,9 +108,9 @@ def init_document_db(db_path):
os.remove(db_path)
with sqlite3.connect(db_path) as db:
db.execute(
'create table splitted_from(id integer primary key, text nvarchar)')
'create table splitted_from(id integer primary key, text nvarchar, exclude integer)')
db.execute(
'create table splitted_to(id integer primary key, text nvarchar)')
'create table splitted_to(id integer primary key, text nvarchar, exclude integer)')
db.execute(
'create table proxy_from(id integer primary key, text nvarchar)')
db.execute('create table proxy_to(id integer primary key, text nvarchar)')
@@ -131,15 +131,15 @@ def fill_document_db(db_path, splitted_from, splitted_to, proxy_from, proxy_to):
with open(splitted_from, mode="r", encoding="utf-8") as input_path:
lines = input_path.readlines()
with sqlite3.connect(db_path) as db:
db.executemany("insert into splitted_from(text) values (?)", [
(x.strip(),) for x in lines])
db.executemany("insert into splitted_from(text, exclude) values (?,?)", [
(x.strip(), 0) for x in lines])

if os.path.isfile(splitted_to):
with open(splitted_to, mode="r", encoding="utf-8") as input_path:
lines = input_path.readlines()
with sqlite3.connect(db_path) as db:
db.executemany("insert into splitted_to(text) values (?)", [
(x.strip(),) for x in lines])
db.executemany("insert into splitted_to(text, exclude) values (?,?)", [
(x.strip(), 0) for x in lines])

if os.path.isfile(proxy_from):
with open(proxy_from, mode="r", encoding="utf-8") as input_path:
@@ -352,24 +352,62 @@ def get_splitted_from_by_id(db_path, ids):
"""Get lines from splitted_from by ids"""
res = []
with sqlite3.connect(db_path) as db:
for id, text_from, proxy_from in db.execute(
f'select f.id, f.text, pf.text from splitted_from f left join proxy_from pf on pf.id = f.id where f.id in ({",".join([str(x) for x in ids])})'
for id, text_from, proxy_from, exclude in db.execute(
f'select f.id, f.text, pf.text, f.exclude from splitted_from f left join proxy_from pf on pf.id = f.id where f.id in ({",".join([str(x) for x in ids])})'
):
res.append((id, text_from, proxy_from))
res.append((id, text_from, proxy_from, exclude))
return res


def get_splitted_to_by_id(db_path, ids):
"""Get lines from splitted_to by ids"""
res = []
with sqlite3.connect(db_path) as db:
for id, text_to, proxy_to in db.execute(
f'select t.id, t.text, pt.text from splitted_to t left join proxy_to pt on pt.id = t.id where t.id in ({",".join([str(x) for x in ids])})'
for id, text_to, proxy_to, exclude in db.execute(
f'select t.id, t.text, pt.text, t.exclude from splitted_to t left join proxy_to pt on pt.id = t.id where t.id in ({",".join([str(x) for x in ids])})'
):
res.append((id, text_to, proxy_to))
res.append((id, text_to, proxy_to, exclude))
return res


def get_splitted_from(db_path):
"""Get lines from splitted_from"""
with sqlite3.connect(db_path) as db:
res = db.execute(
f'select f.text from splitted_from f order by f.id').fetchall()
return [x[0] for x in res]


def get_splitted_to(db_path):
"""Get lines from splitted_to"""
with sqlite3.connect(db_path) as db:
res = db.execute(
f'select t.text from splitted_to t order by t.id').fetchall()
return [x[0] for x in res]


def switch_excluded_splitted_to(db_path, id):
"""Mark splitted_to line as unused"""
with sqlite3.connect(db_path) as db:
exclude = db.execute("select exclude from splitted_to where id=:id", {
"id": id}).fetchone()
if exclude:
db.execute('update splitted_to set exclude=:exclude where id=:id', {
"exclude": (exclude[0] + 1) % 2, "id": id})
return


def switch_excluded_splitted_from(db_path, id):
"""Mark splitted_from line as unused"""
with sqlite3.connect(db_path) as db:
exclude = db.execute("select exclude from splitted_from where id=:id", {
"id": id}).fetchone()
if exclude:
db.execute('update splitted_from set exclude=:exclude where id=:id', {
"exclude": (exclude[0] + 1) % 2, "id": id})
return


def get_texts_length(db_path):
"""Get splitted lines count"""
res = []
@@ -394,9 +432,9 @@ def init_user_db(username):
logging.info(f"creating user db: {db_path}")
with sqlite3.connect(db_path) as db:
db.execute(
'create table documents(id integer primary key, guid varchar, lang varchar, name varchar)')
'create table documents(id integer primary key, guid text, lang text, name text)')
db.execute(
'create table alignments(id integer primary key, guid varchar, guid_from varchar, guid_to varchar, name varchar, state integer, curr_batches integer, total_batches integer, deleted integer default 0 NOT NULL)')
'create table alignments(id integer primary key, guid text, guid_from text, guid_to text, lang_from text, lang_to text, name text, state integer, curr_batches integer, total_batches integer, deleted integer default 0 NOT NULL)')


def alignment_exists(username, guid_from, guid_to):
@@ -420,21 +458,21 @@ def alignment_guid_exists(username, guid):
def register_alignment(username, lang_from, lang_to, guid, guid_from, guid_to, name, total_batches):
"""Register new alignment in user.db and main.db"""
main_db_path = os.path.join(con.UPLOAD_FOLDER, con.MAIN_DB_NAME)
db_path = os.path.join(con.UPLOAD_FOLDER, username, con.USER_DB_NAME)
user_db_path = os.path.join(con.UPLOAD_FOLDER, username, con.USER_DB_NAME)
if not alignment_exists(username, guid_from, guid_to):
with sqlite3.connect(main_db_path) as main_db:
main_db.execute('insert into global_alignments(guid, username, lang_from, lang_to, name, state, insert_ts, deleted) values (:guid, :username, :lang_from, :lang_to, :name, 2, :insert_ts, 0) ', {
"guid": guid, "username": username, "lang_from": lang_from, "lang_to": lang_to, "name": name, "insert_ts": datetime.datetime.utcnow().strftime('%Y-%m-%d_%H:%M:%S')})
with sqlite3.connect(db_path) as db:
db.execute('insert into alignments(guid, guid_from, guid_to, name, state, curr_batches, total_batches) values (:guid, :guid_from, :guid_to, :name, 2, 0, :total_batches) ', {
"guid": guid, "guid_from": guid_from, "guid_to": guid_to, "name": name, "total_batches": total_batches})
with sqlite3.connect(user_db_path) as db:
db.execute('insert into alignments(guid, guid_from, guid_to, lang_from, lang_to, name, state, curr_batches, total_batches) values (:guid, :guid_from, :guid_to, :lang_from, :lang_to, :name, 2, 0, :total_batches) ', {
"guid": guid, "guid_from": guid_from, "guid_to": guid_to, "lang_from": lang_from, "lang_to": lang_to, "name": name, "total_batches": total_batches})
return


def get_alignment_id(username, guid_from, guid_to):
"""Return alignment id"""
db_path = os.path.join(con.UPLOAD_FOLDER, username, con.USER_DB_NAME)
with sqlite3.connect(db_path) as db:
user_db_path = os.path.join(con.UPLOAD_FOLDER, username, con.USER_DB_NAME)
with sqlite3.connect(user_db_path) as db:
res = db.execute("select guid from alignments where guid_from=:guid_from and guid_to=:guid_to", {
"guid_from": guid_from, "guid_to": guid_to}).fetchone()
return res[0] if res else None
@@ -571,6 +609,24 @@ def get_fileinfo(username, guid):
return filename[0] if filename else None


def get_alignment_fileinfo_from(username, guid):
"""Get file (from) info by id from alignments table"""
db_path = os.path.join(con.UPLOAD_FOLDER, username, con.USER_DB_NAME)
with sqlite3.connect(db_path) as db:
res = db.execute("select guid_from, lang_from from alignments where guid_from=:guid", {
"guid": guid}).fetchone()
return ([res[0], res[1]]) if res else None


def get_alignment_fileinfo_to(username, guid):
"""Get file (to) info by id from alignments table"""
db_path = os.path.join(con.UPLOAD_FOLDER, username, con.USER_DB_NAME)
with sqlite3.connect(db_path) as db:
res = db.execute("select guid_to, lang_to from alignments where guid_to=:guid", {
"guid": guid}).fetchone()
return ([res[0], res[1]]) if res else None


def get_alignment_info(username, guid):
"""Get alignment info by id"""
db_path = os.path.join(con.UPLOAD_FOLDER, username, con.USER_DB_NAME)
@@ -584,13 +640,10 @@ def get_alignments_list(username, lang_from, lang_to):
with sqlite3.connect(db_path) as db:
res = db.execute("""select
a.guid, a.name, a.guid_from, a.guid_to, a.state, a.curr_batches, a.total_batches
from
alignments a
join documents d_from on d_from.guid=a.guid_from
join documents d_to on d_to.guid=a.guid_to
from alignments a
where
d_from.lang=:lang_from and d_to.lang=:lang_to
and deleted <> 1""", {
a.lang_from=:lang_from and a.lang_to=:lang_to
and a.deleted <> 1""", {
"lang_from": lang_from, "lang_to": lang_to}).fetchall()
return res

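The exclude column added to splitted_from and splitted_to is toggled by the new switch_excluded_splitted_from and switch_excluded_splitted_to helpers: the current value is read and flipped with (exclude + 1) % 2. The snippet below is a minimal standalone sketch of that toggle against a throwaway SQLite file; the table and column names follow the diff, while the file path and sample row are invented for illustration and are not part of this change.

import sqlite3

def toggle_exclude(db_path, table, line_id):
    # Flip the exclude flag (0 -> 1 -> 0) for a single line,
    # mirroring switch_excluded_splitted_from/_to in be/helper.py.
    with sqlite3.connect(db_path) as db:
        row = db.execute(f"select exclude from {table} where id=:id",
                         {"id": line_id}).fetchone()
        if row:
            db.execute(f"update {table} set exclude=:exclude where id=:id",
                       {"exclude": (row[0] + 1) % 2, "id": line_id})

if __name__ == "__main__":
    db_path = "toggle_demo.db"  # throwaway path, not used by the application
    with sqlite3.connect(db_path) as db:
        db.execute("create table if not exists splitted_from"
                   "(id integer primary key, text nvarchar, exclude integer)")
        db.execute("insert into splitted_from(text, exclude) values (?,?)",
                   ("Sample sentence.", 0))
    toggle_exclude(db_path, "splitted_from", 1)  # exclude becomes 1 (hidden)
    toggle_exclude(db_path, "splitted_from", 1)  # exclude becomes 0 again

get_splitted_from_by_id and get_splitted_to_by_id now return this flag as a fourth tuple element, which main.py exposes to the frontend as the "e" field.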
73 changes: 38 additions & 35 deletions be/main.py
@@ -218,29 +218,22 @@ def start_alignment(username):

name, guid_from, guid_to, state, curr_batches, total_batches = helper.get_alignment_info(
username, align_guid)
file_from, lang_from = helper.get_fileinfo(username, guid_from)
file_to, lang_to = helper.get_fileinfo(username, guid_to)
_, lang_from = helper.get_alignment_fileinfo_from(username, guid_from)
_, lang_to = helper.get_alignment_fileinfo_to(username, guid_to)

db_folder = os.path.join(con.UPLOAD_FOLDER, username,
con.DB_FOLDER, lang_from, lang_to)
db_path = os.path.join(db_folder, f"{align_guid}.db")
user_db_path = os.path.join(con.UPLOAD_FOLDER, username, con.USER_DB_NAME)

logging.info(
f"align parameters align_guid {align_guid} align_all {align_all} batch_ids {batch_ids} name {name} guid_from {guid_from} guid_to {guid_to} total_batches {total_batches}")
f"align parameters START align_guid {align_guid} align_all {align_all} batch_ids {batch_ids} name {name} guid_from {guid_from} guid_to {guid_to} total_batches {total_batches}")

splitted_from = os.path.join(
con.UPLOAD_FOLDER, username, con.SPLITTED_FOLDER, lang_from, file_from)
splitted_to = os.path.join(
con.UPLOAD_FOLDER, username, con.SPLITTED_FOLDER, lang_to, file_to)

with open(splitted_from, mode="r", encoding="utf-8") as input_from, \
open(splitted_to, mode="r", encoding="utf-8") as input_to:
lines_from = input_from.readlines()
lines_to = input_to.readlines()
lines_from = helper.get_splitted_from(db_path)
lines_to = helper.get_splitted_to(db_path)

logging.info(f"[{username}]. Cleaning images.")
helper.clean_img_user_foler(username, file_from)
# helper.clean_img_user_foler(username, align_guid)

if align_all:
batch_ids = list(range(total_batches))
@@ -281,13 +274,11 @@ def start_alignment(username):
@app.route("/items/<username>/alignment/align/next", methods=["POST"])
def align_next_batch(username):
"""Align next batch of two splitted documents"""

align_guid = request.form.get("id", '')

name, guid_from, guid_to, state, curr_batches, total_batches = helper.get_alignment_info(
username, align_guid)
file_from, lang_from = helper.get_fileinfo(username, guid_from)
file_to, lang_to = helper.get_fileinfo(username, guid_to)
_, lang_from = helper.get_alignment_fileinfo_from(username, guid_from)
_, lang_to = helper.get_alignment_fileinfo_to(username, guid_to)

db_folder = os.path.join(con.UPLOAD_FOLDER, username,
con.DB_FOLDER, lang_from, lang_to)
@@ -298,20 +289,13 @@ def align_next_batch(username):
batch_ids = [batches_count]

logging.info(
f"align parameters align_guid {align_guid} batch_ids {batch_ids} name {name} guid_from {guid_from} guid_to {guid_to} total_batches {total_batches}")
f"align parameters NEXT align_guid {align_guid} batch_ids {batch_ids} name {name} guid_from {guid_from} guid_to {guid_to} total_batches {total_batches}")

splitted_from = os.path.join(
con.UPLOAD_FOLDER, username, con.SPLITTED_FOLDER, lang_from, file_from)
splitted_to = os.path.join(
con.UPLOAD_FOLDER, username, con.SPLITTED_FOLDER, lang_to, file_to)

with open(splitted_from, mode="r", encoding="utf-8") as input_from, \
open(splitted_to, mode="r", encoding="utf-8") as input_to:
lines_from = input_from.readlines()
lines_to = input_to.readlines()
lines_from = helper.get_splitted_from(db_path)
lines_to = helper.get_splitted_to(db_path)

logging.info(f"[{username}]. Cleaning images.")
helper.clean_img_user_foler(username, file_from)
# helper.clean_img_user_foler(username, align_guid)

# exit if batch ids is empty
batch_ids = [x for x in batch_ids if x < total_batches][:total_batches]
@@ -422,10 +406,11 @@ def get_splitted_from_by_ids(username, lang_from, lang_to, align_guid):

res = {}
if text_ids:
for id, text, proxy in helper.get_splitted_from_by_id(db_path, text_ids):
for id, text, proxy, exclude in helper.get_splitted_from_by_id(db_path, text_ids):
res[id] = {
"t": text,
"p": proxy if proxy else ''
"p": proxy if proxy else '',
"e": exclude == 1
}
return {"items": res}

@@ -442,10 +427,11 @@ def get_splitted_to_by_ids(username, lang_from, lang_to, align_guid):

res = {}
if text_ids:
for id, text, proxy in helper.get_splitted_to_by_id(db_path, text_ids):
for id, text, proxy, exclude in helper.get_splitted_to_by_id(db_path, text_ids):
res[id] = {
"t": text,
"p": proxy if proxy else ''
"p": proxy if proxy else '',
"e": exclude == 1
}

return {"items": res}
@@ -590,10 +576,28 @@ def stop_alignment(username, lang_from, lang_to, aling_id):
return ('', 200)


@app.route("/items/<username>/edit/exclude/<lang_from>/<lang_to>/<aling_id>", methods=["POST"])
def switch_excluded(username, lang_from, lang_to, aling_id):
"""Switch excluded flag for unused string"""
db_folder = os.path.join(con.UPLOAD_FOLDER, username,
con.DB_FOLDER, lang_from, lang_to)
db_path = os.path.join(db_folder, f'{aling_id}.db')

line_id = request.form.get("line_id", -1)
text_type = request.form.get("text_type", con.TYPE_FROM)

print("EXCLUDING", text_type, aling_id, lang_from, lang_to, line_id)

if text_type == "from":
helper.switch_excluded_splitted_from(db_path, line_id)
else:
helper.switch_excluded_splitted_to(db_path, line_id)
return ('', 200)


@app.route("/debug/items", methods=["GET"])
def show_items_tree():
"""Show all files in data folder"""

tree_path = os.path.join(tempfile.gettempdir(), "items_tree.txt")
logging.debug(f"Temp file for tree structure: {tree_path}.")
with open(tree_path, mode="w", encoding="utf-8") as tree_out:
@@ -606,9 +610,8 @@ def show_items_tree():
tree_out.write(f"{subindent}{file}" + "\n")
return send_file(tree_path)

# Not API calls treated like static queries


# Not API calls treated like static queries
@app.route("/<path:path>")
def route_frontend(path):
"""Route static requests"""
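main.py wires the toggle to a new endpoint, /items/<username>/edit/exclude/<lang_from>/<lang_to>/<aling_id>, which reads text_type and line_id from the form body and returns an empty 200 response. Below is a rough client-side sketch using requests; the host, port and sample identifiers are placeholders chosen for illustration, only the route shape and form fields come from the diff.

import requests

# Hypothetical values -- host/port, username, languages and GUID are placeholders.
BASE = "http://localhost:5000"
username, lang_from, lang_to = "serg", "ru", "zh"
align_guid = "00000000-0000-0000-0000-000000000000"

resp = requests.post(
    f"{BASE}/items/{username}/edit/exclude/{lang_from}/{lang_to}/{align_guid}",
    data={
        "text_type": "from",  # "from" toggles splitted_from, anything else splitted_to
        "line_id": 42,
    },
)
print(resp.status_code)  # 200 on success; the response body is empty

The frontend counterpart is editProcessingMarkUnused in api.service.js below, which posts the same two form fields.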
10 changes: 10 additions & 0 deletions fe/src/common/api.service.js
@@ -231,4 +231,14 @@ export const ItemsService = {
form
);
},
editProcessingMarkUnused(params) {
let form = new FormData();
form.append("text_type", params.textType);
form.append("line_id", params.lineId);
return ApiService.post(
"items",
`${params.username}/edit/exclude/${params.langCodeFrom}/${params.langCodeTo}/${params.guid}`,
form
);
},
};
12 changes: 10 additions & 2 deletions fe/src/common/settings.helper.js
@@ -1,13 +1,21 @@
export const SettingsHelper = {
getShowProxyTo() {
return localStorage.showProxyTo ? localStorage.showProxyTo : defaultClientSettings.showProxyTo;
}
},
getHideMarkedTo() {
return localStorage.hideMarkedTo ? localStorage.hideMarkedTo : defaultClientSettings.hideMarkedTo;
},
getHideMarkedFrom() {
return localStorage.hideMarkedFrom ? localStorage.hideMarkedFrom : defaultClientSettings.hideMarkedFrom;
},
}

const defaultClientSettings = {
showProxyTo: true,
hideMarkedTo: false,
hideMarkedFrom: false,
candidatesSorting: CANDIDATES_SORTING_NEAREST
}

export const CANDIDATES_SORTING_NEAREST = 'nearest'
export const CANDIDATES_SORTING_SIMILAR = 'similar'
export const CANDIDATES_SORTING_SIMILAR = 'similar'
1 change: 1 addition & 0 deletions fe/src/store/actions.type.js
@@ -23,6 +23,7 @@ export const DELETE_ALIGNMENT = "delete_alignment"
export const STOP_ALIGNMENT = "stop_alignment";

export const EDIT_PROCESSING = "edit_processing";
export const EDIT_PROCESSING_MARK_UNUSED = "edit_processing_mark_unused";

export const DOWNLOAD_PROCESSING = "download_processing";
export const DOWNLOAD_SPLITTED = "download_splitted";
