From c3a0cd05471de744972cdf17945392d0333b10c5 Mon Sep 17 00:00:00 2001 From: natiiix Date: Tue, 16 Jul 2019 11:23:12 +0200 Subject: [PATCH 01/74] Exclude shell scripts from CRLF line ending rule --- .editorconfig | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/.editorconfig b/.editorconfig index 0d0c734..b070773 100644 --- a/.editorconfig +++ b/.editorconfig @@ -1,9 +1,12 @@ -root = true - -[*] -indent_style = space -indent_size = 4 -charset = utf-8 -end_of_line = crlf -trim_trailing_whitespace = true -insert_final_newline = true +root = true + +[*] +indent_style = space +indent_size = 4 +charset = utf-8 +end_of_line = crlf +trim_trailing_whitespace = true +insert_final_newline = true + +[*.sh] +end_of_line = lf From 6e9bc18f46ed08e7fac4f0962899b5160e0dee3a Mon Sep 17 00:00:00 2001 From: natiiix Date: Tue, 16 Jul 2019 11:42:45 +0200 Subject: [PATCH 02/74] Begin implementing web UI server --- web/__init__.py | 0 web/app.py | 30 ++++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+) create mode 100644 web/__init__.py create mode 100644 web/app.py diff --git a/web/__init__.py b/web/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/web/app.py b/web/app.py new file mode 100644 index 0000000..c61e3ec --- /dev/null +++ b/web/app.py @@ -0,0 +1,30 @@ +from flask import Flask +from psycopg2 import connect, Error as PG_Error +from sys import stderr +from .credentials import conn_str +# from code_duplication import ??? + +app = Flask(__name__) + + +@app.route("/") +def hello(): + try: + conn = connect(conn_str) + + cur = conn.cursor() + + cur.execute("SELECT col FROM test;") + return str(cur.fetchall()) + + except PG_Error as ex: + print("PostgreSQL Error:", ex, file=stderr) + + finally: + if conn: + cur.close() + conn.close() + + +if __name__ == "__main__": + app.run() From 67ead15d971ba684f2e8f34a894deb2953bbfc76 Mon Sep 17 00:00:00 2001 From: natiiix Date: Tue, 16 Jul 2019 11:43:03 +0200 Subject: [PATCH 03/74] Add web server credentials file to .gitignore --- .gitignore | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/.gitignore b/.gitignore index fb6fefb..a5b6e02 100644 --- a/.gitignore +++ b/.gitignore @@ -1,9 +1,10 @@ -.idea/ -.vscode/ -*.pyc -code_duplication/build/ -code_duplication/code_duplication_scanner.egg-info/ -code_duplication/dist/ -code_duplication/env/ -code_duplication/repos/ -venv/ +.idea/ +.vscode/ +*.pyc +code_duplication/build/ +code_duplication/code_duplication_scanner.egg-info/ +code_duplication/dist/ +code_duplication/env/ +code_duplication/repos/ +venv/ +web/credentials.py From 4f73e0c7bc60ef78a013f09dcaf1e8cb82557770 Mon Sep 17 00:00:00 2001 From: natiiix Date: Tue, 16 Jul 2019 11:43:30 +0200 Subject: [PATCH 04/74] Add script for running web server in debug mode --- run_web.sh | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100755 run_web.sh diff --git a/run_web.sh b/run_web.sh new file mode 100755 index 0000000..03ccbe6 --- /dev/null +++ b/run_web.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +set -e + +export FLASK_ENV="development" +export FLASK_APP="web/app.py" + +flask run From 458577fb6ebd6ac5272edfa20bc0fbf38773137c Mon Sep 17 00:00:00 2001 From: natiiix Date: Tue, 23 Jul 2019 10:01:10 +0200 Subject: [PATCH 05/74] Slightly improve web UI and its server --- web/app.py | 40 ++++++++++++++++++++++------------- web/index.html | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 82 insertions(+), 15 deletions(-) create mode 100644 web/index.html diff --git a/web/app.py b/web/app.py index c61e3ec..c3cdae5 100644 --- a/web/app.py +++ b/web/app.py @@ -1,29 +1,39 @@ -from flask import Flask -from psycopg2 import connect, Error as PG_Error +from flask import Flask, request +# from psycopg2 import connect, Error as PG_Error from sys import stderr -from .credentials import conn_str -# from code_duplication import ??? +# from .credentials import conn_str +from code_duplication.src.secondary_algorithm.fast_check import type1_check_repo +import os.path + +_INDEX_HTML = os.path.join(os.path.dirname(__file__), "index.html") app = Flask(__name__) @app.route("/") def hello(): - try: - conn = connect(conn_str) + with open(_INDEX_HTML, "r", encoding="utf-8") as f: + webpage = f.read() + + first_repo = request.args.get("first") + + return webpage.replace("#LOG#", "\n\n".join([f"{k[:20]} -- {v}" for k, v in type1_check_repo(first_repo, 30).items()]) if first_repo else "") + + # try: + # conn = connect(conn_str) - cur = conn.cursor() + # cur = conn.cursor() - cur.execute("SELECT col FROM test;") - return str(cur.fetchall()) + # cur.execute("SELECT col FROM test;") + # return str(cur.fetchall()) - except PG_Error as ex: - print("PostgreSQL Error:", ex, file=stderr) + # except PG_Error as ex: + # print("PostgreSQL Error:", ex, file=stderr) - finally: - if conn: - cur.close() - conn.close() + # finally: + # if conn: + # cur.close() + # conn.close() if __name__ == "__main__": diff --git a/web/index.html b/web/index.html new file mode 100644 index 0000000..8d4ec41 --- /dev/null +++ b/web/index.html @@ -0,0 +1,57 @@ + + + + + + + Code Clone Duplication Parser + + + + + + + + + + + + + +
+
+

Code Clone Detection Parser

+
+ +
+
+
+
+ + +
+ + +
+
+
+ +
+
+
+
+ + +
+
+
+
+
+ + + + + + \ No newline at end of file From ad97a17778d245d56bc0a2ab320ad1dda89aa3ae Mon Sep 17 00:00:00 2001 From: natiiix Date: Tue, 23 Jul 2019 10:01:24 +0200 Subject: [PATCH 06/74] Add Windows batch script for running web server --- run_web.bat | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 run_web.bat diff --git a/run_web.bat b/run_web.bat new file mode 100644 index 0000000..d18ba6f --- /dev/null +++ b/run_web.bat @@ -0,0 +1,3 @@ +set FLASK_ENV=development +set FLASK_APP=web/app.py +flask run From 0e9b29484794a6290eafd49418e96faa74bb4913 Mon Sep 17 00:00:00 2001 From: natiiix Date: Tue, 23 Jul 2019 10:02:17 +0200 Subject: [PATCH 07/74] Make necessary changes to type 1 check algorithm --- .../src/secondary_algorithm/fast_check.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/code_duplication/src/secondary_algorithm/fast_check.py b/code_duplication/src/secondary_algorithm/fast_check.py index 3015d71..827f570 100644 --- a/code_duplication/src/secondary_algorithm/fast_check.py +++ b/code_duplication/src/secondary_algorithm/fast_check.py @@ -1,7 +1,9 @@ from fastlog import log +from ..preprocessing.repo_cloner import get_repo_dir +from ..preprocessing.module_parser import get_modules_from_dir -def type1_check(modules): +def type1_check(modules, weight_limit=25): """ Very simple type 1 code duplication check based on AST.dump() function. @@ -9,7 +11,6 @@ def type1_check(modules): modules (list[list[TreeNode]): Python ASTs from a repository """ - WEIGHT_LIMIT = 25 # PRIORITY_CLASSES = [ast.Module, ast.ClassDef, # ast.FunctionDef, ast.AsyncFunctionDef] @@ -19,7 +20,7 @@ def type1_check(modules): visited = set() for n in m: - if n.parent_index in visited or n.weight < WEIGHT_LIMIT: + if n.parent_index in visited or n.weight < weight_limit: visited.add(n.index) continue @@ -31,6 +32,11 @@ def type1_check(modules): else: node_dict[node_dump] = [n] - for v in node_dict.values(): - if len(v) > 1: - log.success(v) + return {k: v for k, v in node_dict.items() if len(v) > 1} + + +def type1_check_repo(repo, weight): + repo_dir = get_repo_dir(repo) + repo_modules = get_modules_from_dir(repo_dir) + + return type1_check(repo_modules, weight) From 486760e99722eac1f9c9900b1e77b048bb0d724a Mon Sep 17 00:00:00 2001 From: natiiix Date: Tue, 23 Jul 2019 11:30:38 +0200 Subject: [PATCH 08/74] Fix element name --- web/app.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/web/app.py b/web/app.py index c3cdae5..8c8f1e9 100644 --- a/web/app.py +++ b/web/app.py @@ -15,9 +15,9 @@ def hello(): with open(_INDEX_HTML, "r", encoding="utf-8") as f: webpage = f.read() - first_repo = request.args.get("first") + first_repo = request.args.get("first_repo") - return webpage.replace("#LOG#", "\n\n".join([f"{k[:20]} -- {v}" for k, v in type1_check_repo(first_repo, 30).items()]) if first_repo else "") + return webpage.replace("#LOG#", "\n\n".join([f"{k[:20]} -- {v}" for k, v in type1_check_repo(first_repo, 15).items()]) if first_repo else "") # try: # conn = connect(conn_str) From 417ce9239fac32afda2d2461adf4e997183c7271 Mon Sep 17 00:00:00 2001 From: natiiix Date: Tue, 23 Jul 2019 14:52:17 +0200 Subject: [PATCH 09/74] Redesign detected clone list in HTML --- web/index.html | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/web/index.html b/web/index.html index 8d4ec41..7129fab 100644 --- a/web/index.html +++ b/web/index.html @@ -39,14 +39,12 @@

Code Clone Detection Parser

-
-
-
- - -
+
+
Detected clones
+
+ #LOG#
- +
From 853ff0fc08f799416cbda1ad6140f55e82a77d98 Mon Sep 17 00:00:00 2001 From: natiiix Date: Tue, 23 Jul 2019 14:52:39 +0200 Subject: [PATCH 10/74] Generate detected clone lists using HTML lists --- web/app.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/web/app.py b/web/app.py index 8c8f1e9..a99c3d2 100644 --- a/web/app.py +++ b/web/app.py @@ -15,9 +15,15 @@ def hello(): with open(_INDEX_HTML, "r", encoding="utf-8") as f: webpage = f.read() + output = "" + first_repo = request.args.get("first_repo") + if first_repo: + output = "
    " + "".join([("
  1. " + k[:40] + "...
      " + + "".join(["
    • " + n.origin + "
    • " for n in v]) + + "
  2. ") for k, v in type1_check_repo(first_repo, 15).items()]) + "
" - return webpage.replace("#LOG#", "\n\n".join([f"{k[:20]} -- {v}" for k, v in type1_check_repo(first_repo, 15).items()]) if first_repo else "") + return webpage.replace("#LOG#", output) # try: # conn = connect(conn_str) From de4714eb8575b7c3217d504a1b7744be0ccc52ea Mon Sep 17 00:00:00 2001 From: natiiix Date: Tue, 23 Jul 2019 16:47:01 +0200 Subject: [PATCH 11/74] Write detection results to JSON file --- web/app.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/web/app.py b/web/app.py index a99c3d2..f151708 100644 --- a/web/app.py +++ b/web/app.py @@ -19,9 +19,14 @@ def hello(): first_repo = request.args.get("first_repo") if first_repo: - output = "
    " + "".join([("
  1. " + k[:40] + "...
      " + - "".join(["
    • " + n.origin + "
    • " for n in v]) + - "
  2. ") for k, v in type1_check_repo(first_repo, 15).items()]) + "
" + result = type1_check_repo(first_repo, 15) + + with open("result.json", "w", encoding="utf-8") as f: + f.write(result.json()) + + output = "
    " + "".join([("
  1. " + f"Value: {c.value}; Weight: {c.weight}; Similarity: {c.similarity * 100:g} %" + "
      " + + "".join(["
    • " + o + "
    • " for o in c.origins]) + + "
  2. ") for c in result.clones]) + "
" return webpage.replace("#LOG#", output) From 6f7489fb38e0e6a80b11a7ca6fbfe3ad24d468db Mon Sep 17 00:00:00 2001 From: natiiix Date: Tue, 23 Jul 2019 16:47:39 +0200 Subject: [PATCH 12/74] Return results using new classes made for results --- code_duplication/src/secondary_algorithm/fast_check.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/code_duplication/src/secondary_algorithm/fast_check.py b/code_duplication/src/secondary_algorithm/fast_check.py index 827f570..3b71ccc 100644 --- a/code_duplication/src/secondary_algorithm/fast_check.py +++ b/code_duplication/src/secondary_algorithm/fast_check.py @@ -1,6 +1,8 @@ from fastlog import log from ..preprocessing.repo_cloner import get_repo_dir from ..preprocessing.module_parser import get_modules_from_dir +from ..output.DetectedClone import DetectedClone +from ..output.DetectionResult import DetectionResult def type1_check(modules, weight_limit=25): @@ -39,4 +41,4 @@ def type1_check_repo(repo, weight): repo_dir = get_repo_dir(repo) repo_modules = get_modules_from_dir(repo_dir) - return type1_check(repo_modules, weight) + return DetectionResult([DetectedClone(k[:20] + "...", 1, v) for k, v in type1_check(repo_modules, weight).items()]) From e44a29bab1effb96a90a3485a4357e8a57601155 Mon Sep 17 00:00:00 2001 From: natiiix Date: Tue, 23 Jul 2019 17:10:44 +0200 Subject: [PATCH 13/74] Change .output to .results in fast_check --- code_duplication/src/secondary_algorithm/fast_check.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/code_duplication/src/secondary_algorithm/fast_check.py b/code_duplication/src/secondary_algorithm/fast_check.py index 3b71ccc..66f97b4 100644 --- a/code_duplication/src/secondary_algorithm/fast_check.py +++ b/code_duplication/src/secondary_algorithm/fast_check.py @@ -1,8 +1,8 @@ from fastlog import log from ..preprocessing.repo_cloner import get_repo_dir from ..preprocessing.module_parser import get_modules_from_dir -from ..output.DetectedClone import DetectedClone -from ..output.DetectionResult import DetectionResult +from ..results.DetectedClone import DetectedClone +from ..results.DetectionResult import DetectionResult def type1_check(modules, weight_limit=25): From c24b586aa7a10cfda10f1bad6c591975d3c09aa4 Mon Sep 17 00:00:00 2001 From: natiiix Date: Tue, 16 Jul 2019 11:23:12 +0200 Subject: [PATCH 14/74] Exclude shell scripts from CRLF line ending rule --- .editorconfig | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/.editorconfig b/.editorconfig index 0d0c734..b070773 100644 --- a/.editorconfig +++ b/.editorconfig @@ -1,9 +1,12 @@ -root = true - -[*] -indent_style = space -indent_size = 4 -charset = utf-8 -end_of_line = crlf -trim_trailing_whitespace = true -insert_final_newline = true +root = true + +[*] +indent_style = space +indent_size = 4 +charset = utf-8 +end_of_line = crlf +trim_trailing_whitespace = true +insert_final_newline = true + +[*.sh] +end_of_line = lf From 0bf679079feea7b6d2a8fb36b62be6f93c657f0d Mon Sep 17 00:00:00 2001 From: natiiix Date: Tue, 16 Jul 2019 11:42:45 +0200 Subject: [PATCH 15/74] Begin implementing web UI server --- web/__init__.py | 0 web/app.py | 30 ++++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+) create mode 100644 web/__init__.py create mode 100644 web/app.py diff --git a/web/__init__.py b/web/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/web/app.py b/web/app.py new file mode 100644 index 0000000..c61e3ec --- /dev/null +++ b/web/app.py @@ -0,0 +1,30 @@ +from flask import Flask +from psycopg2 import connect, Error as PG_Error +from sys import stderr +from .credentials import conn_str +# from code_duplication import ??? + +app = Flask(__name__) + + +@app.route("/") +def hello(): + try: + conn = connect(conn_str) + + cur = conn.cursor() + + cur.execute("SELECT col FROM test;") + return str(cur.fetchall()) + + except PG_Error as ex: + print("PostgreSQL Error:", ex, file=stderr) + + finally: + if conn: + cur.close() + conn.close() + + +if __name__ == "__main__": + app.run() From e67b5e830f82a4fa900e9ac1379308bab715216a Mon Sep 17 00:00:00 2001 From: natiiix Date: Tue, 16 Jul 2019 11:43:03 +0200 Subject: [PATCH 16/74] Add web server credentials file to .gitignore --- .gitignore | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/.gitignore b/.gitignore index fb6fefb..a5b6e02 100644 --- a/.gitignore +++ b/.gitignore @@ -1,9 +1,10 @@ -.idea/ -.vscode/ -*.pyc -code_duplication/build/ -code_duplication/code_duplication_scanner.egg-info/ -code_duplication/dist/ -code_duplication/env/ -code_duplication/repos/ -venv/ +.idea/ +.vscode/ +*.pyc +code_duplication/build/ +code_duplication/code_duplication_scanner.egg-info/ +code_duplication/dist/ +code_duplication/env/ +code_duplication/repos/ +venv/ +web/credentials.py From 289ca9ff85c7cf70e5e63893a71c2c51155b8fb8 Mon Sep 17 00:00:00 2001 From: natiiix Date: Tue, 16 Jul 2019 11:43:30 +0200 Subject: [PATCH 17/74] Add script for running web server in debug mode --- run_web.sh | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100755 run_web.sh diff --git a/run_web.sh b/run_web.sh new file mode 100755 index 0000000..03ccbe6 --- /dev/null +++ b/run_web.sh @@ -0,0 +1,8 @@ +#!/bin/bash + +set -e + +export FLASK_ENV="development" +export FLASK_APP="web/app.py" + +flask run From c654aad7a827526b64ff8a25e06357943a72b49f Mon Sep 17 00:00:00 2001 From: natiiix Date: Tue, 23 Jul 2019 10:01:10 +0200 Subject: [PATCH 18/74] Slightly improve web UI and its server --- web/app.py | 40 ++++++++++++++++++++++------------- web/index.html | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 82 insertions(+), 15 deletions(-) create mode 100644 web/index.html diff --git a/web/app.py b/web/app.py index c61e3ec..c3cdae5 100644 --- a/web/app.py +++ b/web/app.py @@ -1,29 +1,39 @@ -from flask import Flask -from psycopg2 import connect, Error as PG_Error +from flask import Flask, request +# from psycopg2 import connect, Error as PG_Error from sys import stderr -from .credentials import conn_str -# from code_duplication import ??? +# from .credentials import conn_str +from code_duplication.src.secondary_algorithm.fast_check import type1_check_repo +import os.path + +_INDEX_HTML = os.path.join(os.path.dirname(__file__), "index.html") app = Flask(__name__) @app.route("/") def hello(): - try: - conn = connect(conn_str) + with open(_INDEX_HTML, "r", encoding="utf-8") as f: + webpage = f.read() + + first_repo = request.args.get("first") + + return webpage.replace("#LOG#", "\n\n".join([f"{k[:20]} -- {v}" for k, v in type1_check_repo(first_repo, 30).items()]) if first_repo else "") + + # try: + # conn = connect(conn_str) - cur = conn.cursor() + # cur = conn.cursor() - cur.execute("SELECT col FROM test;") - return str(cur.fetchall()) + # cur.execute("SELECT col FROM test;") + # return str(cur.fetchall()) - except PG_Error as ex: - print("PostgreSQL Error:", ex, file=stderr) + # except PG_Error as ex: + # print("PostgreSQL Error:", ex, file=stderr) - finally: - if conn: - cur.close() - conn.close() + # finally: + # if conn: + # cur.close() + # conn.close() if __name__ == "__main__": diff --git a/web/index.html b/web/index.html new file mode 100644 index 0000000..8d4ec41 --- /dev/null +++ b/web/index.html @@ -0,0 +1,57 @@ + + + + + + + Code Clone Duplication Parser + + + + + + + + + + + + + +
+
+

Code Clone Detection Parser

+
+ +
+
+
+
+ + +
+ + +
+
+
+ +
+
+
+
+ + +
+
+
+
+
+ + + + + + \ No newline at end of file From 227164dd9f62766ca960adcc902862a36c6ce026 Mon Sep 17 00:00:00 2001 From: natiiix Date: Tue, 23 Jul 2019 10:01:24 +0200 Subject: [PATCH 19/74] Add Windows batch script for running web server --- run_web.bat | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 run_web.bat diff --git a/run_web.bat b/run_web.bat new file mode 100644 index 0000000..d18ba6f --- /dev/null +++ b/run_web.bat @@ -0,0 +1,3 @@ +set FLASK_ENV=development +set FLASK_APP=web/app.py +flask run From ac484c14f133ee7d8b502d3630b8b84409d3dfa2 Mon Sep 17 00:00:00 2001 From: natiiix Date: Tue, 23 Jul 2019 10:02:17 +0200 Subject: [PATCH 20/74] Make necessary changes to type 1 check algorithm --- .../src/secondary_algorithm/fast_check.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/code_duplication/src/secondary_algorithm/fast_check.py b/code_duplication/src/secondary_algorithm/fast_check.py index 3015d71..827f570 100644 --- a/code_duplication/src/secondary_algorithm/fast_check.py +++ b/code_duplication/src/secondary_algorithm/fast_check.py @@ -1,7 +1,9 @@ from fastlog import log +from ..preprocessing.repo_cloner import get_repo_dir +from ..preprocessing.module_parser import get_modules_from_dir -def type1_check(modules): +def type1_check(modules, weight_limit=25): """ Very simple type 1 code duplication check based on AST.dump() function. @@ -9,7 +11,6 @@ def type1_check(modules): modules (list[list[TreeNode]): Python ASTs from a repository """ - WEIGHT_LIMIT = 25 # PRIORITY_CLASSES = [ast.Module, ast.ClassDef, # ast.FunctionDef, ast.AsyncFunctionDef] @@ -19,7 +20,7 @@ def type1_check(modules): visited = set() for n in m: - if n.parent_index in visited or n.weight < WEIGHT_LIMIT: + if n.parent_index in visited or n.weight < weight_limit: visited.add(n.index) continue @@ -31,6 +32,11 @@ def type1_check(modules): else: node_dict[node_dump] = [n] - for v in node_dict.values(): - if len(v) > 1: - log.success(v) + return {k: v for k, v in node_dict.items() if len(v) > 1} + + +def type1_check_repo(repo, weight): + repo_dir = get_repo_dir(repo) + repo_modules = get_modules_from_dir(repo_dir) + + return type1_check(repo_modules, weight) From cb5a7fa58720caec8975ded12ab460fce2324bb5 Mon Sep 17 00:00:00 2001 From: natiiix Date: Tue, 23 Jul 2019 11:30:38 +0200 Subject: [PATCH 21/74] Fix element name --- web/app.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/web/app.py b/web/app.py index c3cdae5..8c8f1e9 100644 --- a/web/app.py +++ b/web/app.py @@ -15,9 +15,9 @@ def hello(): with open(_INDEX_HTML, "r", encoding="utf-8") as f: webpage = f.read() - first_repo = request.args.get("first") + first_repo = request.args.get("first_repo") - return webpage.replace("#LOG#", "\n\n".join([f"{k[:20]} -- {v}" for k, v in type1_check_repo(first_repo, 30).items()]) if first_repo else "") + return webpage.replace("#LOG#", "\n\n".join([f"{k[:20]} -- {v}" for k, v in type1_check_repo(first_repo, 15).items()]) if first_repo else "") # try: # conn = connect(conn_str) From fac6aca45a61773500d74eba2ca26c6e63d1262f Mon Sep 17 00:00:00 2001 From: natiiix Date: Tue, 23 Jul 2019 14:52:17 +0200 Subject: [PATCH 22/74] Redesign detected clone list in HTML --- web/index.html | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/web/index.html b/web/index.html index 8d4ec41..7129fab 100644 --- a/web/index.html +++ b/web/index.html @@ -39,14 +39,12 @@

Code Clone Detection Parser

-
-
-
- - -
+
+
Detected clones
+
+ #LOG#
- +
From 18a4eadc4e204aec5bf5bd7d052742922c728c6d Mon Sep 17 00:00:00 2001 From: natiiix Date: Tue, 23 Jul 2019 14:52:39 +0200 Subject: [PATCH 23/74] Generate detected clone lists using HTML lists --- web/app.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/web/app.py b/web/app.py index 8c8f1e9..a99c3d2 100644 --- a/web/app.py +++ b/web/app.py @@ -15,9 +15,15 @@ def hello(): with open(_INDEX_HTML, "r", encoding="utf-8") as f: webpage = f.read() + output = "" + first_repo = request.args.get("first_repo") + if first_repo: + output = "
    " + "".join([("
  1. " + k[:40] + "...
      " + + "".join(["
    • " + n.origin + "
    • " for n in v]) + + "
  2. ") for k, v in type1_check_repo(first_repo, 15).items()]) + "
" - return webpage.replace("#LOG#", "\n\n".join([f"{k[:20]} -- {v}" for k, v in type1_check_repo(first_repo, 15).items()]) if first_repo else "") + return webpage.replace("#LOG#", output) # try: # conn = connect(conn_str) From 05d9233c6ad6be24167576eda5144238fe355fc9 Mon Sep 17 00:00:00 2001 From: natiiix Date: Tue, 23 Jul 2019 16:47:01 +0200 Subject: [PATCH 24/74] Write detection results to JSON file --- web/app.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/web/app.py b/web/app.py index a99c3d2..f151708 100644 --- a/web/app.py +++ b/web/app.py @@ -19,9 +19,14 @@ def hello(): first_repo = request.args.get("first_repo") if first_repo: - output = "
    " + "".join([("
  1. " + k[:40] + "...
      " + - "".join(["
    • " + n.origin + "
    • " for n in v]) + - "
  2. ") for k, v in type1_check_repo(first_repo, 15).items()]) + "
" + result = type1_check_repo(first_repo, 15) + + with open("result.json", "w", encoding="utf-8") as f: + f.write(result.json()) + + output = "
    " + "".join([("
  1. " + f"Value: {c.value}; Weight: {c.weight}; Similarity: {c.similarity * 100:g} %" + "
      " + + "".join(["
    • " + o + "
    • " for o in c.origins]) + + "
  2. ") for c in result.clones]) + "
" return webpage.replace("#LOG#", output) From aa9609a763436cdeabdd8f7c516b7b2d58b9d6d7 Mon Sep 17 00:00:00 2001 From: natiiix Date: Tue, 23 Jul 2019 16:47:39 +0200 Subject: [PATCH 25/74] Return results using new classes made for results --- code_duplication/src/secondary_algorithm/fast_check.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/code_duplication/src/secondary_algorithm/fast_check.py b/code_duplication/src/secondary_algorithm/fast_check.py index 827f570..3b71ccc 100644 --- a/code_duplication/src/secondary_algorithm/fast_check.py +++ b/code_duplication/src/secondary_algorithm/fast_check.py @@ -1,6 +1,8 @@ from fastlog import log from ..preprocessing.repo_cloner import get_repo_dir from ..preprocessing.module_parser import get_modules_from_dir +from ..output.DetectedClone import DetectedClone +from ..output.DetectionResult import DetectionResult def type1_check(modules, weight_limit=25): @@ -39,4 +41,4 @@ def type1_check_repo(repo, weight): repo_dir = get_repo_dir(repo) repo_modules = get_modules_from_dir(repo_dir) - return type1_check(repo_modules, weight) + return DetectionResult([DetectedClone(k[:20] + "...", 1, v) for k, v in type1_check(repo_modules, weight).items()]) From 73980d3a278c05e64fc497547b188c99f402e5a3 Mon Sep 17 00:00:00 2001 From: natiiix Date: Tue, 23 Jul 2019 17:10:44 +0200 Subject: [PATCH 26/74] Change .output to .results in fast_check --- code_duplication/src/secondary_algorithm/fast_check.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/code_duplication/src/secondary_algorithm/fast_check.py b/code_duplication/src/secondary_algorithm/fast_check.py index 3b71ccc..66f97b4 100644 --- a/code_duplication/src/secondary_algorithm/fast_check.py +++ b/code_duplication/src/secondary_algorithm/fast_check.py @@ -1,8 +1,8 @@ from fastlog import log from ..preprocessing.repo_cloner import get_repo_dir from ..preprocessing.module_parser import get_modules_from_dir -from ..output.DetectedClone import DetectedClone -from ..output.DetectionResult import DetectionResult +from ..results.DetectedClone import DetectedClone +from ..results.DetectionResult import DetectionResult def type1_check(modules, weight_limit=25): From 5a7810a85787c8bc1f626546ea760ce90cc5d110 Mon Sep 17 00:00:00 2001 From: natiiix Date: Tue, 23 Jul 2019 17:51:38 +0200 Subject: [PATCH 27/74] Disable centering on clone list heading --- web/index.html | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/web/index.html b/web/index.html index 7129fab..c5f87ff 100644 --- a/web/index.html +++ b/web/index.html @@ -39,11 +39,9 @@

Code Clone Detection Parser

-
+
Detected clones
-
- #LOG# -
+
#LOG#
@@ -52,4 +50,4 @@
Detected clones
- \ No newline at end of file + From 0fe97e1ba87aaef5632165ac0ffa8da24c4837fe Mon Sep 17 00:00:00 2001 From: natiiix Date: Tue, 23 Jul 2019 17:52:10 +0200 Subject: [PATCH 28/74] Improve clone header format in HTML output --- web/app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web/app.py b/web/app.py index f151708..5ba8509 100644 --- a/web/app.py +++ b/web/app.py @@ -24,7 +24,7 @@ def hello(): with open("result.json", "w", encoding="utf-8") as f: f.write(result.json()) - output = "
    " + "".join([("
  1. " + f"Value: {c.value}; Weight: {c.weight}; Similarity: {c.similarity * 100:g} %" + "
      " + + output = "
        " + "".join([("
      1. " + f"{c.value} - Weight: {c.weight} - Similarity: {c.similarity * 100:g} %" + "
          " + "".join(["
        • " + o + "
        • " for o in c.origins]) + "
      2. ") for c in result.clones]) + "
      " From 5398f982715cb9cf8a14910fec1c191b3c92e3b9 Mon Sep 17 00:00:00 2001 From: natiiix Date: Tue, 23 Jul 2019 17:52:56 +0200 Subject: [PATCH 29/74] Use TreeNode.value instead of skeleton string --- code_duplication/src/secondary_algorithm/fast_check.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/code_duplication/src/secondary_algorithm/fast_check.py b/code_duplication/src/secondary_algorithm/fast_check.py index 66f97b4..ee4f766 100644 --- a/code_duplication/src/secondary_algorithm/fast_check.py +++ b/code_duplication/src/secondary_algorithm/fast_check.py @@ -41,4 +41,6 @@ def type1_check_repo(repo, weight): repo_dir = get_repo_dir(repo) repo_modules = get_modules_from_dir(repo_dir) - return DetectionResult([DetectedClone(k[:20] + "...", 1, v) for k, v in type1_check(repo_modules, weight).items()]) + return DetectionResult([ + DetectedClone(node_list[0].value, 1, node_list) for node_list in + type1_check(repo_modules, weight).values()]) From 76c9b02409d157e632dccc936af6e4ad77c395d2 Mon Sep 17 00:00:00 2001 From: natiiix Date: Tue, 23 Jul 2019 17:53:50 +0200 Subject: [PATCH 30/74] Change repo submit button text for clarity --- web/index.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web/index.html b/web/index.html index c5f87ff..31b245e 100644 --- a/web/index.html +++ b/web/index.html @@ -31,7 +31,7 @@

      Code Clone Detection Parser

      - From 69617c942b729d37f233e6e5cae0a6c624ea5005 Mon Sep 17 00:00:00 2001 From: natiiix Date: Tue, 23 Jul 2019 18:15:25 +0200 Subject: [PATCH 31/74] Make repo URL input box more self-descriptive --- web/index.html | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/web/index.html b/web/index.html index 31b245e..75f8e24 100644 --- a/web/index.html +++ b/web/index.html @@ -27,8 +27,8 @@

      Code Clone Detection Parser

      - - + +
      -
      -
      Detected clones
      -
      #LOG#
      -
      + #CONTENT#
      @@ -50,4 +47,4 @@
      Detected clones
      - \ No newline at end of file + diff --git a/web/message.html b/web/message.html new file mode 100644 index 0000000..0b15105 --- /dev/null +++ b/web/message.html @@ -0,0 +1,3 @@ +
      + #MSG# +
      diff --git a/web/results.html b/web/results.html new file mode 100644 index 0000000..c7ebcd1 --- /dev/null +++ b/web/results.html @@ -0,0 +1,4 @@ +
      +
      Detected clones
      +
      #CLONES#
      +