In [1]:
from lang_processors.java_processor import JavaProcessor
processor = JavaProcessor(root_folder='lang_processors/tree-sitter')

In [2]:
with open('Test.java', 'r', encoding='utf-8') as f:
    codes = f.read()
codes

'package tool;\n\nimport java.io.BufferedWriter;\nimport java.io.FileWriter;\nimport java.io.IOException;\n\n/**\n * これはJavaのHello Worldプログラムです。\n * ドックストリングはプログラムの説明やドキュメントを提供します。\n * このプログラムでは、forループを使用して3回Hello Worldを出力します。\n */\npublic class Test {\n    public static void main(String[] args) {\n        // forループを使用して3回Hello Worldを出力\n        for (int i = 0; i < 3; i++) {\n            System.out.println("Hello World!");\n        }\n    }\n}\n'

In [3]:
from clean_comment import exclude_comment
out = exclude_comment(codes)
print(out)

package tool;
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
public class Test {
    public static void main(String[] args) {
        for (int i = 0; i < 3; i++) {
            System.out.println("Hello World!");
        }
    }
}


In [7]:
from tree_sitter import Language, Parser

JAVA = Language('lang_processors/tree-sitter/java.so', 'java')
parser = Parser()
parser.set_language(JAVA)

In [15]:
ast_nodes_type_string = ["comment", "string_literal", "character_literal"]
def dfs(code, node, tokens, tokens_type):
    # print(node.type)
    if len(node.children) == 0 or node.type in ast_nodes_type_string:
        snippet = code[node.start_byte: node.end_byte]
        if isinstance(snippet, bytes):
            snippet = snippet.decode("utf8")
        if len(snippet) > 0:
            tokens.append(snippet)
            tokens_type.append(node.type)
        return
    for child in node.children:
        print(child)
        dfs(code, child, tokens, tokens_type)

In [18]:
out = out.replace("\r", "")
print(out)
tree = parser.parse(bytes(out,"utf8"))

package tool;
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
public class Test {
    public static void main(String[] args) {
        for (int i = 0; i < 3; i++) {
            System.out.println("Hello World!");
        }
    }
}


In [17]:
tokens = []
tokens_types = []
print(dfs(out, tree.root_node, tokens, tokens_types))

<Node type=package_declaration, start_point=(0, 0), end_point=(0, 13)>
<Node type="package", start_point=(0, 0), end_point=(0, 7)>
<Node type=identifier, start_point=(0, 8), end_point=(0, 12)>
<Node type=";", start_point=(0, 12), end_point=(0, 13)>
<Node type=import_declaration, start_point=(1, 0), end_point=(1, 30)>
<Node type="import", start_point=(1, 0), end_point=(1, 6)>
<Node type=scoped_identifier, start_point=(1, 7), end_point=(1, 29)>
<Node type=scoped_identifier, start_point=(1, 7), end_point=(1, 14)>
<Node type=identifier, start_point=(1, 7), end_point=(1, 11)>
<Node type=".", start_point=(1, 11), end_point=(1, 12)>
<Node type=identifier, start_point=(1, 12), end_point=(1, 14)>
<Node type=".", start_point=(1, 14), end_point=(1, 15)>
<Node type=identifier, start_point=(1, 15), end_point=(1, 29)>
<Node type=";", start_point=(1, 29), end_point=(1, 30)>
<Node type=import_declaration, start_point=(2, 0), end_point=(2, 26)>
<Node type="import", start_point=(2, 0), end_point=(2, 6)>

In [11]:
for token, token_type in zip(tokens, tokens_types):
    print(token, token_type)
    

package package
tool identifier
; ;
import import
java identifier
. .
io identifier
. .
BufferedWriter identifier
; ;
import import
java identifier
. .
io identifier
. .
FileWriter identifier
; ;
import import
java identifier
. .
io identifier
. .
IOException identifier
; ;
public public
class class
Test identifier
{ {
public public
static static
void void_type
main identifier
( (
String type_identifier
[ [
] ]
args identifier
) )
{ {
for for
( (
int int
i identifier
= =
0 decimal_integer_literal
; ;
i identifier
< <
3 decimal_integer_literal
; ;
i identifier
++ ++
) )
{ {
System identifier
. .
out identifier
. .
println identifier
( (
"Hello World!" string_literal
) )
; ;
} }
} }
} }


In [4]:
result = processor.tokenize_code(out)
print(result)

['package', 'tool', ';', 'import', 'java', '.', 'io', '.', 'BufferedWriter', ';', 'import', 'java', '.', 'io', '.', 'FileWriter', ';', 'import', 'java', '.', 'io', '.', 'IOException', ';', 'public', 'class', 'Test', '{', 'public', 'static', 'void', 'main', '(', 'String', '[', ']', 'args', ')', '{', 'for', '(', 'int', 'i', '=', '0', ';', 'i', '<', '3', ';', 'i', '++', ')', '{', 'System', '.', 'out', '.', 'println', '(', '"Hello World!"', ')', ';', '}', '}', '}']
package package
tool identifier
; ;
import import
java identifier
. .
io identifier
. .
BufferedWriter identifier
; ;
import import
java identifier
. .
io identifier
. .
FileWriter identifier
; ;
import import
java identifier
. .
io identifier
. .
IOException identifier
; ;
public public
class class
Test identifier
{ {
public public
static static
void void_type
main identifier
( (
String type_identifier
[ [
] ]
args identifier
) )
{ {
for for
( (
int int
i identifier
= =
0 decimal_integer_literal
; ;
i identifier
< <
3 decimal_i

In [5]:
result2 = processor.detokenize_code(result)
print(result2)

package tool ;
import java . io . BufferedWriter ;
import java . io . FileWriter ;
import java . io . IOException ;
public class Test {
  public static void main ( String [ ] args ) {
    for ( int i = 0 ;
    i < 3 ;
    i ++ ) {
      System . out . println ( "Hello World!" ) ;
    }
  }
}

