In [2]:
import pathlib

def find_java_files(directory):
    """Lazily yield every ``*.java`` path found anywhere beneath ``directory``.

    Args:
        directory: Root folder to search (str or path-like).

    Yields:
        pathlib.Path objects, one per match of the recursive ``**/*.java`` glob.
    """
    root = pathlib.Path(directory)
    yield from root.glob('**/*.java')

In [3]:

# The JDK 21 source tree is laid out completely differently from the JDK 8 one,
# so each version gets its own root directory.
jdk8_path = r"C:\Users\ider\Downloads\openjdk-8u43+b03_src\openjdk\jdk\src\share\classes"
jdk21_path = r"C:\Users\ider\Downloads\openjdk\src"

In [77]:
# Scratch test: print the name of the first type declared in one JDK source file.
import re
# line = "public class SearchFilter implements AttrFilter {"
file_path = r"c:\Users\ider\Downloads\openjdk-8u43+b03_src\openjdk\jdk\src\share\classes\com\sun\jndi\toolkit\dir\SearchFilter.java"

# Decode explicitly as UTF-8 (a superset of ASCII) and replace undecodable bytes,
# so the result does not depend on the platform's default locale encoding and a
# stray byte cannot abort the read.
with open(file_path, 'r', encoding='utf-8', errors='replace') as file:
    data = file.read()

# Strip block comments (non-greedy, spanning newlines).
data = re.sub(r'/\*.*?\*/', '', data, flags=re.DOTALL)

# Strip line comments.
data = re.sub(r'//.*', '', data)

# Keep only non-blank lines.
lines = [line for line in data.splitlines() if line.strip()]

class_pattern = re.compile(r'^\s*(final public\s+)?(public abstract\s+)?(public final\s+)?(abstract\s+)?(public\s+)?(final\s+)?(class|interface|enum)\s+(\w+)')

# Scan for the first declaration line instead of relying on a hard-coded line
# index, which is only valid for one particular file.
match = next(filter(None, map(class_pattern.match, lines)), None)
if match:
    class_name = match.group(8)
    print(class_name)


SearchFilter


In [105]:
import re
import time

def handle_file(file_path):
    """Extract the package, the imports and the top-level type sizes of a Java file.

    The file is processed textually: comments are removed with regular
    expressions, blank lines are dropped, and the remaining lines are scanned
    for top-level type declarations whose extent is found by brace counting.

    Known limitations of this textual approach:
      * ``//`` and ``/* */`` sequences inside string literals are stripped too.
      * Braces inside string/char literals are counted like real braces.
      * Nested types are not reported separately; their lines count towards
        the enclosing top-level type.
      * ``import static ...`` statements are not captured by the import regex.

    Args:
        file_path: Path of the ``.java`` file to read (str or path-like).

    Returns:
        dict with the keys
          ``"package_name"``: declared package as str, or None if none found;
          ``"import_names"``: list of imported names as written, e.g. ``"java.io.*"``;
          ``"classes"``: list of ``(type_name, line_count)`` tuples, where
            ``line_count`` is measured on the comment-free, blank-free lines
            from the declaration line to the line that closes its body.
    """
    # Decode explicitly as UTF-8 (a superset of ASCII) and replace undecodable
    # bytes so one odd byte or a non-UTF-8 locale cannot abort a whole scan.
    with open(file_path, 'r', encoding='utf-8', errors='replace') as file:
        data = file.read()

    # Strip block comments (non-greedy, spanning newlines), then line comments.
    data = re.sub(r'/\*.*?\*/', '', data, flags=re.DOTALL)
    data = re.sub(r'//.*', '', data)

    # Package declaration. The first character class used to read [a-zA_Z_],
    # which rejected most upper-case initial letters.
    match = re.search(r'package\s+([a-zA-Z_][\w.]*);', data)
    package_name = match.group(1) if match else None

    # Import statements, wildcard suffix included.
    import_names = re.findall(r'import\s+([\w.]+\*?);', data)

    # Keep only non-blank lines; all line counts below refer to this list.
    lines = [line for line in data.splitlines() if line.strip()]

    # Any sequence of the modifiers a top-level type may carry, followed by the
    # kind keyword and the type name (captured as group 1).
    class_pattern = re.compile(
        r'^\s*(?:(?:public|abstract|final|strictfp|sealed|non-sealed)\s+)*'
        r'(?:class|@?interface|enum|record)\s+(\w+)'
    )

    classes = []
    class_name = None
    class_start_line = None
    brace_count = 0
    body_opened = False  # True once the first '{' of the current type was seen

    for i, line in enumerate(lines):
        if class_name is None:
            # Look for the start of the next top-level type.
            match = class_pattern.match(line)
            if match is None:
                continue
            class_name = match.group(1)
            class_start_line = i
            brace_count = 0
            body_opened = False

        opened = line.count('{')
        brace_count += opened - line.count('}')
        if opened:
            body_opened = True

        # The type ends when its braces balance again *after* the body was
        # opened. This replaces the former "more than 3 lines" guess, which
        # merged short types into the following one and cut off declarations
        # whose header spans several lines; it also no longer depends on the
        # start index being non-zero.
        if body_opened and brace_count == 0:
            classes.append((class_name, i - class_start_line + 1))
            class_name = None

    if class_name is not None:
        # Unbalanced braces: attribute everything up to the end of the file.
        classes.append((class_name, len(lines) - class_start_line))

    return {
        "package_name": package_name,
        "import_names": import_names,
        "classes": classes,
    }


# Try handle_file on a single JDK 8 source file and show each extracted field.
# Alternative sample input:
# file = r"c:\Users\ider\Downloads\openjdk-8u43+b03_src\openjdk\jdk\src\share\classes\com\sun\jndi\toolkit\dir\SearchFilter.java"
file = r"C:\Users\ider\Downloads\openjdk-8u43+b03_src\openjdk\jdk\src\share\classes\java\util\Hashtable.java"
out = handle_file(file)
for field in ('package_name', 'import_names', 'classes'):
    print(out[field])
# print(out['data'])


java.util
['java.io.*', 'java.util.concurrent.ThreadLocalRandom', 'java.util.function.BiConsumer', 'java.util.function.Function', 'java.util.function.BiFunction']
[('Hashtable', 781)]


In [113]:
import collections
import csv
    
def handle_jdk(jdk_path, jdk_version):
    """Scan a Java source tree and write per-class size and import counts to CSV.

    Every ``.java`` file below ``jdk_path`` is parsed with ``handle_file``.
    Files that yield no class are reported on stdout and skipped. For each
    (package, class) pair the number of import statements referring to it is
    counted: an explicit import counts for that class, a wildcard import
    counts for every class recorded in the package. The result is written to
    ``<jdk_version>.csv`` in the current working directory.

    Args:
        jdk_path: Root directory of the source tree.
        jdk_version: Label used in the progress message and as CSV file stem.
    """
    parsed_files = []
    for java_file in find_java_files(jdk_path):
        info = handle_file(java_file)
        if info['classes']:
            parsed_files.append(info)
        else:
            print("miss class:", java_file)
    # Note: this is the number of files that produced at least one class.
    print(jdk_version,"class count:", len(parsed_files))

    # Both indexes hold the *same* record objects, so an increment made through
    # either of them is visible through the other.
    by_class = collections.defaultdict(list)
    by_package = collections.defaultdict(list)
    for info in parsed_files:
        pkg = info['package_name']
        for type_name, line_count in info["classes"]:
            record = {"count": 0, "lines": line_count}
            by_class[(pkg, type_name)].append(record)
            by_package[pkg].append(record)

    for info in parsed_files:
        for imported in info["import_names"]:
            if imported.endswith(".*"):
                # Wildcard import: credit every class of that package.
                targets = by_package.get(imported.replace(".*", ""), ())
            else:
                # Explicit import: split into owning package and simple name.
                owner, _, simple_name = imported.rpartition(".")
                targets = by_class.get((owner, simple_name), ())
            for record in targets:
                record["count"] += 1

    # Save as CSV; when several files define the same (package, class), the
    # first entry with the largest line count represents it.
    with open(f"{jdk_version}.csv","wt", newline='') as csv_file:
        out = csv.writer(csv_file)
        out.writerow(["package","className","lineCount","citedCount"])
        for (pkg, type_name), records in by_class.items():
            biggest = max(records, key=lambda rec: rec['lines'])
            out.writerow([pkg, type_name, biggest["lines"], biggest["count"]])
        
# Analyse both source trees; each call writes "<version>.csv" (jdk8.csv, jdk21.csv)
# to the current working directory.
handle_jdk(jdk8_path, "jdk8")
handle_jdk(jdk21_path, "jdk21")

miss class: C:\Users\ider\Downloads\openjdk-8u43+b03_src\openjdk\jdk\src\share\classes\com\sun\jarsigner\package-info.java
miss class: C:\Users\ider\Downloads\openjdk-8u43+b03_src\openjdk\jdk\src\share\classes\com\sun\jdi\package-info.java
miss class: C:\Users\ider\Downloads\openjdk-8u43+b03_src\openjdk\jdk\src\share\classes\com\sun\jdi\connect\package-info.java
miss class: C:\Users\ider\Downloads\openjdk-8u43+b03_src\openjdk\jdk\src\share\classes\com\sun\jdi\connect\spi\package-info.java
miss class: C:\Users\ider\Downloads\openjdk-8u43+b03_src\openjdk\jdk\src\share\classes\com\sun\jdi\event\package-info.java
miss class: C:\Users\ider\Downloads\openjdk-8u43+b03_src\openjdk\jdk\src\share\classes\com\sun\jdi\request\package-info.java
miss class: C:\Users\ider\Downloads\openjdk-8u43+b03_src\openjdk\jdk\src\share\classes\com\sun\management\package-info.java
miss class: C:\Users\ider\Downloads\openjdk-8u43+b03_src\openjdk\jdk\src\share\classes\com\sun\net\httpserver\package-info.java
miss c

In [109]:
# print(id(name_collect[('com.oracle.net', 'Sdp')][0]))
# for key,items in name_collect.items():
#     if len(items) > 1:
#         print(key, items)

[{'count': 23, 'lines': 124},
 {'count': 23, 'lines': 71},
 {'count': 23, 'lines': 24}]