In [None]:
import os
import subprocess
import ast

# Language names to check; each one is also used as a sub-directory name
# under BASE_DIR when the dataset is scanned.
LANGUAGES = [
    'go',
    'java',
    'javascript',
    'php',
    'python',
    'ruby',
]

# Root directory that holds one sub-directory per language.
BASE_DIR = 'dataset'


In [None]:
import os
import subprocess
import ast
import tempfile

# Language names to check; each one is also used as a sub-directory name
# under BASE_DIR when the dataset is scanned.
LANGUAGES = [
    'go',
    'java',
    'javascript',
    'php',
    'python',
    'ruby',
]

# Root directory that holds one sub-directory per language.
BASE_DIR = 'dataset'

def check_syntax_python(filepath):
    """Return True if the file at *filepath* parses as Python source.

    The file is read as UTF-8 and handed to ``ast.parse``; the code is
    never executed.

    Args:
        filepath: Path of the file to check.

    Returns:
        True when parsing succeeds. False when the content is not valid
        Python source, which includes content that cannot be decoded as
        UTF-8 and content that ``ast.parse`` rejects with ``ValueError``.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            code = f.read()
        ast.parse(code)
    except (SyntaxError, ValueError):
        # UnicodeDecodeError is a ValueError subclass, so undecodable files
        # land here too. Without this, one such file would abort the whole
        # scan instead of being counted as invalid.
        return False
    return True

def check_syntax_subprocess(command):
    """Run *command* and report whether it exited with status 0.

    The command's stdout and stderr are captured as text and discarded.

    Args:
        command: Argument list passed to ``subprocess.run``.

    Returns:
        True if the process ran and returned exit code 0; False if it
        returned any other code or if launching/running it raised an
        exception (for example, the executable is not installed).
    """
    try:
        completed = subprocess.run(command, capture_output=True, text=True)
    except Exception:
        # Best effort: a checker that cannot be run counts as a failure.
        return False
    return completed.returncode == 0

def check_syntax_go(filepath):
    """Check a Go source file or snippet with the external ``go_syntax_checker``.

    The file is read as UTF-8. If its stripped content does not start with
    ``package``, a ``package main`` clause is prepended. The result is
    written to a temporary ``.go`` file and passed to
    ``./go_syntax_checker`` (resolved relative to the current working
    directory). The temporary file is always removed afterwards.

    Args:
        filepath: Path of the Go file to check.

    Returns:
        True if the checker exits with status 0. False if it exits with
        any other status, or if any step raises (unreadable input, checker
        binary missing, ...); the exception is printed, not propagated.
    """
    # Imported locally as in the original: not every cell of this notebook
    # imports tempfile at module level.
    import tempfile

    # Bound before the try block so the cleanup in ``finally`` never touches
    # an unassigned name when an early step fails.
    code_path = None
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            code = f.read()

        if not code.strip().startswith("package"):
            code = "package main\n\n" + code

        # Write the (possibly prefixed) code to a temp file. The encoding is
        # explicit so the platform default cannot reject characters that
        # were just read as UTF-8.
        with tempfile.NamedTemporaryFile(mode='w', suffix='.go', delete=False,
                                         encoding='utf-8') as code_file:
            # Record the path first so the file is cleaned up even if the
            # write itself fails.
            code_path = code_file.name
            code_file.write(code)

        # Run the precompiled checker (assumes go_syntax_checker is in the
        # current working directory).
        result = subprocess.run(['./go_syntax_checker', code_path],
                                stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
        if result.returncode != 0:
            print("❌ Syntax error:")
            print(result.stderr)
            return False

        print("✅ Syntax is valid.")
        return True

    except Exception as e:
        print("Exception:", e)
        return False
    finally:
        if code_path is not None and os.path.exists(code_path):
            os.remove(code_path)

def check_syntax_java(filepath):
    """Check a Java source file or snippet with the external ``JavaSyntaxChecker``.

    The file is read as UTF-8. If the first line does not contain the word
    ``class``, the whole content is wrapped in a dummy
    ``public class SyntaxWrapper { ... }`` so that a bare method forms a
    compilation unit. The result is written to a temporary ``.java`` file
    and passed to ``java -cp .<sep>javaparser-core-3.25.4.jar
    JavaSyntaxChecker <tmp>``. The temporary file is always removed
    afterwards.

    Args:
        filepath: Path of the Java file to check.

    Returns:
        True if the checker exits with status 0 and its stdout does not
        contain ``INVALID``. False otherwise, including when any step
        raises (unreadable input, ``java`` not installed, ...); the
        exception is printed, not propagated.
    """
    # Bound before the try block so the cleanup in ``finally`` can test it
    # directly instead of probing locals().
    tmp_path = None
    try:
        with open(filepath, 'r', encoding='utf-8') as f:
            original_code = f.read()

        # If the snippet's first line doesn't mention a class declaration,
        # wrap it in a dummy class so it forms a valid compilation unit.
        first_line = original_code.split('\n')[0].strip()
        if "class" not in first_line:
            wrapped_code = "public class SyntaxWrapper {\n" + original_code + "\n}"
        else:
            wrapped_code = original_code

        # Write the (possibly wrapped) code to a temporary file. The encoding
        # is explicit so the platform default cannot reject characters that
        # were just read as UTF-8.
        with tempfile.NamedTemporaryFile(mode='w', suffix='.java', delete=False,
                                         encoding='utf-8') as tmp_file:
            # Record the path first so the file is cleaned up even if the
            # write itself fails.
            tmp_path = tmp_file.name
            tmp_file.write(wrapped_code)

        # Classpath separator: Windows uses ';', other platforms use ':'.
        sep = ';' if os.name == 'nt' else ':'
        # Classpath: current directory + the JavaParser jar.
        classpath = f".{sep}javaparser-core-3.25.4.jar"

        # Run the precompiled JavaSyntaxChecker on the temporary file.
        result = subprocess.run([
            'java',
            '-cp', classpath,
            'JavaSyntaxChecker',
            tmp_path
        ], stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)

        print("Java Syntax Checker Output:")
        print(result.stdout)

        # A non-zero exit status also counts as failure, so that a JVM which
        # could not even start the checker (empty stdout) is not reported as
        # valid syntax.
        # NOTE(review): assumes JavaSyntaxChecker exits 0 for valid input --
        # confirm against the checker's source.
        if result.returncode != 0 or "INVALID" in result.stdout:
            print("❌ Java syntax error:")
            print(result.stderr)
            return False

        print("✅ Java syntax is valid.")
        return True

    except Exception as e:
        print("Exception:", e)
        return False
    finally:
        if tmp_path is not None and os.path.exists(tmp_path):
            os.remove(tmp_path)

def check_file_syntax(language, filepath):
    """Dispatch *filepath* to the syntax checker for *language*.

    Go, Java and Python have dedicated checker functions. JavaScript, PHP
    and Ruby are checked by running their interpreter's syntax-only mode
    through ``check_syntax_subprocess``.

    Args:
        language: One of 'go', 'java', 'javascript', 'php', 'python', 'ruby'.
        filepath: Path of the file to check.

    Returns:
        The selected checker's boolean result, or False when *language* is
        not one of the supported names.
    """
    if language == 'go':
        return check_syntax_go(filepath)
    if language == 'java':
        return check_syntax_java(filepath)
    if language == 'python':
        return check_syntax_python(filepath)

    # Languages validated by an external command-line tool: the file path is
    # appended to the listed argument prefix.
    cli_checkers = (
        ('javascript', ['node', '--check']),
        ('php', ['php', '-l']),
        ('ruby', ['ruby', '-c']),
    )
    for name, argv_prefix in cli_checkers:
        if language == name:
            return check_syntax_subprocess(argv_prefix + [filepath])

    return False

def check_all_languages():
    """Syntax-check every file under ``BASE_DIR/<language>`` and print a summary.

    For each entry of ``LANGUAGES`` the matching sub-directory of
    ``BASE_DIR`` is listed (non-recursively) and every regular file in it is
    passed to ``check_file_syntax``. A line is printed for each failing
    file, followed by a per-language total. Languages whose directory does
    not exist are reported and skipped.
    """
    for lang in LANGUAGES:
        lang_dir = os.path.join(BASE_DIR, lang)
        if not os.path.exists(lang_dir):
            print(f"Directory {lang_dir} not found.")
            continue

        print(f"\nChecking syntax for language: {lang}")
        valid = 0
        invalid = 0

        for entry in os.listdir(lang_dir):
            candidate = os.path.join(lang_dir, entry)
            # Only regular files are checked; sub-directories are ignored.
            if not os.path.isfile(candidate):
                continue
            if check_file_syntax(lang, candidate):
                valid += 1
                continue
            invalid += 1
            print(f"✗ Syntax error in: {candidate}")

        # Every checked file is counted exactly once as valid or invalid,
        # so the total is their sum.
        print(f"Summary for {lang}: Total={valid + invalid}, Valid={valid}, Invalid={invalid}")


In [None]:
# Run the syntax check over every configured language directory and print
# the per-language summaries.
check_all_languages()