## Numbering:

In [2]:
import os
import re

# Input and output folders
input_dir = "contracts"
output_dir = "numbered_contracts"
os.makedirs(output_dir, exist_ok=True)

# Matches ONE leading line-number prefix such as "123: ".
# Only spaces/tabs are accepted around the number (never the line terminator),
# and at most one separator blank after the colon is consumed. A trailing
# `\s*` would also swallow the newline of an already-numbered blank line
# ("12: \n" -> ""), gluing the following line onto it in the output, and would
# strip the indentation of the code itself.
line_number_pattern = re.compile(r'^[ \t]*\d+[ \t]*:[ \t]?')


def _contract_sort_key(name):
    """Order numerically named contracts (1.sol, 2.sol, ...) by number; any other name sorts after them."""
    stem = name.split('.')[0]
    try:
        return (0, int(stem), name)
    except ValueError:
        # Non-numeric stem: keep it processable instead of aborting the whole run.
        return (1, 0, name)


# Filter BEFORE sorting so that unrelated directory entries never reach the sort key.
sol_files = [name for name in os.listdir(input_dir) if name.endswith(".sol")]

# Process files in numeric order (1.sol, 2.sol, ...)
for filename in sorted(sol_files, key=_contract_sort_key):
    input_path = os.path.join(input_dir, filename)
    output_path = os.path.join(output_dir, filename)

    with open(input_path, "r", encoding="utf-8") as infile:
        lines = infile.readlines()

    cleaned_lines = []
    for line in lines:
        # Peel off every stacked prefix ("3: 3: code" -> "code"); a file may
        # have been numbered more than once.
        cleaned = line
        while line_number_pattern.match(cleaned):
            cleaned = line_number_pattern.sub('', cleaned, count=1)
        cleaned_lines.append(cleaned)

    # Write clean, re-numbered lines; each line still carries its own terminator.
    with open(output_path, "w", encoding="utf-8") as outfile:
        for i, line in enumerate(cleaned_lines, start=1):
            outfile.write(f"{i}: {line}")

    print(f"Cleaned and numbered: {filename}")


Cleaned and numbered: 0.sol
Cleaned and numbered: 1.sol
Cleaned and numbered: 2.sol
Cleaned and numbered: 3.sol
Cleaned and numbered: 4.sol
Cleaned and numbered: 5.sol
Cleaned and numbered: 6.sol
Cleaned and numbered: 7.sol
Cleaned and numbered: 8.sol
Cleaned and numbered: 9.sol
Cleaned and numbered: 10.sol
Cleaned and numbered: 11.sol
Cleaned and numbered: 12.sol
Cleaned and numbered: 13.sol
Cleaned and numbered: 14.sol
Cleaned and numbered: 15.sol
Cleaned and numbered: 16.sol
Cleaned and numbered: 17.sol
Cleaned and numbered: 18.sol
Cleaned and numbered: 19.sol
Cleaned and numbered: 20.sol
Cleaned and numbered: 21.sol
Cleaned and numbered: 22.sol
Cleaned and numbered: 23.sol
Cleaned and numbered: 24.sol
Cleaned and numbered: 25.sol
Cleaned and numbered: 26.sol
Cleaned and numbered: 27.sol
Cleaned and numbered: 28.sol
Cleaned and numbered: 29.sol
Cleaned and numbered: 30.sol
Cleaned and numbered: 31.sol
Cleaned and numbered: 32.sol
Cleaned and numbered: 33.sol
Cleaned and numbered: 34

## Checking the correctness of vulnerableLines and vulnerableCode:

In [15]:
import os
import json
import re

# Folder holding the re-numbered contracts ("<n>: <code>" per line) that the annotations are checked against.
sol_dir= "numbered_contracts"

def clean(code_str):
    """Return ``code_str`` with all braces, semicolons and whitespace removed.

    Used to compare two code snippets while ignoring formatting differences.
    """
    noise = re.compile(r"[{}; \s\t\n]")
    return noise.sub("", code_str)
    
def extract_lines_from_solidity(solidity_file, start, end):
    """Return the code of lines ``start``..``end`` (inclusive) from a numbered Solidity file.

    Each relevant line of the file is expected to look like ``"<n>: <code>"``;
    lines that do not carry such a prefix are ignored. The returned strings
    are stripped of surrounding whitespace and appear in file order.
    """
    numbered = re.compile(r"^\s*(\d+):\s(.*)")
    with open(solidity_file, "r", encoding="utf-8") as handle:
        hits = (numbered.match(raw) for raw in handle)
        return [
            hit.group(2).strip()
            for hit in hits
            if hit and start <= int(hit.group(1)) <= end
        ]

def compare_vulnerabilities(json_data, json_path, sol_files_dir):
    """Print every annotated vulnerability whose code differs from the numbered contract.

    The contract checked is ``<stem>.sol`` inside ``sol_files_dir``, where
    ``<stem>`` is the base name of ``json_path`` without its extension.

    For each entry of ``json_data["vulnerabilities"]`` the lines named by
    ``vulnerableLines`` (``"start-end"``, or a single line number) are read
    from the contract and compared with ``vulnerableCode``. Braces, semicolons
    and all whitespace are ignored in the comparison.

    Args:
        json_data: Parsed annotation document (a dict).
        json_path: Path of the annotation file; used for messages and to
            derive the contract file name.
        sol_files_dir: Directory containing the numbered ``.sol`` files.

    Returns:
        None. Findings are reported on standard output with the prefixes
        ``[ERROR]``, ``[SKIPPED]`` and ``[MISMATCH]``.
    """
    # Derive file name from JSON file name (e.g., 0.json -> 0.sol)
    file_stem = os.path.splitext(os.path.basename(json_path))[0]
    solidity_file_path = os.path.join(sol_files_dir, f"{file_stem}.sol")

    if not os.path.exists(solidity_file_path):
        print(f"[ERROR] Solidity file not found: {solidity_file_path}")
        return

    for i, vuln in enumerate(json_data.get("vulnerabilities", [])):
        lines = vuln.get("vulnerableLines", "")
        vulnerable_code = vuln.get("vulnerableCode", [])

        if not lines or not vulnerable_code:
            print(f"[SKIPPED] No vulnerable lines or code in entry {i} of {json_path}")
            continue

        try:
            bounds = [int(part) for part in str(lines).strip().split("-")]
            if len(bounds) == 1:
                # A lone number denotes a one-line range.
                bounds = bounds * 2
            # Unpacking raises ValueError for anything other than two bounds.
            start_line, end_line = bounds
        except ValueError:
            print(f"[ERROR] Invalid line range format '{lines}' in {json_path}")
            continue

        actual_code = extract_lines_from_solidity(solidity_file_path, start_line, end_line)

        # Normalise both sides with the shared clean() helper so that spacing,
        # line terminators, {, } and ; never cause a mismatch on their own.
        actual_joined = clean("".join(actual_code))
        vuln_joined = clean("".join(vulnerable_code))

        if actual_joined != vuln_joined:
            print(f"[MISMATCH] {json_path} :: Block {i} :: Lines {lines}")
            print("Expected:")
            for v in vulnerable_code:
                print("  >", v)
            print("Found:")
            for a in actual_code:
                print("  <", a)
            print()

def process_all_json(json_dir, sol_dir):
    """Check every ``.json`` annotation file in ``json_dir`` against its contract in ``sol_dir``.

    Files are visited in sorted name order so that the printed report is the
    same on every run and platform (``os.listdir`` alone gives no ordering
    guarantee). A file that cannot be read, parsed or checked is reported
    with an ``[ERROR]`` line and does not stop the remaining files.

    Args:
        json_dir: Directory containing the annotation files.
        sol_dir: Directory containing the numbered ``.sol`` files.
    """
    for file_name in sorted(os.listdir(json_dir)):
        if not file_name.endswith(".json"):
            continue
        json_path = os.path.join(json_dir, file_name)
        try:
            with open(json_path, "r", encoding="utf-8") as f:
                data = json.load(f)
            compare_vulnerabilities(data, json_path, sol_dir)
        except Exception as e:
            print(f"[ERROR] Failed to process {file_name}: {e}")

# Example usage: check every annotation file in the "LOCS" folder against the
# numbered contracts in sol_dir and print the discrepancies found.
process_all_json("LOCS", sol_dir)


[MISMATCH] LOCS\106.json :: Block 0 :: Lines 168-168
Expected:
  > require (receiverAddress.call.value(amountInWei).gas(msg.gas.sub(5000))());
Found:
  < c.balance = newBalance;

[MISMATCH] LOCS\106.json :: Block 1 :: Lines 192-193
Expected:
  > msg.sender.transfer(amountToTransfer);
  > c.balance = 0;
Found:
  < msg.sender.transfer(amountToTransfer);
  < ContributorBalanceChanged(msg.sender, 0);

[MISMATCH] LOCS\111.json :: Block 0 :: Lines 311-313
Expected:
  > if (!address(admin).call.value(_com)())
  > _p3d = _com;
Found:
  < uint256 _affID;
  < 
  < if (_affCode == address(0) || _affCode == msg.sender)

[MISMATCH] LOCS\113.json :: Block 0 :: Lines 354-354
Expected:
  > if (!address(coin_base).call.value(_com)())
Found:
  < if (_affCode == '' || _affCode == plyr_[_pID].name)

[MISMATCH] LOCS\113.json :: Block 2 :: Lines 149-149
Expected:
  > plyr_[_affID].addr.transfer(_aff);
Found:
  < 

[MISMATCH] LOCS\130.json :: Block 0 :: Lines 240-253
Expected:
  > if (_eth > 0)
  >     plyr_

In [14]:
# Run this in the next cell to list mismatched JSON files.
# mismatched_files collects the base names of the JSON files with at least one
# mismatching entry; it is reset here, so re-running the cell starts from an empty list.
mismatched_files = []

def clean(code_str):
    """Drop braces, semicolons and all whitespace from ``code_str``.

    The result is a formatting-insensitive form of the snippet, suitable for
    equality comparison.
    """
    kept_pieces = re.split(r"[{}; \s\t\n]", code_str)
    return "".join(kept_pieces)

def extract_lines_from_solidity(solidity_file, start, end):
    """Collect the code found on lines ``start`` through ``end`` of a numbered Solidity file.

    Only lines of the form ``"<n>: <code>"`` are considered; the ``<code>``
    part is returned stripped of surrounding whitespace, in file order.
    """
    wanted = []
    with open(solidity_file, "r", encoding="utf-8") as source:
        for raw in source:
            parsed = re.match(r"^\s*(\d+):\s(.*)", raw)
            if parsed is None:
                # Not a numbered line.
                continue
            number = int(parsed.group(1))
            if number < start or number > end:
                continue
            wanted.append(parsed.group(2).strip())
    return wanted

def compare_vulnerabilities(json_data, json_path, sol_files_dir):
    """Record ``json_path`` in ``mismatched_files`` if any of its entries disagrees with the contract.

    Quiet counterpart of the detailed check: nothing is printed. Entries with
    missing data or an unparsable ``"start-end"`` range are skipped, and a
    missing contract file ends the check without recording anything.
    """
    json_name = os.path.basename(json_path)
    contract_path = os.path.join(sol_files_dir, f"{os.path.splitext(json_name)[0]}.sol")
    if not os.path.exists(contract_path):
        return

    has_mismatch = False
    for entry in json_data.get("vulnerabilities", []):
        line_range = entry.get("vulnerableLines", "")
        expected = entry.get("vulnerableCode", [])
        if not (line_range and expected):
            continue

        try:
            first, last = map(int, line_range.strip().split("-"))
        except ValueError:
            continue

        found = extract_lines_from_solidity(contract_path, first, last)
        # Every entry is evaluated even after a mismatch has been seen.
        if clean("".join(found)) != clean("".join(expected)):
            has_mismatch = True

    if has_mismatch:
        mismatched_files.append(json_name)

# Set your paths
json_dir = "LOCS"
sol_dir = "numbered_contracts"

# Process and collect mismatches. The listing is sorted because os.listdir
# gives no ordering guarantee and the report should be reproducible.
for file_name in sorted(os.listdir(json_dir)):
    if not file_name.endswith(".json"):
        continue
    json_path = os.path.join(json_dir, file_name)
    try:
        with open(json_path, "r", encoding="utf-8") as f:
            data = json.load(f)
        compare_vulnerabilities(data, json_path, sol_dir)
    except Exception as exc:
        # Still best-effort (the loop keeps going), but a skipped file is now
        # visible instead of silently missing from the count. Catching
        # Exception rather than everything keeps the cell interruptible.
        print(f"[ERROR] Failed to process {file_name}: {exc}")

# Display result
print("=== MISMATCHED JSON FILES ===")
for name in mismatched_files:
    print(name)
i = len(mismatched_files)  # total number of mismatched files
print(i)


=== MISMATCHED JSON FILES ===
106.json
111.json
113.json
130.json
133.json
143.json
149.json
151.json
153.json
154.json
156.json
16.json
161.json
164.json
167.json
17.json
176.json
189.json
197.json
20.json
202.json
205.json
208.json
209.json
212.json
213.json
216.json
228.json
234.json
235.json
244.json
250.json
255.json
258.json
259.json
269.json
274.json
278.json
28.json
282.json
294.json
295.json
296.json
297.json
299.json
302.json
303.json
305.json
306.json
31.json
313.json
314.json
316.json
317.json
32.json
321.json
325.json
326.json
327.json
33.json
336.json
339.json
34.json
341.json
343.json
344.json
349.json
350.json
353.json
354.json
355.json
357.json
358.json
360.json
361.json
362.json
363.json
364.json
365.json
37.json
372.json
374.json
376.json
379.json
380.json
382.json
384.json
387.json
391.json
392.json
396.json
399.json
400.json
401.json
402.json
41.json
410.json
412.json
415.json
416.json
419.json
424.json
428.json
429.json
430.json
431.json
433.json
438.json
439.json