In [1]:
pip list

Package            Version
------------------ ------------
accelerate         1.5.2
asttokens          3.0.0
certifi            2025.1.31
charset-normalizer 3.4.1
colorama           0.4.6
comm               0.2.2
debugpy            1.8.13
decorator          5.2.1
exceptiongroup     1.2.2
executing          2.2.0
filelock           3.18.0
fsspec             2025.3.0
huggingface-hub    0.29.3
idna               3.10
ipykernel          6.29.5
ipython            8.34.0
jedi               0.19.2
Jinja2             3.1.6
joblib             1.4.2
jupyter_client     8.6.3
jupyter_core       5.7.2
MarkupSafe         3.0.2
matplotlib-inline  0.1.7
mpmath             1.3.0
nest-asyncio       1.6.0
networkx           3.4.2
numpy              2.2.4
packaging          24.2
pandas             2.2.3
parso              0.8.4
pillow             11.0.0
pip                25.0.1
platformdirs       4.3.7
prompt_toolkit     3.0.50
psutil             7.0.0
pure_eval          0.2.3
Pygments           2.19.1
p

In [None]:
import torch
print("PyTorch GPU:", torch.cuda.is_available())
print("GPU Device Count:", torch.cuda.device_count())

if torch.cuda.is_available():
    print("GPU Name:", torch.cuda.get_device_name(0))
    print("CUDA Version:", torch.version.cuda)
    print("cuDNN Version:", torch.backends.cudnn.version())

    # Pytorch select GPU
    device = torch.device("cuda")
    x = torch.rand((1000, 1000), device=device)
    print("GPU Run")
else:
    print("Gpu Error")

PyTorch 是否支持 GPU: True
GPU 设备数量: 1
GPU 设备名称: NVIDIA GeForce RTX 3070 Ti Laptop GPU
CUDA 版本: 12.6
cuDNN 版本: 90501
张量计算成功，GPU 运行正常！


1.First Train, use cgt-main (T/F weather sol code has bug or not)

In [None]:
import pandas as pd
df = pd.read_csv("cgt-main/consolidated.csv", sep=";")

# property_holds count
value_counts = df["property_holds"].value_counts()
print("Raw counts of property_holds:")
print(value_counts)

# calculate percentage
percentages = df["property_holds"].value_counts(normalize=True) * 100
print("\nPercentage distribution of property_holds:")
print(percentages)


Raw counts of property_holds:
property_holds
f    14836
t     5619
Name: count, dtype: int64

Percentage distribution of property_holds:
property_holds
f    72.529944
t    27.470056
Name: proportion, dtype: float64


In [None]:
import os
import pandas as pd
import torch
from torch.utils.data import Dataset
from transformers import RobertaTokenizer, RobertaForSequenceClassification, Trainer, TrainingArguments
from sklearn.model_selection import train_test_split
from tqdm import tqdm

df = pd.read_csv("cgt-main/consolidated.csv", sep=";")

# check solidity exist, remove "nan" value
def has_source(fp_sol):
    sol_path = os.path.join("cgt-main", "source", f"{fp_sol}.sol")
    return os.path.isfile(sol_path)

df = df[df["fp_sol"].apply(has_source)]

# transfer property_holds to label： "t" -> 1， "f" -> 0
df['label'] = df['property_holds'].map({'t': 1, 'f': 0})

# read Solidity Source code, sol as txt
def read_source(fp_sol):
    sol_path = os.path.join("cgt-main", "source", f"{fp_sol}.sol")
    try:
        with open(sol_path, "r", encoding="utf-8") as f:
            return f.read()
    except FileNotFoundError:
        print(f"file {sol_path} missing, continue")
        return ""

df['code'] = df['fp_sol'].apply(read_source)

# filter "nan" out，select code and label
data = df[['code', 'label']].dropna()
data = data[data['code'] != ""]
S
solidity_data = []
for _, row in tqdm(df.iterrows(), total=len(df), desc="load Solidity fileS"):
    fp_sol = row["fp_sol"]
    sol_path = os.path.join("cgt-main", "source", f"{fp_sol}.sol")
    try:
        with open(sol_path, "r", encoding="utf-8") as f:
            code = f.read()
        solidity_data.append({
            "contract_name": row["contractname"],
            "code": code,
            "bug_type": row["property"],
            "swc_id": row["swc"],
            "dasp_id": row["dasp"]
        })
    except FileNotFoundError:
        print(f"file {sol_path} missing, continue")
        
# JSON file saved
df_solidity = pd.DataFrame(solidity_data)
df_solidity.to_json("aaaaa.json", indent=4)

# train-validate separate
train_texts, val_texts, train_labels, val_labels = train_test_split(
    data['code'].tolist(), data['label'].tolist(), test_size=0.2, random_state=42
)

# define PyTorch Dataset class
class SolidityDataset(Dataset):
    def __init__(self, texts, labels, tokenizer, max_length=512):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.texts)
    
    def __getitem__(self, idx):
        text = self.texts[idx]
        # auto cut code lenth
        encoding = self.tokenizer(
            text,
            truncation=True,
            padding='max_length',
            max_length=self.max_length,
            return_tensors="pt"
        )
        item = {key: val.squeeze(0) for key, val in encoding.items()}
        item['labels'] = torch.tensor(self.labels[idx], dtype=torch.long)
        return item

# load CodeBERT tokenizer and model form（microsoft/codebert-base）
model_name = "microsoft/codebert-base"
tokenizer = RobertaTokenizer.from_pretrained(model_name)
model = RobertaForSequenceClassification.from_pretrained(model_name, num_labels=2)

# !!!!!!!!!!!!!!!!!!!!!!! need further explore
train_dataset = SolidityDataset(train_texts, train_labels, tokenizer)
val_dataset = SolidityDataset(val_texts, val_labels, tokenizer)

# Trainer define and tune
training_args = TrainingArguments(
    output_dir="./codebert-finetuned",
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    evaluation_strategy="steps",
    eval_steps=500,
    save_steps=1000,
    warmup_steps=100,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=100,
    save_total_limit=2,
    disable_tqdm=False
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
)

# TRAINING
trainer.train()

# Show result
eval_result = trainer.evaluate()
print("Evaluation results:", eval_result)


加载 Solidity 文件: 100%|██████████| 19456/19456 [00:02<00:00, 7606.14it/s]
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at microsoft/codebert-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss,Validation Loss
500,0.6143,0.586869
1000,0.627,0.60127
1500,0.5922,0.590859
2000,0.5615,0.575169
2500,0.5688,0.574748
3000,0.582,0.541934
3500,0.5437,0.551171
4000,0.5864,0.537969
4500,0.5412,0.551556
5000,0.5323,0.536011


Evaluation results: {'eval_loss': 0.5426416397094727, 'eval_runtime': 140.6119, 'eval_samples_per_second': 27.679, 'eval_steps_per_second': 3.463, 'epoch': 3.0}


In [None]:
import numpy as np
from sklearn.metrics import accuracy_score, precision_recall_fscore_support

def compute_metrics(eval_pred):
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    acc = accuracy_score(labels, predictions)
    precision, recall, f1, _ = precision_recall_fscore_support(labels, predictions, average='binary')
    return {"accuracy": acc, "precision": precision, "recall": recall, "f1": f1}

# same compute_metrics chunk as previous 
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
    compute_metrics=compute_metrics
)

# evaluate
eval_result = trainer.evaluate()

# get result
accuracy = eval_result.get("accuracy", eval_result.get("eval_accuracy", None))
if accuracy is not None:
    print("Bug detection rate: {:.2%}".format(accuracy))
    print("Precision: {:.2%}".format(eval_result.get("precision", 0)))
    print("Recall: {:.2%}".format(eval_result.get("recall", 0)))
    print("F1 score: {:.2%}".format(eval_result.get("f1", 0)))
else:
    print("Evaluation results did not return an accuracy metric.")

#!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!Precision, Recall, F1 score error, maybe something wrong

Bug detection rate: 75.62%
Precision: 0.00%
Recall: 0.00%
F1 score: 0.00%
