<a href="https://colab.research.google.com/github/ihatefish26/Verilog-Depth-Prediction/blob/main/Verilog_depth.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

VERILOG DEPTH PREDICTION

In [49]:
import os
import re
import pandas as pd
import joblib
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import mean_absolute_error, r2_score, mean_squared_error

def generate_synthetic_verilog_files(output_dir, num_files=500, seed=None):
    """Write ``num_files`` synthetic Verilog files into ``output_dir``.

    Each file is named ``synthetic_<i>.v`` (``i`` from 0 to ``num_files - 1``)
    and contains one of three fixed templates, chosen at random: a five-stage
    pipeline, a ten-state FSM, or a 16-bit ALU.

    Args:
        output_dir: Directory to write into; created if it does not exist.
        num_files: Number of files to generate.
        seed: Optional seed for the template selection. When given, the same
            seed always produces the same sequence of files and NumPy's
            global random state is left untouched. When ``None`` (default),
            templates are drawn from NumPy's global random state.
    """
    os.makedirs(output_dir, exist_ok=True)

    templates = [
        """
        module pipeline (input clk, input [15:0] a, output reg [15:0] y);
            reg [15:0] stage1, stage2, stage3, stage4, stage5;
            always @(posedge clk) begin
                stage1 <= a + 1;
                stage2 <= stage1 + 2;
                stage3 <= stage2 + 3;
                stage4 <= stage3 + 4;
                stage5 <= stage4 + 5;
                y <= stage5;
            end
        endmodule
        """,
        # NOTE(review): this template declares `done` as `output reg` but
        # drives it with a continuous `assign`, which Verilog does not allow
        # for a reg; strict tools may reject the file. The text is left
        # unchanged so the generated dataset stays the same.
        """
        module fsm_large (input clk, input rst, input start, output reg done);
            reg [4:0] state;
            always @(posedge clk or posedge rst) begin
                if (rst) state <= 5'b00000;
                else case (state)
                    5'b00000: state <= start ? 5'b00001 : 5'b00000;
                    5'b00001: state <= 5'b00010;
                    5'b00010: state <= 5'b00011;
                    5'b00011: state <= 5'b00100;
                    5'b00100: state <= 5'b00101;
                    5'b00101: state <= 5'b00110;
                    5'b00110: state <= 5'b00111;
                    5'b00111: state <= 5'b01000;
                    5'b01000: state <= 5'b01001;
                    5'b01001: state <= 5'b00000;
                endcase
            end
            assign done = (state == 5'b01001);
        endmodule
        """,
        """
        module alu (input [15:0] a, input [15:0] b, input [3:0] op, output reg [15:0] result);
            always @(*) begin
                case (op)
                    4'b0000: result = a + b;
                    4'b0001: result = a - b;
                    4'b0010: result = a & b;
                    4'b0011: result = a | b;
                    4'b0100: result = a ^ b;
                    4'b0101: result = (a << 1) | (b >> 1);
                    4'b0110: result = a * b;
                    default: result = 16'b0000000000000000;
                endcase
            end
        endmodule
        """
    ]

    # A seeded run uses its own generator so it neither depends on nor
    # disturbs NumPy's global random state; an unseeded run keeps drawing
    # from the global state.
    rng = np.random if seed is None else np.random.RandomState(seed)

    for i in range(num_files):
        file_content = rng.choice(templates)
        file_path = os.path.join(output_dir, f"synthetic_{i}.v")
        with open(file_path, "w") as f:
            f.write(file_content)

    print(f"Generated {num_files} synthetic Verilog files in {output_dir}")

def train_ai_model(output_dir):
    """Train an XGBoost regressor on the extracted Verilog features and save it.

    Reads ``verilog_features.csv`` from ``output_dir``, fits the model on a
    70/30 train/test split, prints 5-fold cross-validation R² scores and
    hold-out metrics, and writes the fitted model to ``depth_predictor.pkl``
    in the same directory.

    The target is ``log1p(Depth)``, so the printed MAE and RMSE are in
    log1p units, not raw depth units.

    Args:
        output_dir: Directory containing ``verilog_features.csv``; the model
            file is written here as well.
    """
    df = pd.read_csv(os.path.join(output_dir, "verilog_features.csv"))
    df["Depth"] = np.log1p(df["Depth"])  # Apply log transformation to reduce skew
    X = df[["Gate Count", "Flip-Flops", "Conditions", "Modules", "Assignments", "Wires"]]
    y = df["Depth"]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, shuffle=True)

    model = XGBRegressor(
        objective='reg:squarederror',
        n_estimators=120,
        max_depth=6,
        learning_rate=0.03,
        subsample=0.7,
        colsample_bytree=0.7,
        reg_alpha=2.0,
        reg_lambda=2.0,
        min_child_weight=5,
        gamma=1.0,
        random_state=42
    )

    # Cross-validate on the training split only, so the hold-out set stays unseen.
    scores = cross_val_score(model, X_train, y_train, cv=5, scoring="r2")
    print(f"Cross-Validation R² Scores: {scores}")
    print(f"Mean Cross-Validation Score: {scores.mean() * 100:.2f}%")

    model.fit(X_train, y_train)

    y_pred = model.predict(X_test)

    # mean_squared_error returns the MSE; take the square root so the value
    # matches the "Root Mean Squared Error" label.
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))

    print(f"Mean Absolute Error: {mean_absolute_error(y_test, y_pred):.2f}")
    print(f"Model Accuracy (R² Score): {r2_score(y_test, y_pred) * 100:.2f}%")
    print(f"Root Mean Squared Error: {rmse:.2f}")

    joblib.dump(model, os.path.join(output_dir, "depth_predictor.pkl"))
    print("Model training completed and saved.")

def predict_full_dataset(output_dir):
    """Run the saved depth model over every row of the feature CSV.

    Loads ``verilog_features.csv`` and ``depth_predictor.pkl`` from
    ``output_dir``, adds a ``Predicted Depth`` column, writes the result to
    ``verilog_predictions.csv`` in the same directory, and prints the
    ``File``, ``Depth`` and ``Predicted Depth`` columns.

    Args:
        output_dir: Directory holding the feature CSV and the saved model.
    """
    feature_columns = ["Gate Count", "Flip-Flops", "Conditions", "Modules", "Assignments", "Wires"]
    features_path = os.path.join(output_dir, "verilog_features.csv")
    model_path = os.path.join(output_dir, "depth_predictor.pkl")
    predictions_path = os.path.join(output_dir, "verilog_predictions.csv")

    frame = pd.read_csv(features_path)
    predictor = joblib.load(model_path)

    # train_ai_model fits on log1p(Depth), so expm1 maps the model output
    # back to the original depth scale.
    log_scale_predictions = predictor.predict(frame[feature_columns])
    frame["Predicted Depth"] = np.expm1(log_scale_predictions)

    frame.to_csv(predictions_path, index=False)
    print("Full dataset predictions saved.")

    summary = frame[["File", "Depth", "Predicted Depth"]]
    print(summary)

# Run full process
# The steps must run in this order: each one reads files that an earlier
# step wrote into ./synthetic_verilog_data.
# Step 1: write 500 synthetic_<i>.v files.
generate_synthetic_verilog_files("./synthetic_verilog_data", num_files=500)
# Step 2: NOTE(review): extract_features_from_verilog is not defined in this
# part of the file; presumably it writes verilog_features.csv, which the
# next two steps read. Confirm against its definition.
extract_features_from_verilog("./synthetic_verilog_data")
# Step 3: read verilog_features.csv, train, and save depth_predictor.pkl.
train_ai_model("./synthetic_verilog_data")
# Step 4: read the CSV and the saved model, write verilog_predictions.csv.
predict_full_dataset("./synthetic_verilog_data")



Generated 500 synthetic Verilog files in ./synthetic_verilog_data
Feature extraction completed. Data saved to verilog_features.csv
               File  Gate Count  Flip-Flops  Conditions  Depth
0   synthetic_112.v           0           1           0      3
1   synthetic_274.v           0           1           0      3
2   synthetic_245.v           1           1           0      5
3   synthetic_227.v           1           1           0      5
4    synthetic_32.v           0           0           0      0
..              ...         ...         ...         ...    ...
95  synthetic_266.v           0           0           0      0
96  synthetic_250.v           0           0           0      0
97  synthetic_138.v           1           1           0      5
98  synthetic_151.v           0           1           0      3
99  synthetic_288.v           1           1           0      5

[100 rows x 5 columns]
Cross-Validation R² Scores: [0.97605576 0.98230526 0.98092503 0.97775859 0.98184622]
Mean