# Linux Device Driver Code Evaluation System

In [None]:
!pip install transformers accelerate bitsandbytes

Collecting bitsandbytes
  Downloading bitsandbytes-0.46.1-py3-none-manylinux_2_24_x86_64.whl.metadata (10 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=2.0.0->accelerate)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline , BitsAndBytesConfig
import torch

In [None]:
from huggingface_hub import login
from google.colab import userdata

# Read the access token stored in Colab's secrets under the name HF_TOKEN.
hf_token = userdata.get("HF_TOKEN")

# Authenticate this session against the Hugging Face Hub with that token.
login(token=hf_token)

In [None]:
'''Since gcc fails with errors when the kernel header files are missing, we create fake
header files in order to simulate (mock) the kernel development environment.'''
import os

# Layout: mock_linux_headers/linux/<header>, created if it does not exist yet.
mock_header_dir = "mock_linux_headers"
linux_subdir = os.path.join(mock_header_dir, "linux")
os.makedirs(linux_subdir, exist_ok=True)

# Kernel headers that get a placeholder file.
headers = [
    "init.h",
    "module.h",
    "device.h",
    "fs.h",
    "cdev.h",
    "uaccess.h",
    "slab.h",
]

# Every placeholder consists of a single `#pragma once` line.
stub_text = "#pragma once\n"
for header in headers:
    stub_path = os.path.join(linux_subdir, header)
    with open(stub_path, "w") as f:
        f.write(stub_text)

print(" Mock kernel headers created in mock_linux_headers/linux.")

 Mock kernel headers created in mock_linux_headers/linux.


In [None]:
# Model id handed to from_pretrained() for both the model and the tokenizer.
base_model = "mistralai/Mistral-7B-Instruct-v0.1"

In [None]:

# 4-bit quantization settings that are handed to from_pretrained() below.
bnb_config = BitsAndBytesConfig(
    # Load the weights in 4-bit precision to cut memory usage.
    load_in_4bit=True,
    # NormalFloat4 (nf4) quantization format.
    bnb_4bit_quant_type="nf4",
    # Nested ("double") quantization: the quantization constants are quantized
    # as well, which saves additional memory.
    bnb_4bit_use_double_quant=True,
    # Computation runs in float16.
    bnb_4bit_compute_dtype=torch.float16,
)

# Load the pre-trained checkpoint with the quantization settings applied.
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=bnb_config,
    device_map="auto",  # let the loader place layers on the available devices
)


config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/25.1k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/9.94G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.54G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

In [None]:
# Fast tokenizer loaded from the same model id as the model.
# NOTE(review): trust_remote_code=True allows code shipped in the model repo to
# run locally - confirm it is actually needed for this checkpoint.
tokenizer = AutoTokenizer.from_pretrained(
    base_model,
    use_fast=True,
    trust_remote_code=True,
)

# Text-generation pipeline built around the already-loaded model and tokenizer.
generator = pipeline(
    "text-generation",
    tokenizer=tokenizer,
    model=model,
)

tokenizer_config.json:   0%|          | 0.00/2.10k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/493k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.80M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/414 [00:00<?, ?B/s]

Device set to use cuda:0


In [None]:
# NOTE(review): this rebuilds the same text-generation pipeline that the
# previous cell already assigned to `generator`.
generator = pipeline(
    "text-generation",
    tokenizer=tokenizer,
    model=model,
)

Device set to use cuda:0


In [None]:
# Task description passed to the generator as-is.
prompt = (
    "Write a Linux character device driver in C that supports basic "
    "read/write operations with a 1KB buffer."
)

In [None]:
# Sampling is enabled, so repeated runs of this cell can produce different text.
generation_kwargs = {"max_new_tokens": 1024, "do_sample": True, "temperature": 0.5}
output = generator(prompt, **generation_kwargs)

# Keep the text of the first returned candidate; as the printed result shows,
# it starts with the prompt itself.
first_candidate = output[0]
code = first_candidate["generated_text"]
print(code)

Setting `pad_token_id` to `eos_token_id`:2 for open-end generation.


Write a Linux character device driver in C that supports basic read/write operations with a 1KB buffer.

```c
#include <linux/init.h>
#include <linux/module.h>
#include <linux/device.h>
#include <linux/fs.h>
#include <linux/cdev.h>
#include <linux/slab.h>

#define BUFFER_SIZE 1024

struct my_device {
    struct cdev *cdev;
    unsigned char buffer[BUFFER_SIZE];
    int pos;
};

static int my_device_open(struct cdev *cdev, loff_t *f_pos)
{
    struct my_device *mydev = cdev->private_data;
    mydev->pos = 0;
    return 0;
}

static ssize_t my_device_read(struct file *file, char __user *buf,
                               size_t count, loff_t *f_pos)
{
    struct my_device *mydev = file->private_data;
    ssize_t bytes_read;

    if (mydev->pos >= BUFFER_SIZE) {
        // Buffer is full, return -EIO (end-of-file)
        bytes_read = -EIO;
    } else {
        // Read from buffer
        bytes_read = mydev->pos;
        mydev->pos += bytes_read;

        // Copy data to user space
     

In [None]:
import re

In [None]:
# Capture everything between a ```c fence and the next ``` fence; DOTALL lets
# "." span newlines so multi-line blocks are matched.
fenced_c_pattern = re.compile(r"```c(.*?)```", re.DOTALL)
code_blocks = fenced_c_pattern.findall(code)
print(code_blocks[0].strip())

#include <linux/init.h>
#include <linux/module.h>
#include <linux/device.h>
#include <linux/fs.h>
#include <linux/cdev.h>
#include <linux/slab.h>

#define BUFFER_SIZE 1024

struct my_device {
    struct cdev *cdev;
    unsigned char buffer[BUFFER_SIZE];
    int pos;
};

static int my_device_open(struct cdev *cdev, loff_t *f_pos)
{
    struct my_device *mydev = cdev->private_data;
    mydev->pos = 0;
    return 0;
}

static ssize_t my_device_read(struct file *file, char __user *buf,
                               size_t count, loff_t *f_pos)
{
    struct my_device *mydev = file->private_data;
    ssize_t bytes_read;

    if (mydev->pos >= BUFFER_SIZE) {
        // Buffer is full, return -EIO (end-of-file)
        bytes_read = -EIO;
    } else {
        // Read from buffer
        bytes_read = mydev->pos;
        mydev->pos += bytes_read;

        // Copy data to user space
        if (copy_to_user(buf, mydev->buffer, bytes_read)) {
            // Copy failed, return -EFAULT
            

In [None]:
import re
def extract_c_code(raw_output):
  """Return the C source contained in a model response.

  Resolution order:
    1. The first complete fenced block (```c ... ```), stripped.
    2. If a ```c fence was opened but never closed (for example because the
       generation hit its token limit), everything after that opening fence,
       stripped. Without this step the prompt text and the fence marker
       themselves would end up in the saved .c file.
    3. Otherwise the whole response, stripped.

  Args:
    raw_output: Full text returned by the generator.

  Returns:
    The extracted source as a string with surrounding whitespace removed.
  """
  closed_block = re.search(r"```c(.*?)```", raw_output, re.DOTALL)
  if closed_block:
    return closed_block.group(1).strip()

  # Opening fence without a closing one: salvage the truncated code.
  open_block = re.search(r"```c(.*)", raw_output, re.DOTALL)
  if open_block:
    return open_block.group(1).strip()

  return raw_output.strip()

# Reduce the raw generation to just the C source.
cleaned_code = extract_c_code(code)

# Print only the first 300 characters as a quick sanity check.
preview = cleaned_code[:300]
print(preview)


#include <linux/init.h>
#include <linux/module.h>
#include <linux/device.h>
#include <linux/fs.h>
#include <linux/cdev.h>
#include <linux/slab.h>

#define BUFFER_SIZE 1024

struct my_device {
    struct cdev *cdev;
    unsigned char buffer[BUFFER_SIZE];
    int pos;
};

static int my_device_open(str


In [None]:
# Persist the extracted driver source so gcc can compile it from disk.
file_path = "generated_code/char_driver.c"
os.makedirs("generated_code", exist_ok=True)

with open(file_path, mode="w") as f:
    f.write(cleaned_code)

print(f"code saved to {file_path}")


code saved to generated_code/char_driver.c


In [None]:
import subprocess

def compile_code(filepath, include_dir="./mock_linux_headers", output="driver.o", compiler="gcc"):
  """Compile one C file to an object file and report the outcome.

  The command run is
  ``<compiler> -Wall -Wextra -I<include_dir> -c <filepath> -o <output>``.
  With the default arguments this is exactly the gcc invocation the notebook
  used before these parameters existed.

  Args:
    filepath: Path of the C source file to compile.
    include_dir: Directory added to the include search path via ``-I``.
    output: Path of the object file to write.
    compiler: Compiler executable to invoke.

  Returns:
    dict with the keys ``success`` (True when the return code is 0),
    ``stdout`` and ``stderr`` (captured text) and ``return_code``. If the
    compiler cannot be started at all, ``success`` is False, ``return_code``
    is 127 and ``stderr`` explains why, so callers get a result instead of
    an exception.
  """
  compile_cmd = [
      compiler, "-Wall", "-Wextra", f"-I{include_dir}",
      "-c", filepath, "-o", output,
  ]

  try:
    result = subprocess.run(compile_cmd, capture_output=True, text=True)
  except OSError as exc:
    # Raised when the executable is missing or not runnable.
    return {
        "success": False,
        "stdout": "",
        "stderr": f"could not run {compiler!r}: {exc}",
        "return_code": 127,
    }

  return {
      "success": result.returncode == 0,
      "stdout": result.stdout,
      "stderr": result.stderr,
      "return_code": result.returncode,
  }

# First build attempt against the header files created so far.
compile_result = compile_code(filepath=file_path)


In [None]:
# Show whether the build succeeded, followed by the compiler's diagnostics.
compiled_ok = compile_result["success"]
compiler_stderr = compile_result["stderr"]
print(" Compile Success:", compiled_ok)
print(" Errors:\n", compiler_stderr)

 Compile Success: False
 Errors:
 generated_code/char_driver.c:16:46: error: unknown type name ‘loff_t’
   16 | static int my_device_open(struct cdev *cdev, loff_t *f_pos)
      |                                              ^~~~~~
generated_code/char_driver.c:23:8: error: unknown type name ‘ssize_t’
   23 | static ssize_t my_device_read(struct file *file, char __user *buf,
      |        ^~~~~~~
generated_code/char_driver.c:23:62: error: expected ‘;’, ‘,’ or ‘)’ before ‘*’ token
   23 | static ssize_t my_device_read(struct file *file, char __user *buf,
      |                                                              ^
generated_code/char_driver.c:48:8: error: unknown type name ‘ssize_t’
   48 | static ssize_t my_device_write(struct file *file, char __user *buf,
      |        ^~~~~~~
generated_code/char_driver.c:48:63: error: expected ‘;’, ‘,’ or ‘)’ before ‘*’ token
   48 | static ssize_t my_device_write(struct file *file, char __user *buf,
      |                                

In [None]:
import os
# Minimal stand-ins for the declarations each kernel header is expected to provide.
advanced_mock_headers = {
    "fs.h": "struct file { int dummy; };",
    "uaccess.h": "#define __user\n",
    "module.h": "#define THIS_MODULE NULL\n",
    "slab.h": "#define GFP_KERNEL 0\n void *kmalloc(int size, int flags) { return 0; }\n void kfree(void* p) {}",
    "cdev.h": "struct cdev { int dummy; }; int cdev_register(void *c) { return 0; } void cdev_unregister(void *c) {}",
    "init.h": "#define module_init(x)\n#define module_exit(x)",
    "device.h": "#pragma once\n",
    "stddef.h": "#define NULL 0\n#define ENOMEM 12"
}

# The generated driver includes these files as <linux/NAME> and gcc is run with
# -I./mock_linux_headers, so they must be written to the linux/ subdirectory.
# Writing them next to it (directly in mock_header_dir) leaves the empty stubs
# in place and the compile errors unchanged.
linux_header_dir = os.path.join(mock_header_dir, "linux")
os.makedirs(linux_header_dir, exist_ok=True)

# NOTE(review): none of these mocks declares loff_t or ssize_t, which the gcc
# output reports as unknown types; further mock content is probably needed.
for filename, content in advanced_mock_headers.items():
    # Keep the single-inclusion guard the empty stubs had, so a header that is
    # pulled in twice does not redefine its structs and functions.
    if not content.lstrip().startswith("#pragma once"):
        content = "#pragma once\n" + content
    if not content.endswith("\n"):
        content += "\n"
    with open(os.path.join(linux_header_dir, filename), "w") as f:
        f.write(content)

print(" Advanced mock headers updated.")


 Advanced mock headers updated.


In [None]:
# Build again after the mock headers were rewritten, to see whether the
# diagnostics change.
compile_result = compile_code(filepath=file_path)

In [None]:
# Report the outcome of the rebuild: success flag first, then gcc's stderr.
for label, key in ((" Compile Success:", "success"), (" Errors:\n", "stderr")):
    print(label, compile_result[key])

 Compile Success: False
 Errors:
 generated_code/char_driver.c:16:46: error: unknown type name ‘loff_t’
   16 | static int my_device_open(struct cdev *cdev, loff_t *f_pos)
      |                                              ^~~~~~
generated_code/char_driver.c:23:8: error: unknown type name ‘ssize_t’
   23 | static ssize_t my_device_read(struct file *file, char __user *buf,
      |        ^~~~~~~
generated_code/char_driver.c:23:62: error: expected ‘;’, ‘,’ or ‘)’ before ‘*’ token
   23 | static ssize_t my_device_read(struct file *file, char __user *buf,
      |                                                              ^
generated_code/char_driver.c:48:8: error: unknown type name ‘ssize_t’
   48 | static ssize_t my_device_write(struct file *file, char __user *buf,
      |        ^~~~~~~
generated_code/char_driver.c:48:63: error: expected ‘;’, ‘,’ or ‘)’ before ‘*’ token
   48 | static ssize_t my_device_write(struct file *file, char __user *buf,
      |                                

In [None]:
def _strip_comments_and_literals(code):
    """Return `code` with C comments removed and string/char literals emptied.

    This keeps braces, keywords and identifiers that occur only inside
    comments or literals from being mistaken for real code.
    """
    import re  # local import so this cell does not rely on an earlier `import re`

    noise = re.compile(
        r"/\*.*?\*/"              # block comment
        r"|//[^\n]*"              # line comment
        r'|"(?:\\.|[^"\\\n])*"'   # string literal
        r"|'(?:\\.|[^'\\\n])*'",  # character literal
        re.DOTALL,
    )

    def _blank(match):
        token = match.group(0)
        # Comments collapse to one space (so neighbouring tokens stay apart);
        # literals collapse to an empty literal of the same kind.
        if token[0] == "/":
            return " "
        return token[0] * 2

    return noise.sub(_blank, code)


def _count_function_definitions(code_only):
    """Count top-level `...(...) {` bodies in comment-free C source.

    A `{` opened at brace depth 0 counts as a function body when the text
    since the previous top-level `;` or `}` ends with `)` and contains no `=`
    (which would indicate an initializer). The opening brace may sit on the
    same line as the signature or on a later one.
    """
    # Preprocessor lines never open a function body; drop them so macro
    # parentheses cannot be taken for a signature.
    text = "\n".join(
        line for line in code_only.split("\n") if not line.lstrip().startswith("#")
    )

    depth = 0
    count = 0
    pending = []  # characters seen at depth 0 since the last `;`, `{` or `}`
    for ch in text:
        if ch == "{":
            if depth == 0:
                head = "".join(pending).rstrip()
                if head.endswith(")") and "=" not in head:
                    count += 1
            depth += 1
            pending = []
        elif ch == "}":
            depth = max(depth - 1, 0)  # tolerate unbalanced (truncated) input
            pending = []
        elif depth == 0:
            if ch == ";":
                pending = []
            else:
                pending.append(ch)
    return count


def static_code_analysis(filepath):
    """Compute simple heuristic metrics for a C source file.

    Args:
        filepath: Path of the C file to read.

    Returns:
        dict with the keys
        - "total_lines": number of items produced by splitting the text on "\\n".
        - "function_count": number of top-level function definitions. Control
          statements such as `if (...) {` are not counted, and definitions
          with the `{` on the following line are.
        - "comment_lines": lines whose first non-blank characters are `//` or `/*`.
        - "uses_copy_user": copy_to_user / copy_from_user appears in code.
        - "uses_malloc": "malloc" (which also covers kmalloc) appears in code.
        - "has_null_check": a `== NULL` / `!= NULL` comparison appears in code;
          a plain assignment `p = NULL` does not count. The `if (!p)` idiom is
          not detected.
        - "uses_goto": the `goto` keyword appears in code.
        - "todos": lines containing "todo" in any letter case.
        "In code" means outside comments and string/character literals.
    """
    import re  # local import so this cell does not rely on an earlier `import re`

    with open(filepath, 'r') as f:
        code = f.read()

    lines = code.split('\n')
    code_only = _strip_comments_and_literals(code)

    comment_lines = [l for l in lines if l.strip().startswith(("//", "/*"))]
    todos = [l for l in lines if "TODO" in l.upper()]
    null_check = re.search(r"[=!]=\s*NULL\b|\bNULL\s*[=!]=", code_only)

    return {
        "total_lines": len(lines),
        "function_count": _count_function_definitions(code_only),
        "comment_lines": len(comment_lines),
        "uses_copy_user": "copy_to_user" in code_only or "copy_from_user" in code_only,
        "uses_malloc": "malloc" in code_only,
        "has_null_check": null_check is not None,
        "uses_goto": re.search(r"\bgoto\b", code_only) is not None,
        "todos": len(todos),
    }


In [None]:
# Collect the heuristic metrics for the saved driver source and show them.
static_metrics = static_code_analysis(filepath=file_path)
print(static_metrics)


{'total_lines': 122, 'function_count': 9, 'comment_lines': 14, 'uses_copy_user': True, 'uses_malloc': False, 'has_null_check': False, 'uses_goto': True, 'todos': 0}


In [None]:
class MockKernalHeader:
  def __init__(self,root_dir="mock_linux_headers"):
    self.root_dir=root_dir
    self.linux_dir=os.path.join(root_dir,"linux")
    self.basic_headers=["init.h" "module.h", "device.h",
                        "fs.h", "cdev.h", "uaccess.h", "slab.h"]
    self.advanced_mocks = {
              "fs.h": "struct file { int dummy; };",
              "uaccess.h": "#define __user\n",
              "module.h": "#define THIS_MODULE NULL\n",
              "slab.h": "#define GFP_KERNEL 0\n void *kmalloc(int size, int flags) { return 0; }\n void kfree(void* p) {}",
              "cdev.h": "struct cdev { int dummy; }; int cdev_register(void *c) { return 0; } void cdev_unregister(void *c) {}",
              "init.h": "#define module_init(x)\n#define module_exit(x)",
              "device.h": "#pragma once\n",
              "stddef.h": "#define NULL 0\n#define ENOMEM 12"
          }


    def create_linux_dir(self):
      if not os.path.exists(self.linux_dir):
        os.makedirs(self.linux_dir)


    def create_basic_headers(self):
      for header in self.basic_headers:
        with open(os.path.join(self.linux_dir, header), "w") as f:
          f.write("#pragma once\n")

    def advanced_mocks_header(self)

