In [7]:
import requests
from bs4 import BeautifulSoup
from sentence_transformers import SentenceTransformer
from pinecone import Pinecone, ServerlessSpec, PodSpec
import ollama
import os
import uuid
import time
import pickle
import pandas as pd
import re
import pandas as pd
from rouge import Rouge
import pandas as pd
from nltk.translate.bleu_score import sentence_bleu
import nltk
import warnings
warnings.filterwarnings('ignore')
import openpyxl
from bert_score import score
import itertools
import hf_xet
import zlib
import subprocess
import tempfile

PINECONE_API_KEY = "pcsk_71bnuL_HGU1YACobTvL5gJNzHsZG1NMNx3RGmz1ohyC7xMiUYoWnuZpEn5SuvWpuTxnuzm"
PINECONE_ENVIRONMENT = "us-east-1"

# --- Constants ---
INDEX_NAME = "rag-docstring"
EMBEDDING_MODEL = 'all-MiniLM-L6-v2' # HuggingFace sentence transformer
OLLAMA_MODEL = 'deepseek-coder:6.7b' # Local Ollama model name (Ensure this is pulled: `ollama pull qwen2.5-coder:1.5b`)
OLLAMA_REWRITER_MODEL = 'deepseek-r1:1.5b'
TARGET_URL = [
    "https://peps.python.org/pep-0257/",
    "https://www.kaggle.com/code/hagzilla/what-are-docstrings",
    "https://github.com/keleshev/pep257/blob/master/pep257.py",
    "https://github.com/chadrik/doc484",
    "https://zerotomastery.io/blog/python-docstring/",
    "https://google.github.io/styleguide/pyguide.html",
    "https://www.geeksforgeeks.org/python-docstrings/",
    "https://pandas.pydata.org/docs/development/contributing_docstring.html",
    "https://www.coding-guidelines.lftechnology.com/docs/python/docstrings/",
    "https://realpython.com/python-pep8/",
    "https://pypi.org/project/AIDocStringGenerator/",
    "https://www.geeksforgeeks.org/pep-8-coding-style-guide-python/",
    "https://llego.dev/posts/write-python-docstrings-guide-documenting-functions/",
    "https://www.datacamp.com/tutorial/pep8-tutorial-python-code",
    "https://www.programiz.com/python-programming/docstrings",
    "https://marketplace.visualstudio.com/items?itemName=ShanthoshS.docstring-generator-ext",
    "https://stackoverflow.com/questions/3898572/what-are-the-most-common-python-docstring-formats",
    "https://stackoverflow.com/questions/78753860/what-is-the-proper-way-of-including-examples-in-python-docstrings",
    "https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html",
    "https://www.dataquest.io/blog/documenting-in-python-with-docstrings/",
    "https://www.tutorialspoint.com/python/python_docstrings.htm"
]
VECTOR_DIMENSION = 384 # Dimension for all-MiniLM-L6-v2
METRIC = "cosine"
CLOUD = "aws"
REGION = "us-east-1"

print("Initializing services...")
try:
    model = SentenceTransformer(EMBEDDING_MODEL)
    print("Embedding model loaded.")

    # Pinecone
    pc = Pinecone(api_key=PINECONE_API_KEY)
    print(f"Pinecone initialized.") # Environment info is handled internally

    # Ollama Client
    ollama_client = ollama.Client()
    print(f"Ollama client initialized. Attempting to use model: {OLLAMA_MODEL}")
    print(f"Ensure '{OLLAMA_MODEL}' is available locally in Ollama (`ollama pull {OLLAMA_MODEL}`).")


except Exception as e:
    print(f"Error initializing services: {e}")
    exit()

Initializing services...
Embedding model loaded.
Pinecone initialized.
Ollama client initialized. Attempting to use model: deepseek-coder:6.7b
Ensure 'deepseek-coder:6.7b' is available locally in Ollama (`ollama pull deepseek-coder:6.7b`).


In [9]:
# --- 1. Initialize Pinecone ---
pinecone_index = None
if not PINECONE_API_KEY:
    print("ERROR: Pinecone API key not found in environment variables.")
    exit(1)
try:
    pc = Pinecone(api_key=PINECONE_API_KEY)
    existing_indexes = [index_info["name"] for index_info in pc.list_indexes()]
    print(f"Available Pinecone indexes: {existing_indexes}")

    if INDEX_NAME not in existing_indexes:
        print(f"Index '{INDEX_NAME}' not found. Creating new index...")
        pc.create_index(
            name=INDEX_NAME, dimension=VECTOR_DIMENSION, metric=METRIC,
            spec=ServerlessSpec(cloud=CLOUD, region=REGION)
        )
        while not pc.describe_index(INDEX_NAME).status["ready"]:
            print(f"Waiting for index '{INDEX_NAME}' to become ready...")
            time.sleep(5)
        print(f"Index '{INDEX_NAME}' created and ready.")
    else:
        print(f"Connecting to existing index '{INDEX_NAME}'.")
        # Optional: Clear index if you want to re-index fresh
        # print(f"WARNING: Deleting all vectors from existing index '{INDEX_NAME}'...")
        # index_to_clear = pc.Index(INDEX_NAME)
        # index_to_clear.delete(delete_all=True)
        # print(f"All vectors deleted from '{INDEX_NAME}'.")

    pinecone_index = pc.Index(INDEX_NAME)
    print(f"Successfully connected to index '{INDEX_NAME}'. Stats: {pinecone_index.describe_index_stats()}")
except Exception as e:
    print(f"ERROR: Failed to initialize or connect to Pinecone index '{INDEX_NAME}': {e}")
    exit(1)

Available Pinecone indexes: ['fusion-rag-docstring', 'self-rag-docstring', 'rag-docstring', 'corrective-rag-docstring', 'code-aware-rag-docstring']
Connecting to existing index 'rag-docstring'.
Successfully connected to index 'rag-docstring'. Stats: {'dimension': 384,
 'index_fullness': 0.0,
 'metric': 'cosine',
 'namespaces': {'': {'vector_count': 14}},
 'total_vector_count': 14,
 'vector_type': 'dense'}


In [11]:
# --- Load Data into Pinecone (Only if index is empty) ---
index_stats = pinecone_index.describe_index_stats()
if index_stats.total_vector_count == 0:
    total_docs_loaded = 0
    # Loop through each URL in the list
    for url in TARGET_URL:
        print(f"\nProcessing URL: {url}")
        try:
            # Fetch URL content
            response = requests.get(url, timeout=30) # Use timeout
            response.raise_for_status() # Check for HTTP errors

            # Parse HTML
            soup = BeautifulSoup(response.content, 'html.parser')
            main_content = soup.find('main') or soup.find('article') or soup.find('body')
            page_text = ""
            if main_content:
                page_text = main_content.get_text(separator='\n', strip=True)
            else:
                page_text = soup.get_text(separator='\n', strip=True) # Fallback

            if not page_text or len(page_text) < 50: # Basic check for meaningful content
                print(f" -> Warning: Could not extract sufficient text content from {url}. Skipping.")
                continue # Skip to the next URL

            print(f" -> Extracted text length: {len(page_text)} characters.")

            # Generate embedding
            # Note: Encoding large pages as a single vector might lose detail.
            # Chunking the text into smaller parts is better for real applications.
            embedding = model.encode(page_text).tolist()

            # Prepare and upsert data
            doc_id = str(uuid.uuid4())
            metadata = {"text": page_text, "source": url} # Store the specific URL as source

            pinecone_index.upsert(vectors=[(doc_id, embedding, metadata)])
            print(f" -> Data from {url} loaded into Pinecone with ID: {doc_id}")
            total_docs_loaded += 1
            time.sleep(0.5) # Small delay to be polite to the server

        except requests.exceptions.RequestException as e:
            # Handle errors fetching specific URL, continue with the next
            print(f" -> Error fetching URL {url}: {e}")
            continue
        except Exception as e:
            # Handle other errors during processing/upserting for this URL
            print(f" -> Error processing or upserting data for {url}: {e}")
            continue

    if total_docs_loaded > 0:
        print("Waiting a moment for indexing...")
        time.sleep(2)
        print(pinecone_index.describe_index_stats()) # Show final stats
    else:
        print("Warning: No documents were loaded into the index.")

else:
    print(f"\nIndex already contains {index_stats.total_vector_count} vectors. Skipping data loading.")


Index already contains 14 vectors. Skipping data loading.


In [13]:
# *** ADDED: Get separate query for context retrieval ***
def context_qry(user_code):
    context_query = (f"""
    Provide clear, concise, informative, and accurate docstrings for the given python code following PEP 257 conventions and standards, 
    to generate the content for a Python docstring based on the provided code snippet and relevant PEP contexts.
    
    **Instructions:**
    1.  Start with a concise summary line explaining the function/method's purpose.
    2.  If applicable, add a blank line and then more detailed explanation.
    3.  Use the 'Args:' section to describe each parameter, its type, and what it represents.
    4.  Use the 'Returns:' section to describe the return value and its type.
    5.  Use the 'Raises:' section to list any exceptions explicitly raised by the code.
    6.  Adhere strictly to PEP 257 formatting.
    7.  Base the docstring primarily on the 'Code Snippet to Document'. Use the 'Relevant Context' for 
    clarification or examples if needed.
    
    Also, check relevant content for the user given input code: {user_code}
    """)
    return context_query


In [15]:
def revised_prompt(ctx, helper_model_name):
    context = ctx
    OLLAMA_REWRITER_MODEL = helper_model_name
    rewritten_request = None
    rewriter_prompt = f"""
    You are an helpful assistant that refines prompts. Given the following context from the RAG knowledge base along with python code: {context}, generate an optimized prompt for another AI whose sole task is to create a Python docstring for the code and your output.
    The optimized prompt should clearly state the task, and subtly incorporate hints from the context if relevant, without necessarily repeating the entire context.
    Focus on creating a self-contained, clear instruction for the next AI.
    
    Generate only the optimized context prompt text for the docstring generation AI.
    """
    try:
        rewriter_response = ollama_client.generate(
            model=OLLAMA_REWRITER_MODEL,
            prompt=rewriter_prompt,
            #options={'temperature': 0.3} # Lower temperature for more focused rewriting
        )
        rewritten_request = rewriter_response.get('response', '').strip()
    except Exception as e:
        rewritten_request = rewriter_prompt # Ensure it's None on error
    return rewritten_request

In [17]:
def final_content_generation(context, user_code, rewritten_req):
    messages = [
        {'role': 'system', 'content': 'You are an expert Python programmer tasked with generating docstrings. You will receive context (if found), and the code to document in the final message. Use the context only if it is directly relevant to explaining the provided code. Return only the docstring and dont include the given python code in the output.'}
    ]
    
    # Add the retrieved context as a separate user message, if it exists
    if context:
        # Include source information in the context message for clarity
        messages.append({'role': 'user', 'content': f"Here is potentially relevant context retrieved from python\n{user_code}\n and content :\n---\n{rewritten_req}\n---"})
    else:
        # Explicitly state if no context was found or provided
        messages.append({'role': 'user', 'content': "No specific context was retrieved or provided for this request."})
    
    # Add the final user message with the code and the explicit request
    messages.append({'role': 'user', 'content': f"Based on any relevant context provided earlier, generate the Python docstring for the following code:\n```python\n{user_code}\n```\n\nOutput *only* the complete docstring content itself, starting with triple quotes. Dont include python codes in the output. You need to generate a single docstring as whole for given python class code."})
    
    try:
        response = ollama_client.chat(
            model=OLLAMA_MODEL,
            messages=messages
        )
    
        generated_docstring = response.get('message', {}).get('content', '').strip()
        if generated_docstring.startswith("```python"):
            generated_docstring = generated_docstring[len("```python"):].strip()
        elif generated_docstring.startswith("```"):
             generated_docstring = generated_docstring[len("```"):].strip()
    
        if generated_docstring.endswith("```"):
            generated_docstring = generated_docstring[:-len("```")].strip()
    
        # Ensure it starts with triple quotes if possible, otherwise print as is
        if not (generated_docstring.startswith('"""') or generated_docstring.startswith("'''")):
             print("(Model might not have generated a perfectly formatted docstring)")
    except Exception as e:
        print(f"Error communicating with Ollama chat endpoint: {e}")
    return generated_docstring

In [19]:
def run_simple_rag_pipeline(user_code, pinecone_index, emb_model, ollama_llm_client, helper_model_name, generator_model_name):
    user_cd = user_code
    context_query = context_qry(user_cd)
    context = ""
    source = "N/A"
    if pinecone_index and context_query.strip():
        query_embedding = model.encode(context_query).tolist()

        search_results = pinecone_index.query(
            vector=query_embedding,
            top_k=1, # Get the single most relevant document chunk
            include_metadata=True
        )
        if search_results.matches:
            match = search_results.matches[0]
            context = match.metadata.get('text', '')
        else:
            context = user_code
        retrieved_contexts_list.append(context)
        rewritten_req = revised_prompt(context, helper_model_name)
        rewritten_contexts_list.append(rewritten_req)
        final_content = final_content_generation(context, user_cd, rewritten_req)
        return final_content

In [21]:
#run_simple_rag_pipeline(user_code, pinecone_index, EMBEDDING_MODEL, ollama_client, OLLAMA_MODEL, OLLAMA_REWRITER_MODEL)

In [23]:
#Loading all the classes via pickle

In [25]:
class_files_df = pd.read_pickle('class_files_df.pkl')

In [27]:
#class_files_df = class_files_df[:5]

In [29]:
ground_truth = class_files_df["Comments"].to_list()

In [31]:
generated_docstrings_list = []
retrieved_contexts_list = []
rewritten_contexts_list = []

In [33]:
#total_rows = len(class_files_df)

In [35]:
for i, row in class_files_df.iterrows():
    print(i)
    user_code = row["Code_without_comments"]
    output = run_simple_rag_pipeline(user_code, pinecone_index, EMBEDDING_MODEL, ollama_client, OLLAMA_MODEL, OLLAMA_REWRITER_MODEL)
    generated_docstrings_list.append(output)
class_files_df["RAG_Docstring"] = generated_docstrings_list

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
(Model might not have generated a perfectly formatted docstring)
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66


In [37]:
class_files_df

Unnamed: 0,Full_code,Comments,Code_without_comments,Clean_classes,RAG_Docstring
0,"class Adamax(optimizer_v2.OptimizerV2):\n ""...","""""""Optimizer that implements the Adamax algori...",class Adamax(optimizer_v2.OptimizerV2):\n \...,class dummy_class_1(optimizer_v2.OptimizerV2):...,"""""""\nAdamax is an extension of the Adam optimi..."
1,class AgglomerationTransform(TransformerMixin)...,"""""""\n A class for feature agglomeration via...",class AgglomerationTransform(TransformerMixin)...,class dummy_class_1(TransformerMixin):\n \n...,"""""""\nThis is a custom transformer that perform..."
2,"class AveragePooling1D(Pooling1D):\n """"""Ave...","""""""Average pooling for temporal data.\n\n Dow...",class AveragePooling1D(Pooling1D):\n \n\n ...,class dummy_class_1(Pooling1D):\n \n\n d...,"""""""\nAveragePooling1D is a subclass of Pooling..."
3,"class AveragePooling2D(Pooling2D):\n """"""Ave...","""""""Average pooling operation for spatial data....",class AveragePooling2D(Pooling2D):\n \n\n ...,class dummy_class_1(Pooling2D):\n \n\n d...,"""""""\nAveragePooling2D is a subclass of Pooling..."
4,"class AveragePooling3D(Pooling3D):\n """"""Ave...","""""""Average pooling operation for 3D data (spat...",class AveragePooling3D(Pooling3D):\n \n\n ...,class dummy_class_1(Pooling3D):\n \n\n d...,"""""""\nA subclass of `Pooling3D` that performs a..."
...,...,...,...,...,...
62,"class UpSampling3D(Layer):\n """"""Upsampling ...","""""""Upsampling layer for 3D inputs.\n\n Repeat...",class UpSampling3D(Layer):\n \n\n def __...,class dummy_class_1(Layer):\n \n\n def d...,"""""""\nUpSampling3D is a type of Layer that perf..."
63,"class ZeroPadding1D(Layer):\n """"""Zero-paddi...","""""""Zero-padding layer for 1D input (e.g. tempo...",class ZeroPadding1D(Layer):\n \n\n def _...,class dummy_class_1(Layer):\n \n\n def d...,"""""""\nZeroPadding1D is a subclass of Keras' Lay..."
64,"class ZeroPadding2D(Layer):\n """"""Zero-paddi...","""""""Zero-padding layer for 2D input (e.g. pictu...",class ZeroPadding2D(Layer):\n \n\n def _...,class dummy_class_1(Layer):\n \n\n def d...,"""""""\nZeroPadding2D is a custom layer class use..."
65,"class ZeroPadding3D(Layer):\n """"""Zero-paddi...","""""""Zero-padding layer for 3D data (spatial or ...",class ZeroPadding3D(Layer):\n \n\n def _...,class dummy_class_1(Layer):\n \n\n def d...,"""""""\nZeroPadding3D is a specific layer used wi..."


In [39]:
def clean_rag_docstring(docstring_text):
    if not isinstance(docstring_text, str):
        return docstring_text

    if docstring_text.startswith("# ERROR:") or docstring_text.startswith("# SKIPPED:"):
        return docstring_text

    text = docstring_text.strip()

    if text.startswith("```python"):
        text = text[len("```python"):].strip()
    elif text.startswith("```"):
        text = text[len("```"):].strip()
    if text.endswith("```"):
        text = text[:-len("```")].strip()

    content_inside_quotes = None
    first_double_quotes = text.find('"""')
    if first_double_quotes != -1:
        last_double_quotes = text.rfind('"""')
        if last_double_quotes > first_double_quotes and (last_double_quotes + 3) <= len(text):
            content_inside_quotes = text[first_double_quotes + 3 : last_double_quotes].strip()

    if content_inside_quotes is None or not content_inside_quotes.strip():
        first_single_quotes = text.find("'''")
        if first_single_quotes != -1:
            last_single_quotes = text.rfind("'''")
            if last_single_quotes > first_single_quotes and (last_single_quotes + 3) <= len(text):
                content_inside_quotes = text[first_single_quotes + 3 : last_single_quotes].strip()
    
    if content_inside_quotes is not None and content_inside_quotes.strip():
        final_text_to_clean = content_inside_quotes
    else:
        final_text_to_clean = text
        if final_text_to_clean.startswith('"""') and final_text_to_clean.endswith('"""') and len(final_text_to_clean) >= 6:
            final_text_to_clean = final_text_to_clean[3:-3].strip()
        elif final_text_to_clean.startswith("'''") and final_text_to_clean.endswith("'''") and len(final_text_to_clean) >= 6:
            final_text_to_clean = final_text_to_clean[3:-3].strip()

    final_text_to_clean = re.sub(r"(?i)^class\s+\w+:\s*\n?", "", final_text_to_clean).strip()
    
    return final_text_to_clean

class_files_df["RAG_Docstring"] = class_files_df["RAG_Docstring"].astype(str).apply(clean_rag_docstring)

In [41]:
class_files_df

Unnamed: 0,Full_code,Comments,Code_without_comments,Clean_classes,RAG_Docstring
0,"class Adamax(optimizer_v2.OptimizerV2):\n ""...","""""""Optimizer that implements the Adamax algori...",class Adamax(optimizer_v2.OptimizerV2):\n \...,class dummy_class_1(optimizer_v2.OptimizerV2):...,Adamax is an extension of the Adam optimizatio...
1,class AgglomerationTransform(TransformerMixin)...,"""""""\n A class for feature agglomeration via...",class AgglomerationTransform(TransformerMixin)...,class dummy_class_1(TransformerMixin):\n \n...,This is a custom transformer that performs fea...
2,"class AveragePooling1D(Pooling1D):\n """"""Ave...","""""""Average pooling for temporal data.\n\n Dow...",class AveragePooling1D(Pooling1D):\n \n\n ...,class dummy_class_1(Pooling1D):\n \n\n d...,AveragePooling1D is a subclass of Pooling1D th...
3,"class AveragePooling2D(Pooling2D):\n """"""Ave...","""""""Average pooling operation for spatial data....",class AveragePooling2D(Pooling2D):\n \n\n ...,class dummy_class_1(Pooling2D):\n \n\n d...,AveragePooling2D is a subclass of Pooling2D th...
4,"class AveragePooling3D(Pooling3D):\n """"""Ave...","""""""Average pooling operation for 3D data (spat...",class AveragePooling3D(Pooling3D):\n \n\n ...,class dummy_class_1(Pooling3D):\n \n\n d...,A subclass of `Pooling3D` that performs averag...
...,...,...,...,...,...
62,"class UpSampling3D(Layer):\n """"""Upsampling ...","""""""Upsampling layer for 3D inputs.\n\n Repeat...",class UpSampling3D(Layer):\n \n\n def __...,class dummy_class_1(Layer):\n \n\n def d...,UpSampling3D is a type of Layer that performs ...
63,"class ZeroPadding1D(Layer):\n """"""Zero-paddi...","""""""Zero-padding layer for 1D input (e.g. tempo...",class ZeroPadding1D(Layer):\n \n\n def _...,class dummy_class_1(Layer):\n \n\n def d...,ZeroPadding1D is a subclass of Keras' Layer cl...
64,"class ZeroPadding2D(Layer):\n """"""Zero-paddi...","""""""Zero-padding layer for 2D input (e.g. pictu...",class ZeroPadding2D(Layer):\n \n\n def _...,class dummy_class_1(Layer):\n \n\n def d...,ZeroPadding2D is a custom layer class used for...
65,"class ZeroPadding3D(Layer):\n """"""Zero-paddi...","""""""Zero-padding layer for 3D data (spatial or ...",class ZeroPadding3D(Layer):\n \n\n def _...,class dummy_class_1(Layer):\n \n\n def d...,ZeroPadding3D is a specific layer used within ...


In [43]:
def calculate_rouge(df, reference_column, hypothesis_column):
    rouge = Rouge()

    def calculate_score(row):
        scores = rouge.get_scores(row[hypothesis_column].lower(), row[reference_column].lower())
        return scores[0]['rouge-1']['f']

    df['ROUGE-1 ' + reference_column] = df.apply(calculate_score, axis=1)
    return df

# Calculate ROUGE-1 scores
data_1 = calculate_rouge(class_files_df, 'Comments', 'RAG_Docstring')

In [44]:
data_1

Unnamed: 0,Full_code,Comments,Code_without_comments,Clean_classes,RAG_Docstring,ROUGE-1 Comments
0,"class Adamax(optimizer_v2.OptimizerV2):\n ""...","""""""Optimizer that implements the Adamax algori...",class Adamax(optimizer_v2.OptimizerV2):\n \...,class dummy_class_1(optimizer_v2.OptimizerV2):...,Adamax is an extension of the Adam optimizatio...,0.204969
1,class AgglomerationTransform(TransformerMixin)...,"""""""\n A class for feature agglomeration via...",class AgglomerationTransform(TransformerMixin)...,class dummy_class_1(TransformerMixin):\n \n...,This is a custom transformer that performs fea...,0.096386
2,"class AveragePooling1D(Pooling1D):\n """"""Ave...","""""""Average pooling for temporal data.\n\n Dow...",class AveragePooling1D(Pooling1D):\n \n\n ...,class dummy_class_1(Pooling1D):\n \n\n d...,AveragePooling1D is a subclass of Pooling1D th...,0.345382
3,"class AveragePooling2D(Pooling2D):\n """"""Ave...","""""""Average pooling operation for spatial data....",class AveragePooling2D(Pooling2D):\n \n\n ...,class dummy_class_1(Pooling2D):\n \n\n d...,AveragePooling2D is a subclass of Pooling2D th...,0.240000
4,"class AveragePooling3D(Pooling3D):\n """"""Ave...","""""""Average pooling operation for 3D data (spat...",class AveragePooling3D(Pooling3D):\n \n\n ...,class dummy_class_1(Pooling3D):\n \n\n d...,A subclass of `Pooling3D` that performs averag...,0.445230
...,...,...,...,...,...,...
62,"class UpSampling3D(Layer):\n """"""Upsampling ...","""""""Upsampling layer for 3D inputs.\n\n Repeat...",class UpSampling3D(Layer):\n \n\n def __...,class dummy_class_1(Layer):\n \n\n def d...,UpSampling3D is a type of Layer that performs ...,0.272000
63,"class ZeroPadding1D(Layer):\n """"""Zero-paddi...","""""""Zero-padding layer for 1D input (e.g. tempo...",class ZeroPadding1D(Layer):\n \n\n def _...,class dummy_class_1(Layer):\n \n\n def d...,ZeroPadding1D is a subclass of Keras' Layer cl...,0.163934
64,"class ZeroPadding2D(Layer):\n """"""Zero-paddi...","""""""Zero-padding layer for 2D input (e.g. pictu...",class ZeroPadding2D(Layer):\n \n\n def _...,class dummy_class_1(Layer):\n \n\n def d...,ZeroPadding2D is a custom layer class used for...,0.272059
65,"class ZeroPadding3D(Layer):\n """"""Zero-paddi...","""""""Zero-padding layer for 3D data (spatial or ...",class ZeroPadding3D(Layer):\n \n\n def _...,class dummy_class_1(Layer):\n \n\n def d...,ZeroPadding3D is a specific layer used within ...,0.197719


In [45]:
def calculate_bleu(df, reference_column, hypothesis_column):
    nltk.download('punkt')

    def calculate_score(row):
        reference = [row[reference_column].lower().split()]
        hypothesis = row[hypothesis_column].lower().split()
        score = sentence_bleu(reference, hypothesis, weights=(0.25, 0.25, 0.25, 0.25))
        return score

    df['BLEU Score ' + reference_column] = df.apply(calculate_score, axis=1)
    return df

In [46]:
# Calculate BLEU scores
data_1 = calculate_bleu(data_1, 'Comments', 'RAG_Docstring')

[nltk_data] Downloading package punkt to
[nltk_data]     /Users/balajivenktesh/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


In [47]:
#data_1.to_pickle('comments_3.pkl')

In [51]:
#data_1.to_excel('comments_3.xlsx')

In [55]:
data_1

Unnamed: 0,Full_code,Comments,Code_without_comments,Clean_classes,RAG_Docstring,ROUGE-1 Comments,BLEU Score Comments
0,"class Adamax(optimizer_v2.OptimizerV2):\n ""...","""""""Optimizer that implements the Adamax algori...",class Adamax(optimizer_v2.OptimizerV2):\n \...,class dummy_class_1(optimizer_v2.OptimizerV2):...,Adamax is an extension of the Adam optimizatio...,0.204969,2.909382e-155
1,class AgglomerationTransform(TransformerMixin)...,"""""""\n A class for feature agglomeration via...",class AgglomerationTransform(TransformerMixin)...,class dummy_class_1(TransformerMixin):\n \n...,This is a custom transformer that performs fea...,0.096386,7.399357e-232
2,"class AveragePooling1D(Pooling1D):\n """"""Ave...","""""""Average pooling for temporal data.\n\n Dow...",class AveragePooling1D(Pooling1D):\n \n\n ...,class dummy_class_1(Pooling1D):\n \n\n d...,AveragePooling1D is a subclass of Pooling1D th...,0.345382,3.004795e-02
3,"class AveragePooling2D(Pooling2D):\n """"""Ave...","""""""Average pooling operation for spatial data....",class AveragePooling2D(Pooling2D):\n \n\n ...,class dummy_class_1(Pooling2D):\n \n\n d...,AveragePooling2D is a subclass of Pooling2D th...,0.240000,2.507493e-02
4,"class AveragePooling3D(Pooling3D):\n """"""Ave...","""""""Average pooling operation for 3D data (spat...",class AveragePooling3D(Pooling3D):\n \n\n ...,class dummy_class_1(Pooling3D):\n \n\n d...,A subclass of `Pooling3D` that performs averag...,0.445230,1.480461e-01
...,...,...,...,...,...,...,...
62,"class UpSampling3D(Layer):\n """"""Upsampling ...","""""""Upsampling layer for 3D inputs.\n\n Repeat...",class UpSampling3D(Layer):\n \n\n def __...,class dummy_class_1(Layer):\n \n\n def d...,UpSampling3D is a type of Layer that performs ...,0.272000,9.407484e-79
63,"class ZeroPadding1D(Layer):\n """"""Zero-paddi...","""""""Zero-padding layer for 1D input (e.g. tempo...",class ZeroPadding1D(Layer):\n \n\n def _...,class dummy_class_1(Layer):\n \n\n def d...,ZeroPadding1D is a subclass of Keras' Layer cl...,0.163934,6.983278e-79
64,"class ZeroPadding2D(Layer):\n """"""Zero-paddi...","""""""Zero-padding layer for 2D input (e.g. pictu...",class ZeroPadding2D(Layer):\n \n\n def _...,class dummy_class_1(Layer):\n \n\n def d...,ZeroPadding2D is a custom layer class used for...,0.272059,5.164530e-02
65,"class ZeroPadding3D(Layer):\n """"""Zero-paddi...","""""""Zero-padding layer for 3D data (spatial or ...",class ZeroPadding3D(Layer):\n \n\n def _...,class dummy_class_1(Layer):\n \n\n def d...,ZeroPadding3D is a specific layer used within ...,0.197719,3.446904e-155


In [57]:
# Calculate BERT encoding score, using cosine similarity
def calculate_bert_score(ground_truth, generated):
    # Calculate BERT score
    _, _, bert_score_f1 = score([ground_truth], [generated], lang='en', model_type='bert-base-uncased')

    return bert_score_f1.item()   

In [59]:
# Calculate BLEU scores
list_append_1 = []
for index, row in data_1.iterrows():
    list_append_1.append(calculate_bert_score(str(row["Comments"]), str(row["RAG_Docstring"])))

In [60]:
data_1["Accuracy"] = list_append_1

In [61]:
data_1

Unnamed: 0,Full_code,Comments,Code_without_comments,Clean_classes,RAG_Docstring,ROUGE-1 Comments,BLEU Score Comments,Accuracy
0,"class Adamax(optimizer_v2.OptimizerV2):\n ""...","""""""Optimizer that implements the Adamax algori...",class Adamax(optimizer_v2.OptimizerV2):\n \...,class dummy_class_1(optimizer_v2.OptimizerV2):...,Adamax is an extension of the Adam optimizatio...,0.204969,2.909382e-155,0.617686
1,class AgglomerationTransform(TransformerMixin)...,"""""""\n A class for feature agglomeration via...",class AgglomerationTransform(TransformerMixin)...,class dummy_class_1(TransformerMixin):\n \n...,This is a custom transformer that performs fea...,0.096386,7.399357e-232,0.395134
2,"class AveragePooling1D(Pooling1D):\n """"""Ave...","""""""Average pooling for temporal data.\n\n Dow...",class AveragePooling1D(Pooling1D):\n \n\n ...,class dummy_class_1(Pooling1D):\n \n\n d...,AveragePooling1D is a subclass of Pooling1D th...,0.345382,3.004795e-02,0.558474
3,"class AveragePooling2D(Pooling2D):\n """"""Ave...","""""""Average pooling operation for spatial data....",class AveragePooling2D(Pooling2D):\n \n\n ...,class dummy_class_1(Pooling2D):\n \n\n d...,AveragePooling2D is a subclass of Pooling2D th...,0.240000,2.507493e-02,0.600608
4,"class AveragePooling3D(Pooling3D):\n """"""Ave...","""""""Average pooling operation for 3D data (spat...",class AveragePooling3D(Pooling3D):\n \n\n ...,class dummy_class_1(Pooling3D):\n \n\n d...,A subclass of `Pooling3D` that performs averag...,0.445230,1.480461e-01,0.707121
...,...,...,...,...,...,...,...,...
62,"class UpSampling3D(Layer):\n """"""Upsampling ...","""""""Upsampling layer for 3D inputs.\n\n Repeat...",class UpSampling3D(Layer):\n \n\n def __...,class dummy_class_1(Layer):\n \n\n def d...,UpSampling3D is a type of Layer that performs ...,0.272000,9.407484e-79,0.603818
63,"class ZeroPadding1D(Layer):\n """"""Zero-paddi...","""""""Zero-padding layer for 1D input (e.g. tempo...",class ZeroPadding1D(Layer):\n \n\n def _...,class dummy_class_1(Layer):\n \n\n def d...,ZeroPadding1D is a subclass of Keras' Layer cl...,0.163934,6.983278e-79,0.568016
64,"class ZeroPadding2D(Layer):\n """"""Zero-paddi...","""""""Zero-padding layer for 2D input (e.g. pictu...",class ZeroPadding2D(Layer):\n \n\n def _...,class dummy_class_1(Layer):\n \n\n def d...,ZeroPadding2D is a custom layer class used for...,0.272059,5.164530e-02,0.664046
65,"class ZeroPadding3D(Layer):\n """"""Zero-paddi...","""""""Zero-padding layer for 3D data (spatial or ...",class ZeroPadding3D(Layer):\n \n\n def _...,class dummy_class_1(Layer):\n \n\n def d...,ZeroPadding3D is a specific layer used within ...,0.197719,3.446904e-155,0.623957


In [62]:
# Calculate number of syllables in docstring
def count_syllables(word):
    # Remove punctuation
    word = re.sub(r'[^a-zA-Z]', '', word)
    
    # Vowel count
    vowels = 'aeiouy'
    syllables = 0
    last_was_vowel = False
    for char in word:
        if char.lower() in vowels:
            if not last_was_vowel:
                syllables += 1
            last_was_vowel = True
        else:
            last_was_vowel = False
    
    # Adjust syllable count for words ending in 'e'
    if word.endswith(('e', 'es', 'ed')):
        syllables -= 1
    
    # Adjust syllable count for words with no vowels
    if syllables == 0:
        syllables = 1
    
    return syllables

In [63]:
# Calculate Flesch reading score
def flesch_reading_ease(text):
    sentences = text.count('.') + text.count('!') + text.count('?') + 1
    words = len(re.findall(r'\b\w+\b', text))
    syllables = sum(count_syllables(word) for word in text.split())
    
    # Calculate Flesch Reading Ease score
    score = 206.835 - 1.015 * (words / sentences) - 84.6 * (syllables / words)
    
    return score

In [64]:
# Calculate Easy scores
list_append_2 = []
for index, row in data_1.iterrows():
    list_append_2.append(flesch_reading_ease(str(row["RAG_Docstring"])))

In [65]:
data_1["Ease"] = list_append_2

In [66]:
data_1

Unnamed: 0,Full_code,Comments,Code_without_comments,Clean_classes,RAG_Docstring,ROUGE-1 Comments,BLEU Score Comments,Accuracy,Ease
0,"class Adamax(optimizer_v2.OptimizerV2):\n ""...","""""""Optimizer that implements the Adamax algori...",class Adamax(optimizer_v2.OptimizerV2):\n \...,class dummy_class_1(optimizer_v2.OptimizerV2):...,Adamax is an extension of the Adam optimizatio...,0.204969,2.909382e-155,0.617686,48.048128
1,class AgglomerationTransform(TransformerMixin)...,"""""""\n A class for feature agglomeration via...",class AgglomerationTransform(TransformerMixin)...,class dummy_class_1(TransformerMixin):\n \n...,This is a custom transformer that performs fea...,0.096386,7.399357e-232,0.395134,46.463790
2,"class AveragePooling1D(Pooling1D):\n """"""Ave...","""""""Average pooling for temporal data.\n\n Dow...",class AveragePooling1D(Pooling1D):\n \n\n ...,class dummy_class_1(Pooling1D):\n \n\n d...,AveragePooling1D is a subclass of Pooling1D th...,0.345382,3.004795e-02,0.558474,60.116429
3,"class AveragePooling2D(Pooling2D):\n """"""Ave...","""""""Average pooling operation for spatial data....",class AveragePooling2D(Pooling2D):\n \n\n ...,class dummy_class_1(Pooling2D):\n \n\n d...,AveragePooling2D is a subclass of Pooling2D th...,0.240000,2.507493e-02,0.600608,51.443057
4,"class AveragePooling3D(Pooling3D):\n """"""Ave...","""""""Average pooling operation for 3D data (spat...",class AveragePooling3D(Pooling3D):\n \n\n ...,class dummy_class_1(Pooling3D):\n \n\n d...,A subclass of `Pooling3D` that performs averag...,0.445230,1.480461e-01,0.707121,56.083861
...,...,...,...,...,...,...,...,...,...
62,"class UpSampling3D(Layer):\n """"""Upsampling ...","""""""Upsampling layer for 3D inputs.\n\n Repeat...",class UpSampling3D(Layer):\n \n\n def __...,class dummy_class_1(Layer):\n \n\n def d...,UpSampling3D is a type of Layer that performs ...,0.272000,9.407484e-79,0.603818,56.250000
63,"class ZeroPadding1D(Layer):\n """"""Zero-paddi...","""""""Zero-padding layer for 1D input (e.g. tempo...",class ZeroPadding1D(Layer):\n \n\n def _...,class dummy_class_1(Layer):\n \n\n def d...,ZeroPadding1D is a subclass of Keras' Layer cl...,0.163934,6.983278e-79,0.568016,51.756328
64,"class ZeroPadding2D(Layer):\n """"""Zero-paddi...","""""""Zero-padding layer for 2D input (e.g. pictu...",class ZeroPadding2D(Layer):\n \n\n def _...,class dummy_class_1(Layer):\n \n\n def d...,ZeroPadding2D is a custom layer class used for...,0.272059,5.164530e-02,0.664046,45.357763
65,"class ZeroPadding3D(Layer):\n """"""Zero-paddi...","""""""Zero-padding layer for 3D data (spatial or ...",class ZeroPadding3D(Layer):\n \n\n def _...,class dummy_class_1(Layer):\n \n\n def d...,ZeroPadding3D is a specific layer used within ...,0.197719,3.446904e-155,0.623957,46.354139


In [67]:
#%%
def compress(input):
	return zlib.compress(input.encode())

In [68]:
def conciness(ground_truth, generated):
    comp1 = compress(ground_truth)
    comp2 = compress(generated)
    return sys.getsizeof(comp2) / sys.getsizeof(comp1)

In [69]:
# Calculate Conciseness scores
list_append_3 = []
for index, row in data_1.iterrows():
    list_append_3.append(conciness(str(row["Comments"]), str(row["RAG_Docstring"])))

In [70]:
data_1["Conciseness"] = list_append_3

In [71]:
data_1

Unnamed: 0,Full_code,Comments,Code_without_comments,Clean_classes,RAG_Docstring,ROUGE-1 Comments,BLEU Score Comments,Accuracy,Ease,Conciseness
0,"class Adamax(optimizer_v2.OptimizerV2):\n ""...","""""""Optimizer that implements the Adamax algori...",class Adamax(optimizer_v2.OptimizerV2):\n \...,class dummy_class_1(optimizer_v2.OptimizerV2):...,Adamax is an extension of the Adam optimizatio...,0.204969,2.909382e-155,0.617686,48.048128,0.706717
1,class AgglomerationTransform(TransformerMixin)...,"""""""\n A class for feature agglomeration via...",class AgglomerationTransform(TransformerMixin)...,class dummy_class_1(TransformerMixin):\n \n...,This is a custom transformer that performs fea...,0.096386,7.399357e-232,0.395134,46.463790,4.348624
2,"class AveragePooling1D(Pooling1D):\n """"""Ave...","""""""Average pooling for temporal data.\n\n Dow...",class AveragePooling1D(Pooling1D):\n \n\n ...,class dummy_class_1(Pooling1D):\n \n\n d...,AveragePooling1D is a subclass of Pooling1D th...,0.345382,3.004795e-02,0.558474,60.116429,0.549145
3,"class AveragePooling2D(Pooling2D):\n """"""Ave...","""""""Average pooling operation for spatial data....",class AveragePooling2D(Pooling2D):\n \n\n ...,class dummy_class_1(Pooling2D):\n \n\n d...,AveragePooling2D is a subclass of Pooling2D th...,0.240000,2.507493e-02,0.600608,51.443057,0.502455
4,"class AveragePooling3D(Pooling3D):\n """"""Ave...","""""""Average pooling operation for 3D data (spat...",class AveragePooling3D(Pooling3D):\n \n\n ...,class dummy_class_1(Pooling3D):\n \n\n d...,A subclass of `Pooling3D` that performs averag...,0.445230,1.480461e-01,0.707121,56.083861,0.829885
...,...,...,...,...,...,...,...,...,...,...
62,"class UpSampling3D(Layer):\n """"""Upsampling ...","""""""Upsampling layer for 3D inputs.\n\n Repeat...",class UpSampling3D(Layer):\n \n\n def __...,class dummy_class_1(Layer):\n \n\n def d...,UpSampling3D is a type of Layer that performs ...,0.272000,9.407484e-79,0.603818,56.250000,1.275410
63,"class ZeroPadding1D(Layer):\n """"""Zero-paddi...","""""""Zero-padding layer for 1D input (e.g. tempo...",class ZeroPadding1D(Layer):\n \n\n def _...,class dummy_class_1(Layer):\n \n\n def d...,ZeroPadding1D is a subclass of Keras' Layer cl...,0.163934,6.983278e-79,0.568016,51.756328,1.673347
64,"class ZeroPadding2D(Layer):\n """"""Zero-paddi...","""""""Zero-padding layer for 2D input (e.g. pictu...",class ZeroPadding2D(Layer):\n \n\n def _...,class dummy_class_1(Layer):\n \n\n def d...,ZeroPadding2D is a custom layer class used for...,0.272059,5.164530e-02,0.664046,45.357763,0.877039
65,"class ZeroPadding3D(Layer):\n """"""Zero-paddi...","""""""Zero-padding layer for 3D data (spatial or ...",class ZeroPadding3D(Layer):\n \n\n def _...,class dummy_class_1(Layer):\n \n\n def d...,ZeroPadding3D is a specific layer used within ...,0.197719,3.446904e-155,0.623957,46.354139,1.095890


In [72]:
#data_1.to_excel('comments_3.xlsx')

In [73]:
def calculate_parameter_coverage(code_str, docstring_str):
    """
    Calculates the proportion of function/method parameters mentioned in the docstring.
    Returns a float (0.0 to 1.0) or None if no parameters are found in the code.
    """        
    match = re.search(r"def\s+\w+\s*\((.*?)\):", code_str)
    if not match:
        match = re.search(r"async\s+def\s+\w+\s*\((.*?)\):", code_str) 

    if not match:
        return None 

    params_str = match.group(1)
    if not params_str.strip(): 
        return 1.0 

    potential_params = [p.strip().split('=')[0].split(':')[0].strip() for p in params_str.split(',')]
    actual_params = [p for p in potential_params if p and p not in ('self', 'cls') and not p.startswith('*')]

    if not actual_params:
        return 1.0 

    covered_params = 0
    docstring_lower = docstring_str.lower()
    for param_name in actual_params:
        if re.search(r"\b" + re.escape(param_name.lower()) + r"\b", docstring_lower):
            covered_params += 1
        elif f"{param_name.lower()}:" in docstring_lower or f"parameter {param_name.lower()}" in docstring_lower:
             covered_params += 1
    return covered_params / len(actual_params) if actual_params else 1.0

In [74]:
# --- Return Value Coverage Calculation Function ---
def calculate_return_coverage(code_str, docstring_str):
    """
    Checks if the docstring mentions a return value if the code seems to have one.
    Returns 1 if covered/not applicable, 0 if potentially missing, None on error.
    """
    has_return_statement = False
    for line in code_str.splitlines():
        stripped_line = line.strip()
        if stripped_line.startswith("return ") and not stripped_line.endswith("return None") and len(stripped_line) > len("return "):
            has_return_statement = True
            break
    
    if not has_return_statement:
        return 1.0 

    docstring_lower = docstring_str.lower()
    return_keywords = ["return", "returns", "yield", "yields"] 
    if any(keyword in docstring_lower for keyword in return_keywords):
        return 1.0
    else:
        return 0.0

In [75]:
# --- Basic Faithfulness Metric Function ---
def calculate_basic_faithfulness(generated_docstring, retrieved_context_text):
    """
    Calculates a basic faithfulness score based on token overlap.
    This is a crude proxy for actual faithfulness.
    Returns a float (0.0 to 1.0) or None.
    """
    # Simple tokenization and stopword removal
    stop_words = set(["a", "an", "the", "is", "are", "was", "were", "be", "been", "being", "have", "has", "had", "do", "does", "did", "will", "would", "should", "can", "could", "may", "might", "must", "and", "or", "but", "if", "of", "at", "by", "for", "with", "about", "to", "in", "on", "this", "that", "it", "its", "you", "your", "i", "me", "my", "he", "she", "him", "her", "they", "them", "their"])
    
    try:
        gen_tokens = set(token.lower() for token in re.findall(r'\b\w+\b', generated_docstring) if token.lower() not in stop_words)
        ctx_tokens = set(token.lower() for token in re.findall(r'\b\w+\b', retrieved_context_text) if token.lower() not in stop_words)
    except Exception as e:
        print(f"Error tokenizing for faithfulness: {e}")
        return None

    if not gen_tokens: # If generated docstring has no valid tokens after filtering
        return 0.0 

    overlapping_tokens = gen_tokens.intersection(ctx_tokens)
    
    return len(overlapping_tokens) / len(gen_tokens) if gen_tokens else 0.0

In [76]:
def calculate_exception_coverage(code_str, docstring_str):
    if not all(isinstance(s, str) for s in [code_str, docstring_str]) or not docstring_str.strip() or docstring_str.startswith(("# ERROR:", "# SKIPPED:")): return None
    raised_exceptions = set(re.findall(r"raise\s+(\w+)", code_str)) # Basic: finds exception names
    if not raised_exceptions: return 1.0 # No exceptions to cover
    
    docstring_lower = docstring_str.lower()
    mentions_raises_section = "raises:" in docstring_lower
    covered_exceptions = 0
    for exc_name in raised_exceptions:
        if re.search(r"\b" + re.escape(exc_name.lower()) + r"\b", docstring_lower):
            covered_exceptions += 1
            
    # If a "Raises:" section exists, it's good, even if not all specific exceptions are named (simple check)
    if mentions_raises_section and raised_exceptions: return 1.0 
    if not raised_exceptions: return 1.0 # Should have been caught above
    return covered_exceptions / len(raised_exceptions) if raised_exceptions else 1.0

In [77]:
# --- Adherence to Docstring Conventions (Pydocstyle) ---
PYDOCSTYLE_ENABLED = True
def check_docstring_adherence_pydocstyle(code_str, generated_docstring_content):
    """
    Checks adherence of a generated docstring to PEP 257 using pydocstyle.
    The generated_docstring_content should be the *content* of the docstring,
    not including the triple quotes.
    Returns:
        float: A score from 0.0 to 1.0 (1.0 means no errors, 0.0 means many errors).
               Returns None if pydocstyle is not enabled or an error occurs.
    """
    # Sanitize content for embedding within triple quotes
    safe_content = generated_docstring_content.replace('\\', '\\\\') # Escape backslashes
    safe_content = safe_content.replace('"""', '\\"\\"\\"') # Escape internal triple-double-quotes
    safe_content = safe_content.replace("'''", "\\'\\'\\'") # Escape internal triple-single-quotes
    
    # Prepare the content for insertion, ensuring correct indentation for multi-line docstrings
    lines = safe_content.split('\n')
    if len(lines) == 1:
        # Single line docstring content, no special indentation needed beyond the initial one
        indented_docstring_body = lines[0]
    else:
        # Multi-line: first line as is, subsequent lines indented with 4 spaces
        # This assumes the docstring will be placed with an initial 4-space indent.
        indented_docstring_body = lines[0] + '\n' + '\n'.join(['    ' + line for line in lines[1:]])


    # Construct a minimal, valid Python snippet for pydocstyle
    # Try to place the docstring correctly within a class or function if identifiable
    code_prefix = ""
    code_suffix = "\n    pass" # Default suffix

    class_match = re.search(r"^(.*\bclass\s+\w+\s*\(?.*\)?:)", code_str, re.MULTILINE)
    func_match = re.search(r"^(.*\b(async\s+)?def\s+\w+\s*\(?.*\)?:)", code_str, re.MULTILINE)

    if class_match:
        header = class_match.group(1)
        # Find the end of the header line to insert the docstring
        code_prefix = code_str[:class_match.end()] + f'\n    """{indented_docstring_body}"""'
        code_suffix = code_str[class_match.end():] # The rest of the original class code
        # Ensure there's at least a 'pass' or some body if the original was just a header
        if not code_suffix.strip() or code_suffix.strip().startswith("#"):
            code_suffix = "\n    pass" + code_suffix 
        code_for_pydocstyle_check = code_prefix + code_suffix

    elif func_match:
        header = func_match.group(1)
        code_prefix = code_str[:func_match.end()] + f'\n    """{indented_docstring_body}"""'
        code_suffix = code_str[func_match.end():]
        if not code_suffix.strip() or code_suffix.strip().startswith("#"):
            code_suffix = "\n    pass" + code_suffix
        code_for_pydocstyle_check = code_prefix + code_suffix
    else:
        # Fallback: treat as module-level docstring if no class/def found
        # This is less ideal as the original code_str might not be a full module
        code_for_pydocstyle_check = f'"""{generated_docstring_content}"""\n{code_str}'


    errors_count = 0
    filtered_errors_count = 0
    tmp_file_path = None 
    try:
        with tempfile.NamedTemporaryFile(mode='w', suffix='.py', delete=False, encoding='utf-8') as tmp_file:
            tmp_file.write(code_for_pydocstyle_check)
            tmp_file_path = tmp_file.name
        
        command = ['pydocstyle', tmp_file_path]
        process = subprocess.run(command, capture_output=True, text=True, encoding='utf-8')
        
        output = process.stdout.strip()
        print
        if output:
            all_errors = output.splitlines()
            errors_count = len(all_errors)
            # Filter out D100 (Missing docstring in public module) as it's an artifact
            # and D101, D102, D103 if we are only checking the first docstring.
            # For now, just D100 as the dummy structure is a module.
            filtered_errors = [err for err in all_errors if not err.strip().endswith("D100: Missing docstring in public module")]
            filtered_errors_count = len(filtered_errors)
        
        if process.stderr:
            if "Cannot parse file" in process.stderr or "unexpected EOF while parsing" in process.stderr or "invalid syntax" in process.stderr :
                 print(f"Pydocstyle CRITICAL PARSE ERROR for temp file {tmp_file_path}: {process.stderr}")
                 print("--- Content written to temp file that failed parsing: ---")
                 print(code_for_pydocstyle_check)
                 print("--------------------------------------------------------")
                 return 0.0 # Penalize heavily for parse error

    except Exception as e:
        print(f"An exception occurred during pydocstyle check: {str(e)}")
        if tmp_file_path and os.path.exists(tmp_file_path): # Check if tmp_file_path was assigned
             try:
                with open(tmp_file_path, 'r', encoding='utf-8') as f_err:
                    print(f"Content of temp file '{tmp_file_path}' that caused exception:\n{f_err.read()}")
             except Exception as read_err:
                print(f"Could not read temp file {tmp_file_path}: {read_err}")
        return None # Error during check
    finally:
        if tmp_file_path and os.path.exists(tmp_file_path):
            os.remove(tmp_file_path)
    
    # Normalize score based on filtered errors.
    # Using 10 as the denominator makes the score less harsh than 5.
    return max(0.0, 1.0 - (filtered_errors_count / 10.0))

In [78]:
#data = pd.read_excel("./comments_3.xlsx")

In [79]:
#data

In [80]:
param_coverage_list = []
return_coverage_list = []
faithfulness_list = []

In [81]:
for index, row in data_1.iterrows():
    param_coverage_list.append(calculate_parameter_coverage(str(row["Code_without_comments"]), str(row["RAG_Docstring"])))

In [82]:
data_1["Parameter_Coverage"] = param_coverage_list

In [83]:
for index, row in data_1.iterrows():
    return_coverage_list.append(calculate_return_coverage(str(row["Code_without_comments"]), str(row["RAG_Docstring"])))

In [84]:
data_1["Return_Coverage"] = return_coverage_list

In [85]:
data_1

Unnamed: 0,Full_code,Comments,Code_without_comments,Clean_classes,RAG_Docstring,ROUGE-1 Comments,BLEU Score Comments,Accuracy,Ease,Conciseness,Parameter_Coverage,Return_Coverage
0,"class Adamax(optimizer_v2.OptimizerV2):\n ""...","""""""Optimizer that implements the Adamax algori...",class Adamax(optimizer_v2.OptimizerV2):\n \...,class dummy_class_1(optimizer_v2.OptimizerV2):...,Adamax is an extension of the Adam optimizatio...,0.204969,2.909382e-155,0.617686,48.048128,0.706717,1.000000,1.0
1,class AgglomerationTransform(TransformerMixin)...,"""""""\n A class for feature agglomeration via...",class AgglomerationTransform(TransformerMixin)...,class dummy_class_1(TransformerMixin):\n \n...,This is a custom transformer that performs fea...,0.096386,7.399357e-232,0.395134,46.463790,4.348624,1.000000,1.0
2,"class AveragePooling1D(Pooling1D):\n """"""Ave...","""""""Average pooling for temporal data.\n\n Dow...",class AveragePooling1D(Pooling1D):\n \n\n ...,class dummy_class_1(Pooling1D):\n \n\n d...,AveragePooling1D is a subclass of Pooling1D th...,0.345382,3.004795e-02,0.558474,60.116429,0.549145,1.000000,1.0
3,"class AveragePooling2D(Pooling2D):\n """"""Ave...","""""""Average pooling operation for spatial data....",class AveragePooling2D(Pooling2D):\n \n\n ...,class dummy_class_1(Pooling2D):\n \n\n d...,AveragePooling2D is a subclass of Pooling2D th...,0.240000,2.507493e-02,0.600608,51.443057,0.502455,0.800000,1.0
4,"class AveragePooling3D(Pooling3D):\n """"""Ave...","""""""Average pooling operation for 3D data (spat...",class AveragePooling3D(Pooling3D):\n \n\n ...,class dummy_class_1(Pooling3D):\n \n\n d...,A subclass of `Pooling3D` that performs averag...,0.445230,1.480461e-01,0.707121,56.083861,0.829885,0.833333,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...
62,"class UpSampling3D(Layer):\n """"""Upsampling ...","""""""Upsampling layer for 3D inputs.\n\n Repeat...",class UpSampling3D(Layer):\n \n\n def __...,class dummy_class_1(Layer):\n \n\n def d...,UpSampling3D is a type of Layer that performs ...,0.272000,9.407484e-79,0.603818,56.250000,1.275410,0.750000,1.0
63,"class ZeroPadding1D(Layer):\n """"""Zero-paddi...","""""""Zero-padding layer for 1D input (e.g. tempo...",class ZeroPadding1D(Layer):\n \n\n def _...,class dummy_class_1(Layer):\n \n\n def d...,ZeroPadding1D is a subclass of Keras' Layer cl...,0.163934,6.983278e-79,0.568016,51.756328,1.673347,1.000000,1.0
64,"class ZeroPadding2D(Layer):\n """"""Zero-paddi...","""""""Zero-padding layer for 2D input (e.g. pictu...",class ZeroPadding2D(Layer):\n \n\n def _...,class dummy_class_1(Layer):\n \n\n def d...,ZeroPadding2D is a custom layer class used for...,0.272059,5.164530e-02,0.664046,45.357763,0.877039,0.666667,1.0
65,"class ZeroPadding3D(Layer):\n """"""Zero-paddi...","""""""Zero-padding layer for 3D data (spatial or ...",class ZeroPadding3D(Layer):\n \n\n def _...,class dummy_class_1(Layer):\n \n\n def d...,ZeroPadding3D is a specific layer used within ...,0.197719,3.446904e-155,0.623957,46.354139,1.095890,0.750000,1.0


In [86]:
data_1["Retrieved_Contexts"] = retrieved_contexts_list

In [87]:
for index, row in data_1.iterrows():
    faithfulness_list.append(calculate_basic_faithfulness(str(row["RAG_Docstring"]), str(row["Retrieved_Contexts"])))
    #faithfulness_list.append(faithfulness_score)
#if faithfulness_score is not None: print(f"    -> Basic Faithfulness: {faithfulness_score:.4f}")

In [88]:
data_1["Faithfulness_Score"] = faithfulness_list

In [89]:
data_1

Unnamed: 0,Full_code,Comments,Code_without_comments,Clean_classes,RAG_Docstring,ROUGE-1 Comments,BLEU Score Comments,Accuracy,Ease,Conciseness,Parameter_Coverage,Return_Coverage,Retrieved_Contexts,Faithfulness_Score
0,"class Adamax(optimizer_v2.OptimizerV2):\n ""...","""""""Optimizer that implements the Adamax algori...",class Adamax(optimizer_v2.OptimizerV2):\n \...,class dummy_class_1(optimizer_v2.OptimizerV2):...,Adamax is an extension of the Adam optimizatio...,0.204969,2.909382e-155,0.617686,48.048128,0.706717,1.000000,1.0,"Python Docstrings\nLast Updated :\n02 Aug, 202...",0.194444
1,class AgglomerationTransform(TransformerMixin)...,"""""""\n A class for feature agglomeration via...",class AgglomerationTransform(TransformerMixin)...,class dummy_class_1(TransformerMixin):\n \n...,This is a custom transformer that performs fea...,0.096386,7.399357e-232,0.395134,46.463790,4.348624,1.000000,1.0,"Python Docstrings\nLast Updated :\n02 Aug, 202...",0.264151
2,"class AveragePooling1D(Pooling1D):\n """"""Ave...","""""""Average pooling for temporal data.\n\n Dow...",class AveragePooling1D(Pooling1D):\n \n\n ...,class dummy_class_1(Pooling1D):\n \n\n d...,AveragePooling1D is a subclass of Pooling1D th...,0.345382,3.004795e-02,0.558474,60.116429,0.549145,1.000000,1.0,"Python Docstrings\nLast Updated :\n02 Aug, 202...",0.276923
3,"class AveragePooling2D(Pooling2D):\n """"""Ave...","""""""Average pooling operation for spatial data....",class AveragePooling2D(Pooling2D):\n \n\n ...,class dummy_class_1(Pooling2D):\n \n\n d...,AveragePooling2D is a subclass of Pooling2D th...,0.240000,2.507493e-02,0.600608,51.443057,0.502455,0.800000,1.0,"Python Docstrings\nLast Updated :\n02 Aug, 202...",0.240506
4,"class AveragePooling3D(Pooling3D):\n """"""Ave...","""""""Average pooling operation for 3D data (spat...",class AveragePooling3D(Pooling3D):\n \n\n ...,class dummy_class_1(Pooling3D):\n \n\n d...,A subclass of `Pooling3D` that performs averag...,0.445230,1.480461e-01,0.707121,56.083861,0.829885,0.833333,1.0,"Python Docstrings\nLast Updated :\n02 Aug, 202...",0.216495
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
62,"class UpSampling3D(Layer):\n """"""Upsampling ...","""""""Upsampling layer for 3D inputs.\n\n Repeat...",class UpSampling3D(Layer):\n \n\n def __...,class dummy_class_1(Layer):\n \n\n def d...,UpSampling3D is a type of Layer that performs ...,0.272000,9.407484e-79,0.603818,56.250000,1.275410,0.750000,1.0,"Python Docstrings\nLast Updated :\n02 Aug, 202...",0.268519
63,"class ZeroPadding1D(Layer):\n """"""Zero-paddi...","""""""Zero-padding layer for 1D input (e.g. tempo...",class ZeroPadding1D(Layer):\n \n\n def _...,class dummy_class_1(Layer):\n \n\n def d...,ZeroPadding1D is a subclass of Keras' Layer cl...,0.163934,6.983278e-79,0.568016,51.756328,1.673347,1.000000,1.0,"Python Docstrings\nLast Updated :\n02 Aug, 202...",0.321739
64,"class ZeroPadding2D(Layer):\n """"""Zero-paddi...","""""""Zero-padding layer for 2D input (e.g. pictu...",class ZeroPadding2D(Layer):\n \n\n def _...,class dummy_class_1(Layer):\n \n\n def d...,ZeroPadding2D is a custom layer class used for...,0.272059,5.164530e-02,0.664046,45.357763,0.877039,0.666667,1.0,"Python Docstrings\nLast Updated :\n02 Aug, 202...",0.315789
65,"class ZeroPadding3D(Layer):\n """"""Zero-paddi...","""""""Zero-padding layer for 3D data (spatial or ...",class ZeroPadding3D(Layer):\n \n\n def _...,class dummy_class_1(Layer):\n \n\n def d...,ZeroPadding3D is a specific layer used within ...,0.197719,3.446904e-155,0.623957,46.354139,1.095890,0.750000,1.0,"Python Docstrings\nLast Updated :\n02 Aug, 202...",0.330189


In [90]:
pydocstyle_adherence_list_1 = []

In [91]:
pydocstyle_adherence_list_1

[]

In [92]:
for index, row in data_1.iterrows():
    pydocstyle_adherence_list_1.append(check_docstring_adherence_pydocstyle(str(row["Code_without_comments"]), str(row["RAG_Docstring"])))

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Av

In [93]:
data_1["PythonStyle_Adherence"] = pydocstyle_adherence_list_1

In [94]:
exception_coverage_list = []

In [95]:
for index, row in data_1.iterrows():
    exception_coverage_list.append(calculate_exception_coverage(str(row["Code_without_comments"]), str(row["RAG_Docstring"])))

In [96]:
data_1["Exception_Coverage"] = exception_coverage_list

In [97]:
data_1

Unnamed: 0,Full_code,Comments,Code_without_comments,Clean_classes,RAG_Docstring,ROUGE-1 Comments,BLEU Score Comments,Accuracy,Ease,Conciseness,Parameter_Coverage,Return_Coverage,Retrieved_Contexts,Faithfulness_Score,PythonStyle_Adherence,Exception_Coverage
0,"class Adamax(optimizer_v2.OptimizerV2):\n ""...","""""""Optimizer that implements the Adamax algori...",class Adamax(optimizer_v2.OptimizerV2):\n \...,class dummy_class_1(optimizer_v2.OptimizerV2):...,Adamax is an extension of the Adam optimizatio...,0.204969,2.909382e-155,0.617686,48.048128,0.706717,1.000000,1.0,"Python Docstrings\nLast Updated :\n02 Aug, 202...",0.194444,0.3,1.0
1,class AgglomerationTransform(TransformerMixin)...,"""""""\n A class for feature agglomeration via...",class AgglomerationTransform(TransformerMixin)...,class dummy_class_1(TransformerMixin):\n \n...,This is a custom transformer that performs fea...,0.096386,7.399357e-232,0.395134,46.463790,4.348624,1.000000,1.0,"Python Docstrings\nLast Updated :\n02 Aug, 202...",0.264151,0.0,1.0
2,"class AveragePooling1D(Pooling1D):\n """"""Ave...","""""""Average pooling for temporal data.\n\n Dow...",class AveragePooling1D(Pooling1D):\n \n\n ...,class dummy_class_1(Pooling1D):\n \n\n d...,AveragePooling1D is a subclass of Pooling1D th...,0.345382,3.004795e-02,0.558474,60.116429,0.549145,1.000000,1.0,"Python Docstrings\nLast Updated :\n02 Aug, 202...",0.276923,0.0,1.0
3,"class AveragePooling2D(Pooling2D):\n """"""Ave...","""""""Average pooling operation for spatial data....",class AveragePooling2D(Pooling2D):\n \n\n ...,class dummy_class_1(Pooling2D):\n \n\n d...,AveragePooling2D is a subclass of Pooling2D th...,0.240000,2.507493e-02,0.600608,51.443057,0.502455,0.800000,1.0,"Python Docstrings\nLast Updated :\n02 Aug, 202...",0.240506,0.3,1.0
4,"class AveragePooling3D(Pooling3D):\n """"""Ave...","""""""Average pooling operation for 3D data (spat...",class AveragePooling3D(Pooling3D):\n \n\n ...,class dummy_class_1(Pooling3D):\n \n\n d...,A subclass of `Pooling3D` that performs averag...,0.445230,1.480461e-01,0.707121,56.083861,0.829885,0.833333,1.0,"Python Docstrings\nLast Updated :\n02 Aug, 202...",0.216495,0.1,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
62,"class UpSampling3D(Layer):\n """"""Upsampling ...","""""""Upsampling layer for 3D inputs.\n\n Repeat...",class UpSampling3D(Layer):\n \n\n def __...,class dummy_class_1(Layer):\n \n\n def d...,UpSampling3D is a type of Layer that performs ...,0.272000,9.407484e-79,0.603818,56.250000,1.275410,0.750000,1.0,"Python Docstrings\nLast Updated :\n02 Aug, 202...",0.268519,0.0,1.0
63,"class ZeroPadding1D(Layer):\n """"""Zero-paddi...","""""""Zero-padding layer for 1D input (e.g. tempo...",class ZeroPadding1D(Layer):\n \n\n def _...,class dummy_class_1(Layer):\n \n\n def d...,ZeroPadding1D is a subclass of Keras' Layer cl...,0.163934,6.983278e-79,0.568016,51.756328,1.673347,1.000000,1.0,"Python Docstrings\nLast Updated :\n02 Aug, 202...",0.321739,0.0,1.0
64,"class ZeroPadding2D(Layer):\n """"""Zero-paddi...","""""""Zero-padding layer for 2D input (e.g. pictu...",class ZeroPadding2D(Layer):\n \n\n def _...,class dummy_class_1(Layer):\n \n\n def d...,ZeroPadding2D is a custom layer class used for...,0.272059,5.164530e-02,0.664046,45.357763,0.877039,0.666667,1.0,"Python Docstrings\nLast Updated :\n02 Aug, 202...",0.315789,0.0,0.0
65,"class ZeroPadding3D(Layer):\n """"""Zero-paddi...","""""""Zero-padding layer for 3D data (spatial or ...",class ZeroPadding3D(Layer):\n \n\n def _...,class dummy_class_1(Layer):\n \n\n def d...,ZeroPadding3D is a specific layer used within ...,0.197719,3.446904e-155,0.623957,46.354139,1.095890,0.750000,1.0,"Python Docstrings\nLast Updated :\n02 Aug, 202...",0.330189,0.0,0.0


In [98]:
data_1.to_excel('./deepseek/Normal_RAG.xlsx')

In [99]:
data_1.to_pickle('./deepseek/Normal_RAG.pkl')