In [1]:
import os
import inspect
import openai

In [2]:
# Read API Key
with open('API_KEY.txt') as f:
    api_key = f.readlines()[0]

# Set API Key
openai.api_key = api_key

## Automatic Code Explainer

Aim - Given an existing code in any language, generate Docstring for the code to improve code explainability.

Rules for now:
```
1. Code is always written in Python.
2. Code is always a function.
3. Docstring starts and ends with """ """  or ''' '''
```

In [3]:
# Sample fucntion to train a PyTorch Model
def train(model, train_loader, device, criterion, optimizer):
    model.train()
    running_loss = AverageMeter()

    for data in train_loader:
        inputs, labels = data

        inputs = inputs.to(device)
        labels = labels.to(device)

        prediction = model(inputs)
        loss = criterion(prediction, labels)
        running_loss.update(loss.item(), len(inputs))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    return running_loss

In [4]:
def docstring_prompt(code):
    prompt = f"{code}\n # A high quality python docstring of the above Python function:\n \"\"\""
    return prompt

In [5]:
# Convert Python Code/Function to String to pass as input
print(inspect.getsource(train))

def train(model, train_loader, device, criterion, optimizer):
    model.train()
    running_loss = AverageMeter()

    for data in train_loader:
        inputs, labels = data

        inputs = inputs.to(device)
        labels = labels.to(device)

        prediction = model(inputs)
        loss = criterion(prediction, labels)
        running_loss.update(loss.item(), len(inputs))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    return running_loss



In [6]:
print(docstring_prompt(inspect.getsource(train)))

def train(model, train_loader, device, criterion, optimizer):
    model.train()
    running_loss = AverageMeter()

    for data in train_loader:
        inputs, labels = data

        inputs = inputs.to(device)
        labels = labels.to(device)

        prediction = model(inputs)
        loss = criterion(prediction, labels)
        running_loss.update(loss.item(), len(inputs))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    return running_loss

 # A high quality python docstring of the above Python function:
 """


In [7]:
# API Call
# Codex Models - https://platform.openai.com/docs/models/codex
response = openai.Completion.create(model='code-cushman-001', # code-davinci-002, code-cushman-001
                                   prompt=docstring_prompt(inspect.getsource(train)),
                                   temperature=0,
                                   max_tokens=512,
                                   top_p=1.0,
                                   frequency_penalty=0,
                                   presence_penalty=0,
                                   stop=["\"\"\""])   # Stop Token - Marks end of DocString

In [8]:
response

<OpenAIObject text_completion id=cmpl-6vDbJ18NfgEWAoZpC4boAEjFlKVSg at 0x21f49afef70> JSON: {
  "choices": [
    {
      "finish_reason": "stop",
      "index": 0,
      "logprobs": null,
      "text": "\n This function trains the model on the training set.\n \n Parameters:\n    model (torch.nn.Module): The model to be trained.\n    train_loader (torch.utils.data.DataLoader): The training set.\n    device (torch.device): The device to train the model on.\n    criterion (torch.nn.modules.loss): The loss function to use.\n    optimizer (torch.optim.Optimizer): The optimizer to use.\n \n Returns:\n    running_loss (AverageMeter): The running loss of the training.\n "
    }
  ],
  "created": 1679094997,
  "id": "cmpl-6vDbJ18NfgEWAoZpC4boAEjFlKVSg",
  "model": "code-cushman-001",
  "object": "text_completion",
  "usage": {
    "completion_tokens": 133,
    "prompt_tokens": 150,
    "total_tokens": 283
  }
}

In [9]:
print(response['choices'][0]['text'])


 This function trains the model on the training set.
 
 Parameters:
    model (torch.nn.Module): The model to be trained.
    train_loader (torch.utils.data.DataLoader): The training set.
    device (torch.device): The device to train the model on.
    criterion (torch.nn.modules.loss): The loss function to use.
    optimizer (torch.optim.Optimizer): The optimizer to use.
 
 Returns:
    running_loss (AverageMeter): The running loss of the training.
 


In [10]:
def merge_docstring(orig_function, docstring):
    function_string = inspect.getsource(orig_function)
    split = function_string.split("\n")
    first_part, second_part = split[0], split[1:]
    output = first_part + '\n    """' + docstring + '    """' + '\n' + '\n'.join(second_part)
    return output

In [15]:
function_with_prompts = merge_docstring(train, response['choices'][0]['text'])
print(function_with_prompts)

def train(model, train_loader, device, criterion, optimizer):
    """
 This function trains the model on the training set.
 
 Parameters:
    model (torch.nn.Module): The model to be trained.
    train_loader (torch.utils.data.DataLoader): The training set.
    device (torch.device): The device to train the model on.
    criterion (torch.nn.modules.loss): The loss function to use.
    optimizer (torch.optim.Optimizer): The optimizer to use.
 
 Returns:
    running_loss (AverageMeter): The running loss of the training.
     """
    model.train()
    running_loss = AverageMeter()

    for data in train_loader:
        inputs, labels = data

        inputs = inputs.to(device)
        labels = labels.to(device)

        prediction = model(inputs)
        loss = criterion(prediction, labels)
        running_loss.update(loss.item(), len(inputs))

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    return running_loss



In [14]:
# Write function with DocString to Python file
with open('output_withdocstring.py', 'w') as f:
    f.write(function_with_prompts)