Stabilirea rolului unui subalgoritm:

In [3]:
import pandas as pd
import torch
from transformers import AutoModel, AutoTokenizer
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report
import numpy as np

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = AutoModel.from_pretrained("microsoft/codebert-base").to(device)
tokenizer = AutoTokenizer.from_pretrained("microsoft/codebert-base")

def get_embeddings_in_batches(code_snippets, batch_size=32):
    all_embeddings = []
    for i in range(0, len(code_snippets), batch_size):
        batch = code_snippets[i:i + batch_size]
        inputs = tokenizer(batch, padding=True, truncation=True, return_tensors="pt").to(device)
        with torch.no_grad():
            outputs = model(**inputs)
        batch_embeddings = outputs.last_hidden_state.mean(dim=1).cpu().numpy()
        all_embeddings.append(batch_embeddings)
    return np.vstack(all_embeddings)

df = pd.read_csv('data/train.csv')
print(df.shape)


(18612, 4)


In [4]:
code_snippets = df['output'].tolist()
labels = df['instruction'].tolist()

embeddings = get_embeddings_in_batches(code_snippets, batch_size=256)

clf = MLPClassifier(hidden_layer_sizes=(128,64,32,16,8,4,), max_iter=500, random_state=42)
clf.fit(embeddings, labels)

KeyboardInterrupt: 

In [None]:
new_code_snippet = """
def quicksort(arr):
    if len(arr) <= 1:
        return arr
    pivot = arr[len(arr) // 2]
    left = [x for x in arr if x < pivot]
    middle = [x for x in arr if x == pivot]
    right = [x for x in arr if x > pivot]
    return quicksort(left) + middle + quicksort(right)
"""
new_embedding = get_embeddings_in_batches([new_code_snippet], batch_size=1)
role = clf.predict(new_embedding)
print(f"The role of the algorithm is: {role[0]}")

The role of the algorithm is: Modify an existing Python program that implements a genetic algorithm for parameter optimization to add a new parameter.


Generare de specificatii:

In [None]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

model = AutoModelForSeq2SeqLM.from_pretrained("Salesforce/codet5-base")
tokenizer = AutoTokenizer.from_pretrained("Salesforce/codet5-base")

code_snippet = """
def quicksort(arr):
    if len(arr) <= 1:
        return arr
    pivot = arr[len(arr) // 2]
    left = [x for x in arr if x < pivot]
    middle = [x for x in arr if x == pivot]
    right = [x for x in arr if x > pivot]
    return quicksort(left) + middle + quicksort(right)
"""

prompt = f"Generate a Python specification for the following function:\n\n{code_snippet}\n\n# Comment:"

inputs = tokenizer(prompt, return_tensors="pt", truncation=True, padding=True)

outputs = model.generate(inputs.input_ids, max_length=100, num_return_sequences=1)

comment = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(comment)



####### Comment: ## Comment: ## Comment: ## Comment: ## Comment: ## Comment: ## Comment: # Comment: ## Comment: ## Comment: ## Comment: ## Comment: ## Comment: # Comment: ## Comment: # Comment: ## Comment: # Comment: ## Comment:


Generarea de cod sursă din descrierea în limbaj natural:

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer

model = AutoModelForCausalLM.from_pretrained("Salesforce/codegen-350M-multi")
tokenizer = AutoTokenizer.from_pretrained("Salesforce/codegen-350M-multi")

description = "Write a function that verifies if a number is prime or not"

inputs = tokenizer.encode(description, return_tensors="pt")

outputs = model.generate(inputs, max_length=1000, num_return_sequences=1)

code = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(code)


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Write a function to perform quicksort on a list of integers.

#include <iostream>
#include <vector>
#include <algorithm>

using namespace std;

int main() {
    int n;
    cin >> n;
    vector<int> v(n);
    for (int i = 0; i < n; i++) {
        cin >> v[i];
    }
    sort(v.begin(), v.end());
    for (int i = 0; i < n; i++) {
        cout << v[i] << " ";
    }
    return 0;
}

