## RegEx

In [1]:
# C++ source of `cpp_evolve`, stored verbatim as a Python string. It is the
# input passed to `obfuscate_cpp_code_with_map` in the next code cell. The text
# inside the triple quotes is data: edit it only to change the snippet that the
# rest of the notebook processes.
code = """
void cpp_evolve(const std::vector<program> &h_oldprogs,
                std::vector<program> &h_nextprogs, const int n_samples,
                const float *data, const float *y, const float *sample_weights,
                const param &params, const int generation, const int seed) {
  auto n_progs = params.population_size;
  auto tour_size = params.tournament_size;
  auto n_tours = n_progs; // at least num_progs tournaments

  // Seed engines
  PhiloxEngine h_gen(seed); // use Philox instead of Mersenne Twister?

  uniform_real_distribution_custom<float> dist_U(0.0f, 1.0f);

  // Build, Mutate and Run Tournaments

  if (generation == 1) {
    // Build random programs for the first generation
    for (auto i = 0; i < n_progs; ++i) {
      build_program(h_nextprogs[i], params, h_gen);
    }

  } else {
    // Set mutation type
    float mut_probs[4];
    mut_probs[0] = params.p_crossover;
    mut_probs[1] = params.p_subtree_mutation;
    mut_probs[2] = params.p_hoist_mutation;
    mut_probs[3] = params.p_point_mutation;
    std::partial_sum(mut_probs, mut_probs + 4, mut_probs);

    for (auto i = 0; i < n_progs; ++i) {
      float prob = dist_U(h_gen);

      if (prob < mut_probs[0]) {
        h_nextprogs[i].mut_type = mutation_t::crossover;
        n_tours++;
      } else if (prob < mut_probs[1]) {
        h_nextprogs[i].mut_type = mutation_t::subtree;
      } else if (prob < mut_probs[2]) {
        h_nextprogs[i].mut_type = mutation_t::hoist;
      } else if (prob < mut_probs[3]) {
        h_nextprogs[i].mut_type = mutation_t::point;
      } else {
        h_nextprogs[i].mut_type = mutation_t::reproduce;
      }
    }

    // Run tournaments
    std::vector<int> d_win_indices(n_tours);

    auto criterion = params.criterion();
    tournament_kernel(h_oldprogs, d_win_indices.data(), seed, n_progs, n_tours,
                      tour_size, criterion, params.parsimony_coefficient);

    // dim3 nblks(raft::ceildiv(n_tours, GENE_TPB), 1, 1);
    // batched_tournament_kernel<<<nblks, GENE_TPB, 0, stream>>>(
    //     d_oldprogs, d_win_indices.data(), tour_seeds.data(), n_progs,
    //     n_tours, tour_size, criterion, params.parsimony_coefficient);
    // RAFT_CUDA_TRY(cudaPeekAtLastError());
    // h.sync_stream(stream);

    // Perform host mutations

    auto donor_pos = n_progs;
    for (auto pos = 0; pos < n_progs; ++pos) {
      auto parent_index = d_win_indices[pos];

      if (h_nextprogs[pos].mut_type == mutation_t::crossover) {
        // Get secondary index
        auto donor_index = d_win_indices[donor_pos];
        donor_pos++;
        crossover(h_oldprogs[parent_index], h_oldprogs[donor_index],
                  h_nextprogs[pos], params, h_gen);
      } else if (h_nextprogs[pos].mut_type == mutation_t::subtree) {
        subtree_mutation(h_oldprogs[parent_index], h_nextprogs[pos], params,
                         h_gen);
      } else if (h_nextprogs[pos].mut_type == mutation_t::hoist) {
        hoist_mutation(h_oldprogs[parent_index], h_nextprogs[pos], params,
                       h_gen);
      } else if (h_nextprogs[pos].mut_type == mutation_t::point) {
        point_mutation(h_oldprogs[parent_index], h_nextprogs[pos], params,
                       h_gen);
      } else if (h_nextprogs[pos].mut_type == mutation_t::reproduce) {
        h_nextprogs[pos] = h_oldprogs[parent_index];
      } else {
        // Should not come here
      }
    }
  }

  // Update raw fitness for all programs
  set_batched_fitness(n_progs, h_nextprogs, params, n_samples, data, y,
                      sample_weights);
}
"""

In [3]:
from utils.string_utils import obfuscate_cpp_code_with_map, deobfuscate_cpp_code, legible_identifier

# Obfuscate the C++ snippet held in `code`, passing `legible_identifier` as the
# identifier function. The call returns both the obfuscated text and a map;
# the map is kept because `deobfuscate_cpp_code` needs it later in the notebook.
obfuscated_code, obfuscation_map = obfuscate_cpp_code_with_map(code, identifier_function=legible_identifier)
print("Obfuscated Code:\n", obfuscated_code)

Obfuscated Code:
 
void o_pageantic(const std::vector<o_Nana> &o_gulping,
                std::vector<o_Nana> &o_rhythmist, const int o_stercorate,
                const float *o_eluviate, const float *o_flak, const float *o_Hecatean,
                const o_nonya &o_semifused, const int o_vespid, const int o_subursine) {
  o_gerrymander o_adventual = o_semifused.o_abbess;
  o_gerrymander o_wush = o_semifused.o_frizzler;
  o_gerrymander o_handyblow = o_adventual; // o_collaud o_presbytia o_proenzym o_niggardly

  // o_ume o_tightwad
  o_infirmness o_phycochromaceous(o_subursine); // o_tricyclist o_engross o_physostigmine o_Podaxonia o_ontography o_monsterhood?

  o_denutrition<float> o_unstern(0.0f, 1.0f);

  // o_bletheration, o_hypnotistic o_phlegmonous o_masterful o_overrank

  if (o_vespid == 1) {
    // o_bletheration o_polystyle o_demountability for o_unbooted o_aggregateness o_vespid
    for (o_gerrymander o_cumuliform = 0; o_cumuliform < o_adventual; ++o_cumuliform) {
      o_o

In [4]:
import os
import openai
from openai import OpenAI

# Recommended: use environment variable for API key.
# The key is read from OPENAI_API_KEY; when that variable is unset the literal
# placeholder string below is passed to the client instead.
# NOTE(review): with the placeholder, requests will presumably fail to
# authenticate at call time rather than here — confirm, and never replace the
# placeholder with a real key in the notebook.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY", "your-api-key-here"))

# === GPT Chat Completion ===
def ask_gpt_chat(prompt, model="gpt-4o", temperature=0.7, max_tokens=300):
    """Send a single-turn prompt to the chat completions API and return the reply text.

    Uses the module-level `client`. The request consists of a fixed system
    message followed by `prompt` as the user message.

    Args:
        prompt: Text sent as the user message.
        model: Model identifier forwarded to the API.
        temperature: Sampling temperature forwarded to the API.
        max_tokens: Token limit forwarded to the API.

    Returns:
        The reply text with surrounding whitespace stripped, or None when the
        API request fails or the reply carries no text content. In both
        failure cases a line starting with "Error:" is printed.
    """
    try:
        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": prompt}
            ],
            temperature=temperature,
            max_tokens=max_tokens,
        )
    except openai.OpenAIError as e:
        # Only failures raised by the OpenAI client are mapped to None;
        # unrelated programming errors are allowed to surface instead of being
        # hidden behind a generic message.
        print(f"Error: {e}")
        return None

    # The reply may have no choices or a message without text content; report
    # that explicitly instead of failing on `.strip()` of None.
    choices = response.choices
    content = choices[0].message.content if choices else None
    if content is None:
        print("Error: the response contained no text content")
        return None
    return content.strip()
from utils.string_utils import extract_markdown_blocks
def generate_optimized_code(code):
    """Ask the chat model for a performance-oriented rewrite of a C++ snippet.

    The snippet is embedded in a fixed instruction prompt and sent through
    `ask_gpt_chat` with a 10000-token limit and temperature 0.7.

    Args:
        code: C++ source text to embed in the prompt.

    Returns:
        Whatever `ask_gpt_chat` returns for the prompt: the reply text, or
        None if that call reports a failure.
    """
    optimization_request = f"""
    Please optimize the following C++ code while maintaining its exact functionality and signature:
    
    ```cpp
    {code}
    ```
    
    Focus on:
    1. Optimizing for performance
    2. Maintaining the same functionality

    First, think through the optimization details. Then provide the optimized code.
    """

    reply = ask_gpt_chat(optimization_request, temperature=0.7, max_tokens=10000)
    return reply

# Generate optimized code from the obfuscated code
print("Generating optimized code...")
generated_response = generate_optimized_code(obfuscated_code)
if generated_response is None:
    # generate_optimized_code returns None when the model request fails; stop
    # here with a clear message instead of failing inside the extraction step.
    raise RuntimeError("The model request returned no response.")
print("Generated Code:\n", generated_response)

# The full reply and the extracted code live under separate names, so this
# cell gives the same result when re-run and the raw reply stays inspectable.
code_blocks = extract_markdown_blocks(generated_response)
if not code_blocks:
    raise ValueError("The generated response contains no markdown code block.")
# The last block is used because the prompt asks for the reasoning first and
# the optimized code afterwards.
generated_code = code_blocks[-1]
print("Generated Optimized Code:\n", generated_code)

Generating optimized code...
Generated Code:
 To optimize the provided C++ code while maintaining its functionality, we can focus on several key areas:

1. **Loop Optimization**: Avoid unnecessary computations within loops. Loop indices should be chosen carefully, and unnecessary operations should be minimized.

2. **Vector and Array Usage**: Efficiently use vectors and arrays to minimize overhead. Avoid unnecessary copying and use direct access when possible.

3. **Function Calls**: Inline simple functions if beneficial, or use references to avoid unnecessary copying.

4. **Branch Prediction**: Arrange if-else conditions to take advantage of branch prediction by placing the most likely conditions first.

5. **Algorithm Efficiency**: Ensure any sorting or searching operations are efficient.

6. **Memory Management**: Minimize dynamic memory allocations and prefer stack allocation where possible.

Let's apply these optimizations to the code:

```cpp
void o_pageantic(const std::vector<o_

In [5]:
# Map the model's output back to the original identifiers: apply
# `deobfuscate_cpp_code` to `generated_code` with the `obfuscation_map`
# produced by the earlier obfuscation call, then print the result.
deobfuscated_generated_code = deobfuscate_cpp_code(generated_code, obfuscation_map)
print("Deobfuscated Generated Code:\n", deobfuscated_generated_code)


Deobfuscated Generated Code:
 void cpp_evolve(const std::vector<program>& h_oldprogs,
                 std::vector<program>& h_nextprogs, const int n_samples,
                 const float* data, const float* y, const float* sample_weights,
                 const param& params, const int generation, const int seed) {
    auto n_progs = params.population_size;
    auto tour_size = params.tournament_size;
    auto n_tours = n_progs;

    PhiloxEngine h_gen(seed);
    uniform_real_distribution_custom<float> dist_U(0.0f, 1.0f);

    if (generation == 1) {
        for (auto i = 0; i < n_progs; ++i) {
            build_program(h_nextprogs[i], params, h_gen);
        }
    } else {
        float mut_probs[4] = {params.p_crossover, params.p_subtree_mutation,
                              params.p_hoist_mutation, params.p_point_mutation};
        std::sort(mut_probs, mut_probs + 4);

        for (auto i = 0; i < n_progs; ++i) {
            float prob = dist_U(h_gen);

            if (prob < mut_