## RegEx

In [13]:
code = """
void cpp_evolve(const std::vector<program> &h_oldprogs,
                std::vector<program> &h_nextprogs, const int n_samples,
                const float *data, const float *y, const float *sample_weights,
                const param &params, const int generation, const int seed) {
  auto n_progs = params.population_size;
  auto tour_size = params.tournament_size;
  auto n_tours = n_progs; // at least num_progs tournaments

  // Seed engines
  PhiloxEngine h_gen(seed); // use Philox instead of Mersenne Twister?

  uniform_real_distribution_custom<float> dist_U(0.0f, 1.0f);

  // Build, Mutate and Run Tournaments

  if (generation == 1) {
    // Build random programs for the first generation
    for (auto i = 0; i < n_progs; ++i) {
      build_program(h_nextprogs[i], params, h_gen);
    }

  } else {
    // Set mutation type
    float mut_probs[4];
    mut_probs[0] = params.p_crossover;
    mut_probs[1] = params.p_subtree_mutation;
    mut_probs[2] = params.p_hoist_mutation;
    mut_probs[3] = params.p_point_mutation;
    std::partial_sum(mut_probs, mut_probs + 4, mut_probs);

    for (auto i = 0; i < n_progs; ++i) {
      float prob = dist_U(h_gen);

      if (prob < mut_probs[0]) {
        h_nextprogs[i].mut_type = mutation_t::crossover;
        n_tours++;
      } else if (prob < mut_probs[1]) {
        h_nextprogs[i].mut_type = mutation_t::subtree;
      } else if (prob < mut_probs[2]) {
        h_nextprogs[i].mut_type = mutation_t::hoist;
      } else if (prob < mut_probs[3]) {
        h_nextprogs[i].mut_type = mutation_t::point;
      } else {
        h_nextprogs[i].mut_type = mutation_t::reproduce;
      }
    }

    // Run tournaments
    std::vector<int> d_win_indices(n_tours);

    auto criterion = params.criterion();
    tournament_kernel(h_oldprogs, d_win_indices.data(), seed, n_progs, n_tours,
                      tour_size, criterion, params.parsimony_coefficient);

    // dim3 nblks(raft::ceildiv(n_tours, GENE_TPB), 1, 1);
    // batched_tournament_kernel<<<nblks, GENE_TPB, 0, stream>>>(
    //     d_oldprogs, d_win_indices.data(), tour_seeds.data(), n_progs,
    //     n_tours, tour_size, criterion, params.parsimony_coefficient);
    // RAFT_CUDA_TRY(cudaPeekAtLastError());
    // h.sync_stream(stream);

    // Perform host mutations

    auto donor_pos = n_progs;
    for (auto pos = 0; pos < n_progs; ++pos) {
      auto parent_index = d_win_indices[pos];

      if (h_nextprogs[pos].mut_type == mutation_t::crossover) {
        // Get secondary index
        auto donor_index = d_win_indices[donor_pos];
        donor_pos++;
        crossover(h_oldprogs[parent_index], h_oldprogs[donor_index],
                  h_nextprogs[pos], params, h_gen);
      } else if (h_nextprogs[pos].mut_type == mutation_t::subtree) {
        subtree_mutation(h_oldprogs[parent_index], h_nextprogs[pos], params,
                         h_gen);
      } else if (h_nextprogs[pos].mut_type == mutation_t::hoist) {
        hoist_mutation(h_oldprogs[parent_index], h_nextprogs[pos], params,
                       h_gen);
      } else if (h_nextprogs[pos].mut_type == mutation_t::point) {
        point_mutation(h_oldprogs[parent_index], h_nextprogs[pos], params,
                       h_gen);
      } else if (h_nextprogs[pos].mut_type == mutation_t::reproduce) {
        h_nextprogs[pos] = h_oldprogs[parent_index];
      } else {
        // Should not come here
      }
    }
  }

  // Update raw fitness for all programs
  set_batched_fitness(n_progs, h_nextprogs, params, n_samples, data, y,
                      sample_weights);
}
"""

In [14]:
import re
import hashlib

def hash_identifier(name):
    # Creates a short valid C++ identifier from a SHA-1 hash
    return '_obf_' + hashlib.sha1(name.encode()).hexdigest()[:8]

def obfuscate_cpp_code_with_map(code):
    identifier_pattern = r'\b([a-zA-Z_][a-zA-Z_0-9]*)\b'

    cpp_keywords = {
        'int', 'float', 'double', 'char', 'bool', 'void', 'class', 'struct',
        'if', 'else', 'for', 'while', 'switch', 'case', 'return', 'break',
        'continue', 'public', 'private', 'protected', 'const', 'static',
        'inline', 'virtual', 'template', 'typename', 'using', 'namespace',
        'include', 'new', 'delete', 'this', 'true', 'false', 'nullptr',
        'operator', 'override', 'typedef', 'enum', 'sizeof', 'do', 'goto',
        'extern', 'union', 'volatile', 'register', 'friend', 'explicit',
        'try', 'catch', 'throw', 'noexcept', 'default', 'constexpr', 'inline'
    }

    identifiers = set(re.findall(identifier_pattern, code))
    identifiers = {idf for idf in identifiers if idf not in cpp_keywords}

    obfuscation_map = {idf: hash_identifier(idf) for idf in identifiers}
    sorted_identifiers = sorted(obfuscation_map.keys(), key=len, reverse=True)

    for idf in sorted_identifiers:
        code = re.sub(r'\b{}\b'.format(re.escape(idf)), obfuscation_map[idf], code)

    return code, obfuscation_map


def deobfuscate_cpp_code(obfuscated_code, obfuscation_map):
    reverse_map = {v: k for k, v in obfuscation_map.items()}
    sorted_keys = sorted(reverse_map.keys(), key=len, reverse=True)

    for obf in sorted_keys:
        obfuscated_code = re.sub(r'\b{}\b'.format(re.escape(obf)), reverse_map[obf], obfuscated_code)

    return obfuscated_code

# Example usage
obfuscated_code, obfuscation_map = obfuscate_cpp_code_with_map(code)
print("Obfuscated Code:\n", obfuscated_code)

Obfuscated Code:
 
void _obf_d327d0e7(const _obf_55ec981f::_obf_027c1146<_obf_81d9aeea> &_obf_4e64f60a,
                _obf_55ec981f::_obf_027c1146<_obf_81d9aeea> &_obf_79341a1b, const int _obf_1409d7c3,
                const float *_obf_a17c9aaa, const float *_obf_95cb0bfd, const float *_obf_d600a662,
                const _obf_df066517 &_obf_fd7b034e, const int _obf_cd738ebd, const int _obf_92713d47) {
  _obf_0d612c12 _obf_70ade028 = _obf_fd7b034e._obf_d081f61f;
  _obf_0d612c12 _obf_6a630bcc = _obf_fd7b034e._obf_24b2946b;
  _obf_0d612c12 _obf_d5ffe29b = _obf_70ade028; // _obf_27e90dfa _obf_b9e3680a _obf_aff53679 _obf_c9fa0e96

  // _obf_32fe9f5b _obf_2be3e148
  _obf_4605c007 _obf_333c8a09(_obf_92713d47); // _obf_04489a12 _obf_1a40c174 _obf_571d5bc7 _obf_de04fa0e _obf_f28ade7b _obf_868bbfb0?

  _obf_7875a655<float> _obf_6ffe8acf(0.0f, 1.0f);

  // _obf_bbd80cf7, _obf_5c46b7e7 _obf_cffa50a3 _obf_b1b39260 _obf_fee20df1

  if (_obf_cd738ebd == 1) {
    // _obf_bbd80cf7 _obf_a415ab5c _ob

In [19]:
import os
import openai
from openai import OpenAI

# Recommended: use environment variable for API key
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY", "your-api-key-here"))

# === GPT Chat Completion ===
def ask_gpt_chat(prompt, model="gpt-4o", temperature=0.7, max_tokens=300):
    try:
        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": prompt}
            ],
            temperature=temperature,
            max_tokens=max_tokens,
        )
        return response.choices[0].message.content.strip()
    except Exception as e:
        print(f"Error: {e}")
        return None
from utils.string_utils import extract_markdown_blocks
def generate_optimized_code(code):
    prompt = f"""
    Please optimize the following C++ code while maintaining its exact functionality and signature:
    
    ```cpp
    {code}
    ```
    
    Focus on:
    1. Optimizing for performance
    2. Maintaining the same functionality
    """
    
    return ask_gpt_chat(prompt, max_tokens=10000, temperature=0.7)

# Generate optimized code from the obfuscated code
print("Generating optimized code...")
generated_code = generate_optimized_code(obfuscated_code)
print("Generated Code:\n", generated_code)
generated_code = extract_markdown_blocks(generated_code)[-1]
print("Generated Optimized Code:\n", generated_code)


Generating optimized code...
Generated Code:
 To optimize the given C++ code while maintaining its functionality and signature, we can perform the following actions:

1. **Remove Unused Variables and Comments**: Remove any variables and comments that are not used within the function, as they add unnecessary clutter.
2. **Optimize Loops and Conditionals**: Ensure loops and conditionals are efficient and straightforward.
3. **Reduce Function Calls**: Inline any simple function calls if possible to reduce overhead.
4. **Use Standard Library Algorithms**: Utilize standard algorithms for operations like transformations or conditions checking when applicable.

Here's the optimized version of the code:

```cpp
void _obf_d327d0e7(const _obf_55ec981f::_obf_027c1146<_obf_81d9aeea> &_obf_4e64f60a,
                   _obf_55ec981f::_obf_027c1146<_obf_81d9aeea> &_obf_79341a1b, const int _obf_1409d7c3,
                   const float *_obf_a17c9aaa, const float *_obf_95cb0bfd, const float *_obf_d600a

In [20]:
# Use the existing deobfuscate_cpp_code function with the obfuscation_map
deobfuscated_generated_code = deobfuscate_cpp_code(generated_code, obfuscation_map)
print("Deobfuscated Generated Code:\n", deobfuscated_generated_code)


Deobfuscated Generated Code:
 void cpp_evolve(const std::vector<program> &h_oldprogs,
                   std::vector<program> &h_nextprogs, const int n_samples,
                   const float *data, const float *y, const float *sample_weights,
                   const param &params, const int generation, const int seed) {

  auto n_progs = params.population_size;

  PhiloxEngine h_gen(seed);
  uniform_real_distribution_custom<float> dist_U(0.0f, 1.0f);

  if (generation == 1) {
    for (auto i = 0; i < n_progs; ++i) {
      build_program(h_nextprogs[i], params, h_gen);
    }
  } else {
    float mut_probs[4] = {
      params.p_crossover,
      params.p_subtree_mutation,
      params.p_hoist_mutation,
      params.p_point_mutation
    };
    std::partial_sum(mut_probs, mut_probs + 4, mut_probs);

    auto n_tours = 0;
    for (auto i = 0; i < n_progs; ++i) {
      float prob = dist_U(h_gen);

      if (prob < mut_probs[0]) {
        h_nextprogs[i].mut_type = mutation_t::crossover;
     