In [21]:
import torch
import pandas as pd
import numpy as np
from tqdm import tqdm
import re
import os
from transformers import AutoTokenizer, AutoModelForCausalLM

In [2]:
def generate(text_in, tok_in, mod_in, max_new_tokens=512):
    """Run ``mod_in`` on the prompt ``text_in`` and return the decoded text.

    Args:
        text_in: Prompt string.
        tok_in: Tokenizer. It is called as ``tok_in(text, return_tensors='pt')``
            and its ``decode`` method turns the generated ids back into text.
        mod_in: Model exposing ``generate(**inputs, max_new_tokens=...)``.
        max_new_tokens: Upper bound on the number of newly generated tokens.
            Defaults to 512, the value that used to be hard-coded.

    Returns:
        The first sequence returned by ``mod_in.generate``, decoded with
        special tokens skipped.
    """
    # Send the inputs to the device the model actually lives on instead of
    # assuming 'cuda:0'; objects without a ``device`` attribute keep the old
    # behaviour.
    device = getattr(mod_in, 'device', 'cuda:0')
    tok_text = tok_in(text_in, return_tensors='pt').to(device)
    gen_text = mod_in.generate(**tok_text, max_new_tokens=max_new_tokens)
    return tok_in.decode(gen_text[0], skip_special_tokens=True)

In [3]:
# Checkpoint identifier shared by the tokenizer and the baseline model.
MODEL_ID = "TinyLlama/TinyLlama-1.1B-Chat-v0.3"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)

# Only the untuned baseline is loaded here. The fine-tuned and RLHF
# checkpoints are kept below, disabled, for later comparison runs.
# tuned_model = AutoModelForCausalLM.from_pretrained("../finetuning/tuned_model/", torch_dtype=torch.float32)
baseline_model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    torch_dtype=torch.float32,
)
# rlhf_model = AutoModelForCausalLM.from_pretrained("../Reward Model/model/", torch_dtype=torch.float32)

Some weights of LlamaForCausalLM were not initialized from the model checkpoint at TinyLlama/TinyLlama-1.1B-Chat-v0.3 and are newly initialized: ['model.layers.10.self_attn.rotary_emb.inv_freq', 'model.layers.18.self_attn.rotary_emb.inv_freq', 'model.layers.1.self_attn.rotary_emb.inv_freq', 'model.layers.6.self_attn.rotary_emb.inv_freq', 'model.layers.21.self_attn.rotary_emb.inv_freq', 'model.layers.12.self_attn.rotary_emb.inv_freq', 'model.layers.0.self_attn.rotary_emb.inv_freq', 'model.layers.2.self_attn.rotary_emb.inv_freq', 'model.layers.8.self_attn.rotary_emb.inv_freq', 'model.layers.4.self_attn.rotary_emb.inv_freq', 'model.layers.15.self_attn.rotary_emb.inv_freq', 'model.layers.16.self_attn.rotary_emb.inv_freq', 'model.layers.14.self_attn.rotary_emb.inv_freq', 'model.layers.7.self_attn.rotary_emb.inv_freq', 'model.layers.3.self_attn.rotary_emb.inv_freq', 'model.layers.9.self_attn.rotary_emb.inv_freq', 'model.layers.13.self_attn.rotary_emb.inv_freq', 'model.layers.5.self_attn.rota

In [4]:
# Use the first GPU when CUDA is available; otherwise stay on the CPU instead
# of raising. The call remains the cell's last expression so the module
# summary is still displayed.
baseline_model.to('cuda:0' if torch.cuda.is_available() else 'cpu')

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(32003, 2048, padding_idx=0)
    (layers): ModuleList(
      (0-21): 22 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (k_proj): Linear(in_features=2048, out_features=256, bias=False)
          (v_proj): Linear(in_features=2048, out_features=256, bias=False)
          (o_proj): Linear(in_features=2048, out_features=2048, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=2048, out_features=5632, bias=False)
          (up_proj): Linear(in_features=2048, out_features=5632, bias=False)
          (down_proj): Linear(in_features=5632, out_features=2048, bias=False)
          (act_fn): SiLUActivation()
        )
        (input_layernorm): LlamaRMSNorm()
        (post_attention_layernorm): LlamaRMSNorm()
      )
    )
    (norm): LlamaRMSNorm()


In [5]:
# Submission table: per the preview, each row carries a problem_id, language,
# status, the submitted solution and the problem statement.
submissions_path = '../mini_codenet/data/split/evaluate_train.ftr'
df = pd.read_feather(submissions_path)
df.head()

Unnamed: 0,level_0,index,submission_id,problem_id,language,filename_ext,status,cpu_time,memory,code_size,accuracy,solution,problem_statement
0,1619278,5172,s373894550,p03202,C++,cpp,Wrong Answer,2103.0,1916.0,2741,,#include <bits/stdc++.h>\nusing namespace std;...,Score : 700 points \n Problem Statement There ...
1,771169,5835,s440823744,p03008,C++,cpp,Wrong Answer,176.0,256.0,1378,,#include <bits/stdc++.h>\n\nusing namespace st...,Score : 600 points \n Problem Statement The sq...
2,1654494,7516,s716084373,p03943,Python,py,Wrong Answer,17.0,2940.0,88,,"a, b, c = map(int, input().split())\n\nif a ==...",Score : 100 points \n Problem Statement Two st...
3,758900,5870,s029981993,p03047,Python,py,Accepted,17.0,2940.0,43,,"n,k = map(int,input().split())\nprint(n-k+1)",Score : 100 points \n Problem Statement Snuke ...
4,2792852,1786,s487610428,p03852,Python,py,Accepted,168.0,38384.0,114,,"c = input()\n\nif c == ""a"" or c == ""i"" or c ==...",Score : 100 points \n Problem Statement Given ...


In [6]:
# Evaluation set: column 'a' holds problem ids; column 'b' looks like a sample
# input for that problem -- TODO confirm against whatever produced this file.
eval_path = './file.feather'
eval_df = pd.read_feather(eval_path)
eval_df.head()

Unnamed: 0,a,b
0,p02402,5\n10 1 5 4 17
1,p03316,12
2,p02995,4 9 2 3
3,p03129,3 2
4,p02612,1900


In [16]:
# Number of rows in the evaluation set.
print(eval_df.shape[0])

50


In [29]:
def _extract_code_block(generated_text, lang):
    """Return the first ```<lang> fenced block in ``generated_text``, or None.

    The returned string includes the opening and closing fences. When no
    complete block is found, the search is retried with a closing fence
    appended, which recovers a block whose closing fence is missing from the
    text. ``None`` means there is no opening fence for ``lang`` at all.
    """
    # Non-greedy + DOTALL: stop at the first closing fence so that prose or
    # further code blocks after it are not swallowed into the match.
    pattern = re.compile(f'```{re.escape(lang.lower())}.*?```', re.DOTALL)
    for candidate in (generated_text, generated_text + '```'):
        match = pattern.search(candidate)
        if match is not None:
            return match.group()
    return None


OUTPUT_DIR = './baseline_files/'
os.makedirs(OUTPUT_DIR, exist_ok=True)

num_compiled = 0  # generations produced in THIS run that passed compile()
num_checked = 0   # problems visited so far; doubles as the output file index
# Generations already on disk from an earlier run (only the .txt outputs count).
dist = len([name for name in os.listdir(OUTPUT_DIR) if name.endswith('.txt')])
print(dist)

# Ordered de-duplication. Iterating a set of strings gives a different order
# in every kernel, which would make the index-based resume below (and the file
# numbering) refer to different problems after a restart.
problem_ids = list(dict.fromkeys(eval_df['a'].values))

# Loop-invariant part of the lookup: accepted Python submissions only.
accepted_python = df.loc[(df['status'] == 'Accepted') & (df['language'] == 'Python')]

for pid in tqdm(problem_ids):
    file_index = num_checked
    num_checked += 1

    # Resume support: skip problems whose output already exists, but redo the
    # most recent file (index dist-1) -- presumably because it may stem from
    # an interrupted run; TODO confirm.
    if file_index < dist - 1:
        continue

    instance = accepted_python.loc[accepted_python['problem_id'] == pid]
    if instance.empty:
        # Previously an IndexError from .iloc[0] aborted the whole run here.
        tqdm.write(f'No accepted Python submission for {pid}; skipping')
        continue

    prompt = instance.iloc[0]['problem_statement'].replace('\n', '')
    lang = instance.iloc[0]['language']

    # ------ baseline -------
    # NOTE(review): "takens" is a typo. It is kept verbatim because the prompt
    # text is model input and changing it would alter the generations.
    formatted_prompt = (f"<|im_start|>user\nGenerate the correct {lang} code to answer the following prompt that can also takens input and prints out an answer:{prompt}<|im_end|>\n<|im_start|>assistant\n")
    generated = generate(formatted_prompt, tokenizer, baseline_model)
    baseline_generated = _extract_code_block(generated, lang)

    # Persist the extracted block, or the raw generation when nothing could be
    # extracted, and close the file deterministically.
    with open(os.path.join(OUTPUT_DIR, f'{file_index}.txt'), 'w', encoding="utf-8") as out_file:
        out_file.write(generated if baseline_generated is None else baseline_generated)

    if baseline_generated is None:
        continue

    # Strip the opening "```<lang>" and the closing "```" before compiling.
    source = baseline_generated[3 + len(lang):-3]
    try:
        compile(source, 'test', 'exec')
    except Exception:
        # Any compile failure counts as "did not compile"; unlike a bare
        # except, KeyboardInterrupt can still stop the run.
        continue
    num_compiled += 1

# The generated code is only compiled; executing it stays disabled.
# ------ finetuned -------
# TODO: repeat the evaluation with the fine-tuned model, e.g.
# formatted_prompt = (f"<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistantPython\n") # update this with newest model
# generated = generate(formatted_prompt, tokenizer, tuned_model)

28


100%|██████████| 50/50 [13:48<00:00, 16.58s/it]


In [None]:
# Debug aid: show the raw text of the most recent generation. `generated` is
# left over from the evaluation loop, so that cell must have produced at least
# one generation in this kernel before this one is run.
print(generated)

In [None]:
# Debug aid: pull the first complete ```<lang> fenced block out of the most
# recent generation (`generated` and `lang` are left over from the loop cell).
# Non-greedy + DOTALL stops at the first closing fence; re.escape keeps the
# language name from being read as regex syntax.
fence_match = re.search(f'```{re.escape(lang.lower())}.*?```', generated, re.DOTALL)
# A generation without a complete fenced block yields None here instead of an
# AttributeError from calling .group() on a failed search.
baseline_generated = fence_match.group() if fence_match else None
print(baseline_generated)