In [3]:
import os
import json
import torch

import numpy as np
from tqdm import tqdm
from typing import List, Union
from transformers import AutoTokenizer, AutoModelForSequenceClassification

class CodeScorer:
    def __init__(self, model_name, device="cuda" if torch.cuda.is_available() else "cpu"):
        self.model = AutoModelForSequenceClassification.from_pretrained(
            model_name, num_labels=1).to(device)
        self.tokenizer = AutoTokenizer.from_pretrained(model_name)
        self.tokenizer.pad_token = self.tokenizer.eos_token
        self.device = device

    def score(self, codes: Union[str, List[str]]) -> List[float]:
        if isinstance(codes, str):
            codes = [codes]
        if len(codes) == 0:
            return []
        self.model.eval()
        with torch.no_grad():
            inputs = self.tokenizer(
                codes,
                truncation=True,
                padding=True,
                max_length=1000,
                return_tensors="pt"
            ).to(self.device)
            outputs = self.model(**inputs)
            logits = outputs.logits
            preds = [float(x) for x in logits.squeeze(-1).tolist()]
            return preds

scorer = CodeScorer("weights")

In [19]:
lst = set()
with open("datasets/pfall.jsonl", "r") as f:
    for row in f:
        data = json.loads(row)
        string = data["prompt"] + data["completion"]
        trimmed_string = "\n".join(string.split("\n")[2:])
        if len(trimmed_string) < 10 or len(trimmed_string) > 1000:
            continue
        lst.add(string)

lst = list(lst)
print(f"num entrys: {len(lst)}")

num entrys: 1406441


In [31]:
import pandas as pd

limit = -1

dct = {}
scores = []
for i, x in tqdm(enumerate(lst[:limit]), total=len(lst[:limit])):
    trimmed_x = "\n".join(x.split("\n")[2:])
    score = scorer.score(x)
    scores.append(score)
    dct[x] = score

with open('data.json', 'w') as fp:
    json.dump(dct, fp)
    
scores = np.array(scores)
df_describe = pd.DataFrame(scores)
df_describe.describe()

  0%|          | 929/1406440 [01:19<33:16:18, 11.73it/s]


KeyboardInterrupt: 

In [27]:
low_quality_scripts_dct = {}
high_quality_scripts_dct = {}

for script, score in tqdm(zip(lst[:limit], scores[:limit])):
    if score < 0.4:
        low_quality_scripts_dct[script] = score
    if score > 0.75:
        high_quality_scripts_dct[script] = score

1000it [00:00, 476842.20it/s]


In [28]:
for low_quality_script, score in low_quality_scripts_dct.items():
    print("=" * 40)
    print(f"Script:\n{low_quality_script}")
    print(f"Score: {score}")

Script:
-- Path: WAI_script.server.lua
-- ROBMeta: Workspace/Whack-an-iAllure
-- manages the Whack-an-iAllure room

Score: [0.38993114]
Script:
-- Path: 219773.lua
-- ROBMeta: ReplicatedStorage/LOD/Close
local data={
{"tree_med_1",CFrame.new(452.22348022461,45.588024139404,3766.6313476563,0.38966590166092,0.033821139484644,-0.92033511400223,-0.015536633320153,0.99942463636398,0.03014943562448,0.92082518339157,0.0025507020764053,0.38996714353561)},
}
return data

Score: [0.37049186]
Script:
-- Path: Constants.lua
-- ROBMeta: ReplicatedStorage/Modules
-- Maximum lobby size
Constants.MaxLobbySize = 10



Score: [0.39252564]
Script:
-- Path: mouseLocal.client.lua
-- ROBMeta: StarterPlayer/StarterPlayerScripts
--			print("WTF NO LOCAL PLAYER ROOT!") 
			return
		end
		
		
		local t, pos = getMousePoint(mouse.X,mouse.Y)
		
		if t then	
			local curr	
			
			if t.Parent:FindFirstChild("barracks") then
Score: [0.39013433]
Script:
-- Path: ResolutionFix.client.lua
-- ROBMeta: ServerStorage/GUI/

In [24]:
for high_quality_script, score in high_quality_scripts_dct.items():
    print("=" * 40)
    print(f"Script:\n{high_quality_script}")
    print(f"Score: {score}")

Script:
-- Path: DesertIslandWelcome.lua
-- ROBMeta: StarterPlayer/StarterPlayerScripts/Classes/Dialogue
local References = require(game:GetService("ReplicatedStorage").References)
local Utilities = References.Utilities
local Services = References.Services

local dialogue = {
	name = "Welcome to Desert Island";
	steps = {
		start = {
			message = "Welcome to Desert Island!";
			nextStep = "two";
		};
		two = {
			message = "Visit the town to find Larry to buy & sell items, and Hogan to buy horses!";
			nextStep = "three";
		};
		three = {
			message = "You can find Gold rocks and Ruby crystals here. Use them to make yourself better lassos and tack!";
			nextStep = "four";
		};
		four = {
			message = "Visit Ruby near the oasis, she'll give you a quest!";
			nextStep = "five";
		};
		five = {
			message = "Keep an eye out for the rare Red Roan Appaloosa, it's only found on this island!";
			isEnd = true;
		};
	};
	answers = {}
}

return dialogue

Score: [0.81209445]
Script:
-- Path: Rag