In [1]:
import pandas as pd
import os

In [2]:
quantifiers_dict = {
    "no": 0,
    "little": 1,
    "some": 2,
    "many": 3
}
elevation_dict = {
    "low": 0,
    "high": 1
}

In [3]:
dict_list = []
for filename in os.listdir("."):
    if filename.endswith(".txt"):
        prompt = filename.replace(",", "")
        tokens = prompt.split("_")
        with open(filename, "r") as file:
            level_string = file.read()
            dict_list.append({
                "filename": filename,
                "pipes": quantifiers_dict[tokens[0]],
                "enemies": quantifiers_dict[tokens[2]],
                "blocks": quantifiers_dict[tokens[4]],
                "elevation": -1 if len(tokens) == 6 else elevation_dict[tokens[6]],
                "level": level_string
            })

In [5]:
df = pd.DataFrame(dict_list)
df.to_csv("levels.csv", index=False)

In [13]:
def extract_result_info(result):
    return {
        "completion_percentage": result.getCompletionPercentage(),
        "remaining_time": result.getRemainingTime(),
        "mario_state": result.getMarioMode(),
        "kills": result.getKillsTotal(),
        "bricks": result.getNumDestroyedBricks(),
        "jumps": result.getNumJumps(),
        "jump_max_x": result.getMaxXJump(),
        "jump_max_air_time": result.getMaxJumpAirTime()
    }

In [6]:
# note: this requires to run LevelScorer in the Java application first

from py4j.java_gateway import JavaGateway

gateway = JavaGateway()
level_scorer = gateway.entry_point

In [14]:
scored_dicts = []
for row_dict in df.to_dict(orient="records"):
    scored_dict = {}
    scored_dict |= row_dict
    result = level_scorer.score(row_dict["level"])
    scored_dict |= extract_result_info(result)
    scored_dicts.append(scored_dict)

In [15]:
scored_df = pd.DataFrame(scored_dicts)
scored_df.head()

Unnamed: 0,filename,pipes,enemies,blocks,elevation,level,completion_percentage,remaining_time,mario_state,kills,bricks,jumps,jump_max_x,jump_max_air_time
0,"some_pipes,_many_enemies,_many_blocks,_low_ele...",2,3,3,0,----------------------------------------------...,0.007203,0,0,0,0,0,0.0,0
1,"no_pipes,_many_enemies,_many_blocks,_high_elev...",0,3,3,1,----------------------------------------------...,1.0,15080,0,5,0,7,183.45184,16
2,"no_pipes,_no_enemies,_no_blocks.txt",0,0,0,-1,----------------------------------------------...,0.51407,17480,0,0,0,2,119.70032,10
3,"little_pipes,_many_enemies,_no_blocks.txt",1,3,0,-1,----------------------------------------------...,1.0,15260,0,0,0,5,118.46857,10
4,"some_pipes,_no_enemies,_many_blocks,_low_eleva...",2,0,3,0,----------------------------------------------...,1.0,14900,0,0,0,6,157.71385,14


In [17]:
scored_df.to_csv("levels_scored.csv", index=False)