# Convert Osu to Beet Saber

## 1. Convert .osz to .osu

In [1]:
# standard-library modules for directory handling and unzipping
import os
import zipfile

In [18]:
# directory that holds the downloaded .osz archives
osz_path = '../../oszs/'
# directory the archives get extracted into (one subfolder per song)
unzip_path = '../../oszs/charts/'

In [19]:
# gather the path of every osu! archive sitting directly in osz_path
oszs = [f"{osz_path}{archive}" for archive in os.listdir(osz_path) if archive.endswith('osz')]
oszs

['../../oszs/fubuki.osz',
 '../../oszs/killing-my-love.osz',
 '../../oszs/reality-surf.osz',
 '../../oszs/ETA.osz',
 '../../oszs/rockefeller-st.osz',
 '../../oszs/stupid-horse.osz',
 '../../oszs/duvet.osz',
 '../../oszs/running-man.osz']

In [4]:
def retain_file(filepath):
  """Decide whether a file is worth keeping.

  Only osu metadata files and mp3 audio are kept; wav files are used
  only for hitsounds, which are irrelevant here.

  Returns True when filepath ends with 'osu' or 'mp3', False otherwise.
  """
  return filepath.endswith(('osu', 'mp3'))

In [16]:
def keep_file(filepath):
  """
  Decide whether an extracted file should survive the cleanup pass.

  A file is kept when its name contains one of the accepted difficulty
  markers (matched case-insensitively) or the exact text 'audio.mp3',
  and it does not contain '.wav'.
  """
  # !impt: taiko diffs are named like so: kantan, futsuu, muzukashii, oni
  # harder diffs (hard, insane, extra, expert, muzukashii, oni) are left out
  # on purpose: realistically not playing hards or higher
  wanted_difficulties = ('easy', 'normal', 'kantan', 'futsuu')

  if '.wav' in filepath:
    return False
  if 'audio.mp3' in filepath: # the audio track name is matched case-sensitively
    return True

  lowered = filepath.lower()
  return any(marker in lowered for marker in wanted_difficulties)

In [17]:
# Unpack every archive into its own per-song folder under unzip_path,
# then prune each folder down to the files keep_file() accepts.
os.makedirs(unzip_path, exist_ok=True)
for osz in oszs:
  # song name = archive file name up to its first '.'
  songname = osz.rsplit('/', 1)[-1].partition('.')[0]
  save_path = f"{unzip_path}{songname}/"
  with zipfile.ZipFile(osz, 'r') as zip_ref:
    zip_ref.extractall(save_path)

# delete unneeded files:
# # subdirs holds bare directory names (e.g. killing-my-love, duvet), not full paths
subdirs = []
for entry in os.listdir(unzip_path):
  if os.path.isdir(os.path.join(unzip_path, entry)):
    subdirs.append(entry)

for subdir in subdirs:
  dirpath = os.path.join(unzip_path, subdir)
  unwanted = [file for file in os.listdir(dirpath) if not keep_file(file)]
  for file in unwanted:
    os.remove(os.path.join(dirpath, file))

## 2. Extract Hit Objects from .osu

In [7]:
# wiki is extremely well documented
# # https://osu.ppy.sh/wiki/en/Client/File_formats/osu_%28file_format%29#type:~:text=Slider%20border%20colour-,Hit%20objects,-Hit%20object%20syntax

In [8]:
# folder of one song under unzip_path, used as a sample below
example_song = os.path.join(unzip_path,'lovenote')

In [9]:
# list the contents of the example song's folder
example_osu_dir = os.listdir(example_song)
example_osu_dir

['audio.mp3']

In [10]:
#example_osu #= example_osu_dir[2]

In [11]:
def classify_object(flag):
  """
  Map an osu! hit-object type bitfield to a simplified object name.

  Bits inspected:
  - bit 0 -> 'circle'
  - bit 1 -> 'slider'
  - bit 3 -> 'spinner'
  All other bits are ignored.

  When more than one of these bits is set a warning is printed and the
  first match in the order circle, slider, spinner is returned.
  Returns None (after printing a diagnostic) when none of them is set.
  """
  # (name, mask) pairs, listed in priority order
  type_masks = (('circle', 1), ('slider', 1 << 1), ('spinner', 1 << 3))
  # a non-zero AND means the bit is set (the result is the mask value, not 1)
  matches = [name for name, mask in type_masks if flag & mask]

  if len(matches) > 1:
    print("Whaaat")
  if matches:
    return matches[0]
  print(f"Something is wrong in object classification. Recieved flag: {flag}")
  return None

A quick note about taiko: apart from the naming convention for difficulties, taiko and osu!std hit objects are stored identically. Hype!

In [12]:
def extract_hit_objects(osu_path, just_ints = False):
  """
  Find type information and timing data for all hit objects contained in an .osu file

  Inputs:
  - osu_path: path to osu metadata file
  - just_ints: when True, return only the packed note integers

  Returns (just_ints=False) a list of (note_type, note_time, note_int) tuples
  - note_type: 'slider', 'spinner' or 'circle' (None when classify_object cannot classify the flag)
  - note_time: integer, time in ms when note appears (relative to song start)
  - note_int: 32 bit integer written as a '0x'-prefixed, 8 digit hex string containing
    relevant note info. See game_code/notes.md for more info

  Returns (just_ints=True) only the list of note_int strings.
  A file without a [HitObjects] section yields an empty list.
  """
  start_str = '[HitObjects]'
  # hit object fields, from the wiki: x,y,time,type,hitSound,objectParams,hitSample
  time_field, type_field = 2, 3
  note_types = []
  note_times = []
  # only the numeric fields are consumed, so undecodable bytes elsewhere
  # in the file are replaced rather than allowed to abort the read
  with open(osu_path, 'r', encoding='utf-8', errors='replace') as map_file:
    content = map_file.read()

  section_start = content.find(start_str)
  if section_start != -1: # find() gives -1 when the section is missing
    section = content[section_start+len(start_str):]
    for hit_object in section.splitlines():
      hit_object = hit_object.strip()
      if hit_object.startswith('['): # another section header: hit objects are over
        break
      hit_object_fields = hit_object.split(',')
      if len(hit_object_fields) <= type_field: # blank or malformed line
        continue
      # for now, just time and circle/slider class
      note_types.append(classify_object(int(hit_object_fields[type_field])))
      note_times.append(int(hit_object_fields[time_field]))

  # creating note_ints
  note_ints = []
  for note_type, note_time in zip(note_types, note_times):
    # !impt
    has_been_hit = 0
    channel = 0 # 0=L, 1=R
    has_repeat_slider = 0

    has_slider = note_type=='slider'
    has_spinner = note_type=='spinner'
    has_circle = note_type=='circle'

    # layout: bit 31 has_been_hit, bit 30 channel, bit 29 spinner,
    # bit 28 repeat slider, bit 27 slider, bit 26 circle, low bits time in ms
    # NOTE(review): bit 28 is the free slot between spinner and slider; the flag
    # used to be OR'd in unshifted - confirm against game_code/notes.md
    combined_int = (has_been_hit<<31 | channel<<30 | has_spinner<<29
                    | has_repeat_slider<<28 | has_slider<<27 | has_circle<<26
                    | note_time)

    # zero-padded to 8 hex digits, i.e. one 32 bit word
    note_ints.append(f"0x{combined_int:08x}")

  if just_ints: # programming sin tbh
    return note_ints
  return list(zip(note_types,note_times,note_ints))

In [22]:
# one chart, used to try the extractor out (full tuples, not just the ints)
song = "../../oszs/charts/killing-my-love/LESLIE PARRISH - KILLING MY LOVE (Cut Ver.) (Pincus) [Kirishima-'s Normal Bunta's Impreza].osu"
hitobjs = extract_hit_objects(song)

In [23]:
# type conversion
# show how many hit objects were parsed, then only the first entry
print(len(hitobjs))
for i in hitobjs:
  print(i)
  break


256
('slider', 1675, '0x0800068b')


In [24]:
# test deciphering
# the lowest 20 bits of every packed integer should equal the note's time in ms
correct = 0
for a,b,number in hitobjs:
  get_position = int('0b'+'1'*20,2) # mask with the lowest 20 bits set
  number_bin = int(number,16) # parse the hex string back into an int
  correct += (get_position&number_bin)==b # counts the notes whose time round-trips
correct==len(hitobjs) #if true all good

True

In [25]:
def to_c_string(x):
  """Join the strings in x with ', ' and wrap them in braces, e.g. '{a, b}'."""
  return '{' + ', '.join(x) + '}'

In [44]:
# !impt: where you extract hit objects
# one extracted .osu chart per song; these paths are passed to extract_hit_objects
duvet = '../../oszs/charts/duvet/boa - Duvet (TV Size) (Girls Love) [Easy].osu' 
eta = "../../oszs/charts/ETA/NewJeans - ETA (bielsuu) [miss yo's Normal].osu"
killing_my_love = "../../oszs/charts/killing-my-love/LESLIE PARRISH - KILLING MY LOVE (Cut Ver.) (Pincus) [Kirishima-'s Normal Bunta's Impreza].osu"
stupid_horse = '../../oszs/charts/stupid-horse/100 gecs - stupid horse (hehe) [normal].osu'
fubuki = '../../oszs/charts/fubuki/Street - Sakura Fubuki (eiri-) [Normal].osu'
reality_surf = '../../oszs/charts/reality-surf/bladee - Reality Surf (Metzo) [kantan].osu'
rockefeller = '../../oszs/charts/rockefeller-st/Getter Jaani - Rockefeller Street (Nightcore Mix) (Sotarks) [Akitoshi\'s Normal].osu'

In [46]:
# add more songs as needed
# pack every chart into its list of hex note integers (just_ints=True drops types and times)
hitobjs_boa = extract_hit_objects(duvet, just_ints=True)
hitobjs_eta = extract_hit_objects(eta, just_ints=True)
hitobjs_love = extract_hit_objects(killing_my_love, just_ints=True)
hitobjs_surf = extract_hit_objects(reality_surf, just_ints=True)
hitobjs_fubuki = extract_hit_objects(fubuki, just_ints=True)
hitobjs_horse = extract_hit_objects(stupid_horse, just_ints=True)
hitobjs_rockefeller = extract_hit_objects(rockefeller, just_ints=True)

# the results
# each list rendered as a brace-wrapped, comma-separated string
boa_res = to_c_string(hitobjs_boa)
eta_res = to_c_string(hitobjs_eta)
killing_my_love_res = to_c_string(hitobjs_love) 
surf_res = to_c_string(hitobjs_surf)
fubuki_res = to_c_string(hitobjs_fubuki)
horse_res = to_c_string(hitobjs_horse) 
rock_res = to_c_string(hitobjs_rockefeller)

In [36]:
# number of comma-separated entries in the duvet string, then the string itself
len(boa_res.split(',')), boa_res

(78,
 '{0x08000776, 0x04000bd1, 0x04000d10, 0x08000eee, 0x080013f0, 0x080017bd, 0x04001e07, 0x04001feb, 0x0400212e, 0x04002312, 0x08002592, 0x04002959, 0x08002a9c, 0x04002f9c, 0x0400317d, 0x0800335f, 0x08003863, 0x04003c26, 0x08003d67, 0x08003fe9, 0x0400464b, 0x0800478d, 0x04004b53, 0x08004dd7, 0x04005235, 0x04005375, 0x08005554, 0x04005a67, 0x08005ba9, 0x080061f3, 0x080065b4, 0x04006a0b, 0x08006bfa, 0x08006fc9, 0x080074de, 0x040078a7, 0x080079e9, 0x0400801d, 0x040082a1, 0x08008525, 0x080088eb, 0x04008f36, 0x08009078, 0x0800943e, 0x08009804, 0x08009a89, 0x0400a088, 0x2000a2f0, 0x0400b734, 0x0800b9b6, 0x0800bd79, 0x0400c279, 0x0400c3ba, 0x0800c63b, 0x0400cb4a, 0x2000cdcc, 0x0800e97b, 0x0400f0f5, 0x0400f38e, 0x0800f4d0, 0x0400f896, 0x0800f9d8, 0x04010022, 0x040102a7, 0x080103e9, 0x080108f1, 0x08010cb7, 0x0801107e, 0x080116c8, 0x04011d13, 0x08011e55, 0x0801221b, 0x04012866, 0x04012aea, 0x08012c26, 0x0801311c, 0x080134e2, 0x200138a8}')

In [None]:
# number of comma-separated entries in the ETA string, then the string itself
len(eta_res.split(',')), eta_res

266


(None,
 '{0x08001d05, 0x08001f9f, 0x0800223a, 0x080025b3, 0x0400276f, 0x0400292c, 0x08002ae8, 0x08002d83, 0x0800301d, 0x08003396, 0x04003553, 0x0400370f, 0x080038cc, 0x08003c45, 0x08003fbd, 0x08004336, 0x080046af, 0x0400486c, 0x04004a28, 0x08004b06, 0x08004da1, 0x08005493, 0x0400572d, 0x0400580c, 0x080059c8, 0x04005d41, 0x04005efd, 0x040060ba, 0x08006276, 0x04006511, 0x040065ef, 0x080067ac, 0x04006968, 0x08006b25, 0x08006ce1, 0x0800705a, 0x08007216, 0x080073d3, 0x0800774c, 0x08007908, 0x08007ac5, 0x08007e3d, 0x08007ffa, 0x080081b6, 0x08008373, 0x040088a8, 0x04008986, 0x08008a65, 0x04008c21, 0x04008ddd, 0x08008f9a, 0x04009313, 0x040094cf, 0x0800968c, 0x04009a05, 0x04009bc1, 0x08009d7d, 0x0400a018, 0x0400a0f6, 0x0400a2b3, 0x0800a46f, 0x0400a70a, 0x0400a7e8, 0x0400a9a5, 0x0800ab61, 0x0800aeda, 0x0800b253, 0x0400b5cc, 0x0400b945, 0x0400bcbd, 0x0800c036, 0x0400c1f3, 0x0800c3af, 0x0800c56c, 0x0400c806, 0x0400c8e5, 0x0800caa1, 0x0800ce1a, 0x0800cfd6, 0x0400d193, 0x0800d34f, 0x0800d50c, 0x0400

In [38]:
# number of comma-separated entries in the killing-my-love string, then the string itself
len(killing_my_love_res.split(',')), killing_my_love_res

(256,
 '{0x0800068b, 0x0800080a, 0x04000b08, 0x08000c87, 0x08000e06, 0x08001103, 0x04001282, 0x040017be, 0x0800187e, 0x04001cfb, 0x08001e79, 0x04002177, 0x040022f6, 0x08002475, 0x040028f2, 0x08002a71, 0x04002d6e, 0x04002eed, 0x0800306c, 0x040032ab, 0x0800336a, 0x08003668, 0x080038a6, 0x04003ae4, 0x08003c63, 0x04003ea2, 0x08003f61, 0x0800425f, 0x0800449d, 0x040046dc, 0x0800485b, 0x040049dc, 0x04004a9c, 0x08004eb0, 0x08005331, 0x080054b1, 0x04005813, 0x08005933, 0x08005ab3, 0x08005f35, 0x080060b5, 0x04006417, 0x08006537, 0x080066b7, 0x080068f8, 0x08006b39, 0x08006cb9, 0x0400701a, 0x0800713b, 0x080072bb, 0x080074fc, 0x0800773d, 0x080078bd, 0x04007c1e, 0x08007d3f, 0x08007ebf, 0x08008100, 0x08008341, 0x080084c1, 0x080090c5, 0x08009306, 0x08009547, 0x040096c7, 0x04009848, 0x08009908, 0x08009b49, 0x08009cc9, 0x08009f0a, 0x0800a14b, 0x0400a2cb, 0x0400a44c, 0x0800a50c, 0x0800a74d, 0x0800a8cd, 0x0800ab0e, 0x0800ad4f, 0x0400aecf, 0x0400b04f, 0x0800b110, 0x0800b350, 0x0800b4d1, 0x0800b712, 0x0800b

In [40]:
# number of comma-separated entries in the reality-surf string, then the string itself
len(surf_res.split(',')), surf_res

(193,
 '{0x0400030c, 0x040005fa, 0x040008e8, 0x04000ec4, 0x040011b2, 0x040014a0, 0x04001a7c, 0x04001d6a, 0x04002058, 0x04002634, 0x04002922, 0x04002c10, 0x040031ec, 0x040034da, 0x04003651, 0x040037c8, 0x04003ab6, 0x04003da4, 0x04004092, 0x04004209, 0x04004380, 0x0400466e, 0x0400495c, 0x04004dc1, 0x04004f38, 0x040050af, 0x04005226, 0x04005514, 0x04005979, 0x04005af0, 0x04005dde, 0x04005f55, 0x040060cc, 0x04006531, 0x040066a8, 0x0400681f, 0x04006996, 0x04006c84, 0x04006f72, 0x040070e9, 0x04007260, 0x0400754e, 0x040076c5, 0x0400783c, 0x04007b2a, 0x04007ca1, 0x04007e18, 0x04008106, 0x0400827d, 0x040083f4, 0x040089d0, 0x04008b47, 0x04008fac, 0x0400929a, 0x04009588, 0x040096ff, 0x04009876, 0x04009b64, 0x04009e52, 0x0400a140, 0x0400a2b7, 0x0400a42e, 0x0400a5a5, 0x0400a71c, 0x0400acf8, 0x0400ae6f, 0x0400afe6, 0x0400b2d4, 0x0400b739, 0x0400b8b0, 0x0400ba27, 0x0400bb9e, 0x0400bd15, 0x0400be8c, 0x0400c17a, 0x0400c2f1, 0x0400c756, 0x0400c8cd, 0x0400cbbb, 0x0400cd32, 0x0400cea9, 0x0400d020, 0x0400d

In [41]:
# number of comma-separated entries in the fubuki string, then the string itself
len(fubuki_res.split(',')), fubuki_res

(219,
 '{0x0400015e, 0x04000967, 0x04000b69, 0x040016cb, 0x04001ed4, 0x040020d6, 0x08002c39, 0x04002e3b, 0x08002ee7, 0x0400303e, 0x08003194, 0x08003442, 0x04003644, 0x080036f0, 0x0800399e, 0x04003ba0, 0x08003c4b, 0x04003ef9, 0x08003fa4, 0x040041a7, 0x040042fe, 0x04004454, 0x040045ab, 0x08004702, 0x08004859, 0x080049b0, 0x04004b07, 0x08004c5e, 0x04005062, 0x080051b9, 0x08005310, 0x04005467, 0x080055be, 0x04005714, 0x040057c0, 0x04005917, 0x040059c3, 0x04005b18, 0x04005bc4, 0x04005d1b, 0x04005dc7, 0x08005f1e, 0x04006074, 0x08006120, 0x04006479, 0x080065d0, 0x04006727, 0x0800687e, 0x040069d4, 0x08006a80, 0x04006c82, 0x08006dd9, 0x04006f30, 0x04007087, 0x04007132, 0x080071de, 0x0400748b, 0x08007537, 0x0800768e, 0x08007890, 0x040079e7, 0x08007b3e, 0x04007c94, 0x08007deb, 0x04007f42, 0x04008099, 0x04008144, 0x080081f0, 0x0800849e, 0x040085f4, 0x080086a0, 0x0800894e, 0x08008aa4, 0x08008bfb, 0x04008f54, 0x080090ab, 0x04009202, 0x08009359, 0x040094af, 0x0800955b, 0x0400975d, 0x080098b4, 0x04009

In [42]:
# number of comma-separated entries in the stupid-horse string, then the string itself
len(horse_res.split(',')), horse_res

(182,
 '{0x04001410, 0x080014c2, 0x040016da, 0x0800183f, 0x08001c6e, 0x04001e86, 0x04001f39, 0x0800209e, 0x08002368, 0x08002798, 0x040029af, 0x04002a62, 0x08002bc7, 0x08002e91, 0x080032c1, 0x040034d8, 0x0400358b, 0x080036f0, 0x080039ba, 0x08003dea, 0x04004002, 0x040040b4, 0x0400437e, 0x040044e4, 0x04004596, 0x08004649, 0x04004913, 0x04004a79, 0x04004b2b, 0x08004bde, 0x08004ea8, 0x080052d7, 0x0800543c, 0x08005706, 0x040059d1, 0x04005b36, 0x04005c9b, 0x04005e00, 0x08005f65, 0x0400617d, 0x08006230, 0x080064fa, 0x08006a8e, 0x08006bf4, 0x08006d59, 0x04006f70, 0x08007023, 0x08007452, 0x080075b8, 0x040079e7, 0x08007b4c, 0x08007f7c, 0x080080e1, 0x04008510, 0x08008675, 0x08008aa5, 0x08008c0a, 0x04009039, 0x0800919e, 0x080095ce, 0x08009733, 0x04009b62, 0x08009cc8, 0x0800a0f7, 0x0800a25c, 0x0400a68c, 0x0800a7f1, 0x0800ac20, 0x0800ad85, 0x0400b1b5, 0x0800b31a, 0x0800b749, 0x0800b8ae, 0x0400bcde, 0x0800be43, 0x0800c272, 0x0800c3d8, 0x0800c807, 0x0400dcf4, 0x0800dda7, 0x0800dfbe, 0x0400e3ee, 0x0800e

In [47]:
# number of comma-separated entries in the rockefeller string, then the string itself
len(rock_res.split(',')), rock_res

(160,
 '{0x040017ef, 0x08001960, 0x08001db4, 0x04002097, 0x04002208, 0x04002379, 0x080024ea, 0x0800293e, 0x04002c20, 0x04002d92, 0x04002f03, 0x08003074, 0x080034c8, 0x040037aa, 0x0400391b, 0x04003a8d, 0x08003bfe, 0x08004052, 0x04004334, 0x040044a5, 0x08004617, 0x080048f9, 0x08004bdb, 0x04004ebe, 0x0400502f, 0x080051a0, 0x08005483, 0x08005765, 0x04005a48, 0x04005bb9, 0x08005d2a, 0x0800600d, 0x080062ef, 0x040065d2, 0x04006743, 0x080068b4, 0x08006b97, 0x08006e79, 0x040070a3, 0x0800715b, 0x0800743e, 0x04007720, 0x04007892, 0x08007a03, 0x04007c2d, 0x08007ce5, 0x08007fc8, 0x040082aa, 0x0400841b, 0x0800858d, 0x040087b7, 0x0800886f, 0x08008b52, 0x04008e34, 0x04008fa5, 0x08009117, 0x04009340, 0x080093f9, 0x080096db, 0x040099be, 0x04009b2f, 0x08009ca0, 0x04009eca, 0x08009f83, 0x0400a265, 0x0400a3d7, 0x0400a548, 0x0400a6b9, 0x0800a82a, 0x0400aa54, 0x0800ab0d, 0x0400adef, 0x0400af60, 0x0800b0d2, 0x0400b2fb, 0x0800b3b4, 0x0800b697, 0x0400b979, 0x0400baea, 0x0400bc5b, 0x0400bdcd, 0x0800bf3e, 0x0400c