# Convert Osu to Beet Saber

## 1. Convert .osz to .osu

In [1]:
# find install dir
import os
import zipfile

In [2]:
# Folder holding the downloaded .osz archives (trailing slash is required:
# later cells build paths by plain string concatenation).
osz_path = '/Users/donu/Desktop/S25/ELEC 327/327-final-proj/oszs/'
# Extracted charts live in a 'charts/' folder inside the archive folder.
unzip_path = osz_path + 'charts/'

In [3]:
# Gather the full path of every .osz archive sitting directly in osz_path
oszs = []
for entry in os.listdir(osz_path):
  if entry.endswith('osz'):
    oszs.append(osz_path + entry)
oszs

['/Users/donu/Desktop/S25/ELEC 327/327-final-proj/oszs/killing-my-love.osz',
 '/Users/donu/Desktop/S25/ELEC 327/327-final-proj/oszs/ETA.osz',
 '/Users/donu/Desktop/S25/ELEC 327/327-final-proj/oszs/stupid-horse.osz',
 '/Users/donu/Desktop/S25/ELEC 327/327-final-proj/oszs/duvet.osz',
 '/Users/donu/Desktop/S25/ELEC 327/327-final-proj/oszs/running-man.osz']

In [4]:
def retain_file(filepath):
  """Decide whether a file is worth keeping, based on its name ending.

  Only osu metadata files ('osu') and mp3 audio ('mp3') are kept.
  wav files are only used for hitsounds, which are irrelevant here.

  Returns True when the file should be kept, False otherwise.
  """
  wanted_endings = ('osu', 'mp3')
  return filepath.endswith(wanted_endings)

In [5]:
def keep_file(filepath):
  """
  Tells which extracted files should be kept.

  A file is kept when its name mentions one of the standard difficulty
  names (matched case-insensitively) or contains 'audio.mp3' (matched
  case-sensitively). Anything containing '.wav' is always rejected.

  Returns True to keep the file, False to delete it.
  """
  lowered = filepath.lower()
  # hitsound samples are never wanted, whatever else the name contains
  if '.wav' in filepath:
    return False
  # !impt: taiko diffs are like so: kantan, futsuu, muzukashii, oni
  difficulty_tags = ('easy', 'normal', 'hard', 'insane', 'extra', 'expert')
  return 'audio.mp3' in filepath or any(tag in lowered for tag in difficulty_tags)

In [6]:
# unzip them. how?
# # make dir per song
# # retain metadata and audio
# An .osz is opened here as a plain zip archive; each one is extracted into
# its own folder under unzip_path, named after the archive.
os.makedirs(unzip_path, exist_ok=True)
for osz in oszs:
  # basename + splitext strips only the final extension, so a song name that
  # itself contains dots is not cut short (splitting on '.' kept just the
  # part before the first dot)
  songname = os.path.splitext(os.path.basename(osz))[0]
  save_path = unzip_path+songname+'/'
  with zipfile.ZipFile(osz, 'r') as zip_ref:
    zip_ref.extractall(save_path)

# delete unneeded files:
# # subdirs is minimal directory paths (e.g. killing-my-love, duvet w.o. abspath)
subdirs = [entry for entry in os.listdir(unzip_path) if os.path.isdir(os.path.join(unzip_path, entry))]
for subdir in subdirs:
  dirpath = os.path.join(unzip_path,subdir)
  for file in os.listdir(dirpath):
    filepath = os.path.join(dirpath,file)
    # os.listdir also returns sub-folders that were inside the archive;
    # os.remove() raises on a directory, so only regular files are pruned
    # and nested folders are left untouched
    if os.path.isfile(filepath) and not keep_file(file):
      os.remove(filepath)

## 2. Extract Hit Objects from .osu

In [7]:
# wiki is extremely well documented
# # https://osu.ppy.sh/wiki/en/Client/File_formats/osu_%28file_format%29#type:~:text=Slider%20border%20colour-,Hit%20objects,-Hit%20object%20syntax

In [8]:
# Folder of the song used as the worked example in this section.
# NOTE(review): 'lovenote' is not among the archives listed in the earlier
# cell output — confirm this folder exists under unzip_path.
example_song = os.path.join(unzip_path,'lovenote')

In [9]:
# List the file names present in the example song folder and display them
example_osu_dir = os.listdir(example_song)
example_osu_dir

['audio.mp3']

In [10]:
#example_osu #= example_osu_dir[2]

In [11]:
def classify_object(flag):
  """
  Derive the object type from osu's binary type flags.

  Bits inspected (all other bits are ignored):
  - bit 0 -> 'circle'
  - bit 1 -> 'slider'
  - bit 3 -> 'spinner'

  If several of these bits are set, a notice is printed and the first
  match in the order circle, slider, spinner wins. If none is set, a
  diagnostic is printed and None is returned.
  """
  # (name, bit position), listed in priority order
  type_bits = (('circle', 0), ('slider', 1), ('spinner', 3))
  # the masked value is 1<<bit rather than 1, so test for non-zero
  matches = [name for name, bit in type_bits if flag & (1 << bit)]

  if len(matches) > 1:
    print("Whaaat")
  if matches:
    return matches[0]
  print(f"Something is wrong in object classification. Recieved flag: {flag}")
  return None

A quick note about taiko: apart from the naming convention for difficulties, taiko and osu!std hit objects are stored identically. Hype!

In [66]:
def extract_hit_objects(osu_path, just_ints = False):
  """
  Find type information and timing data for all hit objects contained in an .osu file

  Inputs:
  - osu_path: path to osu metadata file
  - just_ints: when True, return only the list of packed hex strings

  Returns a list of (note_type, note_time, note_int) tuples, or only the
  list of note_int values when just_ints is True
  - note_type: 'slider', 'spinner' or 'circle' (whatever classify_object returns)
  - note_time: integer, time in ms when note appears (relative to song start)
  - note_int: 32 bit integer written in hex ('0x' + 8 digits) containing
    relevant note info. See game_code/notes.md for more info

  Bit layout as packed by this function:
    31 has_been_hit | 30 channel | 29 spinner | 28 repeat slider |
    27 slider | 26 circle | 25..0 time in ms

  A file without a [HitObjects] section yields an empty result.
  Raises ValueError if a note time does not fit below the flag bits.
  """
  start_str = '[HitObjects]'
  time_bits = 26 # flags start at bit 26, so the time must stay below 1<<26
  # hit object fields, from the wiki: x,y,time,type,hitSound,objectParams,hitSample
  note_types = []
  note_times = []
  # the osu wiki specifies .osu files as UTF-8; don't rely on the platform default
  with open(osu_path, 'r', encoding='utf-8') as map_file:
    content = map_file.read()

  section_start = content.find(start_str)
  # find() returns -1 when the header is missing; slicing from there would
  # parse unrelated parts of the file as hit objects
  section_lines = [] if section_start == -1 else content[section_start+len(start_str):].splitlines()

  # for now, just time and circle/slider class
  for hit_object in section_lines:
    if hit_object.startswith('['):
      break # another section header: the hit objects are over
    hit_object_fields = hit_object.split(',')
    if len(hit_object_fields) < 4:
      continue # blank or malformed line: no time/type fields to read
    ms, note_type = hit_object_fields[2],hit_object_fields[3]
    note_types.append(classify_object(int(note_type)))
    note_times.append(int(ms))

  # creating note_ints
  note_ints = []
  for note_type, note_time in zip(note_types, note_times):
    if not 0 <= note_time < (1 << time_bits):
      raise ValueError(f"note time {note_time} ms does not fit in {time_bits} bits")
    # !impt: placeholders, always 0 for now
    has_been_hit = 0
    channel = 0 # 0=L, 1=R
    has_repeat_slider = 0

    has_slider = int(note_type=='slider')
    has_spinner = int(note_type=='spinner')
    has_circle = int(note_type=='circle')

    # has_repeat_slider goes to bit 28, the only free slot between the
    # spinner and slider flags (unshifted it would collide with bit 0 of the
    # time) — NOTE(review): confirm against game_code/notes.md
    combined_int = (has_been_hit<<31 | channel<<30 | has_spinner<<29
                    | has_repeat_slider<<28 | has_slider<<27 | has_circle<<26
                    | note_time)
    # zero-padded to 8 lowercase hex digits, i.e. the full 32-bit word
    note_ints.append(f'0x{combined_int:08x}')

  if just_ints: # programming sin tbh
    return note_ints
  return list(zip(note_types,note_times,note_ints))

In [67]:
# Chart used for the sanity checks in the next cells: the Hard difficulty of
# Killing My Love, parsed into (type, time, packed int) tuples.
# NOTE(review): this absolute path is tied to one machine, while the later
# extraction cells use paths relative to the notebook — consider aligning.
song = "/Users/donu/Desktop/S25/ELEC 327/327-final-proj/oszs/charts/killing-my-love/LESLIE PARRISH - KILLING MY LOVE (Cut Ver.) (Pincus) [Pepekcz's Hard Ryosuke's RX-7 FC].osu"
hitobjs = extract_hit_objects(song)

In [63]:
# type conversion: show how many notes were parsed and what one entry looks like
print(len(hitobjs))
# slicing keeps this a no-op when nothing was parsed
for first_note in hitobjs[:1]:
  print(first_note)


328
('slider', 1675, '0x0800068b')


In [64]:
# test deciphering: the low 20 bits of every packed int must give back the note time
time_mask = (1 << 20) - 1
correct = sum(
  (int(packed_hex, 16) & time_mask) == note_time
  for _, note_time, packed_hex in hitobjs
)
correct==len(hitobjs) #if true all good

True

In [75]:
def to_c_string(x):
  """Format an iterable of strings as a C array initializer, e.g. '{0x1, 0x2}'."""
  joined = ', '.join(x)
  return '{' + joined + '}'

In [None]:
# !impt: where you extract hit objects
# One .osu chart per song, relative to the notebook's location. Every chart
# sits in its song's own folder under charts/ (the folder is named after the
# .osz archive it was extracted from).
duvet_path_easy = '../../oszs/charts/duvet/boa - Duvet (TV Size) (Girls Love) [Easy].osu'
eta_path_normal = "../../oszs/charts/ETA/NewJeans - ETA (bielsuu) [miss yo's Normal].osu"
killing_my_love_normal = "../../oszs/charts/killing-my-love/LESLIE PARRISH - KILLING MY LOVE (Cut Ver.) (Pincus) [Kirishima-'s Normal Bunta's Impreza].osu"

In [None]:
# add more songs as needed
# Each song is parsed from its own chart path. Results get their own names so
# the path variables stay intact and this cell can be re-run.
hitobjs_boa = extract_hit_objects(duvet_path_easy, just_ints=True)
hitobjs_eta = extract_hit_objects(eta_path_normal, just_ints=True)
hitobjs_kml = extract_hit_objects(killing_my_love_normal, just_ints=True)
# the results: one C array initializer string per song
boa_res = to_c_string(hitobjs_boa)
eta_res = to_c_string(hitobjs_eta)
killing_my_love_res = to_c_string(hitobjs_kml)