@@ -118,40 +118,34 @@ def check_valid(results):
118118
119119
def split_gen():
    """Rebuild ``sanitized_calibrated_samples/`` from the files in ``results/``.

    The output tree is removed and recreated with ``hard`` and ``full``
    sub-trees, each containing a ``complete`` and an ``instruct`` directory.

    For every ``model_info`` entry whose ``"prompted"`` value is truthy, each
    ``results/<model>--bigcodebench-*.jsonl`` file whose suffix (the part after
    ``--bigcodebench-``) contains both ``-sanitized`` and ``calibrated`` is
    copied into ``<split>/<task>/``. When ``info["link"]`` is a Hugging Face
    URL, the copy is named after the repository path in that URL instead of
    the ``model_info`` key.

    Reads the module-level ``model_info`` mapping; returns ``None``.
    """
    out_root = "sanitized_calibrated_samples"
    marker = "--bigcodebench-"
    hf_prefix = "https://huggingface.co/"

    shutil.rmtree(out_root, ignore_errors=True)
    for split_type in ("hard", "full"):
        for task in ("complete", "instruct"):
            os.makedirs(f"{out_root}/{split_type}/{task}", exist_ok=True)

    for model, info in model_info.items():
        # Nothing is ever written for non-prompted models, so skip them before
        # globbing or touching any file.
        if not info["prompted"]:
            continue

        model = model.replace("/", "--")
        # Glob with the original key; the output name may be replaced below.
        files = glob(f"results/{model}{marker}*.jsonl")
        if info["link"].startswith(hf_prefix):
            model = info["link"].split(hf_prefix)[-1].replace("/", "--")

        for file in files:
            # maxsplit=1 keeps this from raising if the marker ever appears
            # more than once in the file name.
            suffix = os.path.basename(file).split(marker, 1)[-1]

            # Test the suffix rather than the whole path so that a model or
            # directory name containing these substrings cannot misroute a file.
            if "-sanitized" not in suffix or "calibrated" not in suffix:
                continue

            split_type = "hard" if suffix.startswith("hard-") else "full"
            if suffix.startswith(("complete", "hard-complete")):
                task = "complete"
            else:
                task = "instruct"

            # Plain copy: no need to load the samples into memory.
            shutil.copyfile(
                file,
                f"{out_root}/{split_type}/{task}/{model}{marker}{suffix}",
            )
155149
156150def  read_task_perf (tids , task = "complete" ):
157151    model_results  =  dict ()
@@ -302,7 +296,7 @@ def get_perf_df(data_dict):
302296
303297
304298if  __name__  ==  "__main__" :
305-     #  split_gen()
299+     split_gen ()
306300    bcb_orig  =  load_dataset ("bigcode/bigcodebench" , split = "v0.1.1" )
307301    bcb_hard  =  load_dataset ("bigcode/bigcodebench-hard" , split = "v0.1.1" )
308302    bcb_config  =  {
0 commit comments