In [1]:
import os
import pandas as pd

def extract_folder_info(folder_name):
    """Extract TIME and INSTANCE from the folder name."""
    try:
        first_underscore = folder_name.index('_')
        last_underscore = folder_name.rindex('_')
        TIME = folder_name[:first_underscore]
        INSTANCE = folder_name[first_underscore + 1:last_underscore]
        return TIME, INSTANCE
    except ValueError:
        return None, None

def extract_gap(result_file_path):
    """Extract GAP value from results.txt file."""
    GAP = 0
    if os.path.exists(result_file_path):
        try:
            with open(result_file_path, 'r') as result_file:
                for line in result_file:
                    if line.startswith("GapClosed:"):
                        GAP = float(line.split(':')[1].strip())
                        break
        except Exception as e:
            print(f"Error reading {result_file_path}: {e}")
    return GAP

def extract_solving_time(solving_stat_file_path):
    """Extract SOLVING_TIME value from solving_statistic.txt file."""
    SOLVING_TIME = 3700
    if os.path.exists(solving_stat_file_path):
        try:
            with open(solving_stat_file_path, 'r') as solving_stat_file:
                for line in solving_stat_file:
                    if line.startswith("Total Time"):
                        SOLVING_TIME = float(line.split(':')[1].strip())
                        break
        except Exception as e:
            print(f"Error reading {solving_stat_file_path}: {e}")
    return SOLVING_TIME

def extract_feasibility(result_file_path):
    """Extract FEASIBLE value from results.txt file."""
    FEASIBLE = False
    if os.path.exists(result_file_path):
        try:
            with open(result_file_path, 'r') as result_file:
                for line in result_file:
                    if line.startswith("DbgSolFeasible:"):
                        FEASIBLE = line.split(':')[1].strip().lower() == 'true'
                        break
        except Exception as e:
            print(f"Error reading {result_file_path}: {e}")
    return FEASIBLE

def process_folder(folder_path, folder_name, mode):
    """Process each folder to extract required information."""
    TIME, INSTANCE = extract_folder_info(folder_name)
    if TIME is None or INSTANCE is None:
        return None

    result_file_path = os.path.join(folder_path, 'results.txt')
    solving_stat_file_path = os.path.join(folder_path, 'solving_statistic.txt')

    GAP = extract_gap(result_file_path)
    SOLVING_TIME = extract_solving_time(solving_stat_file_path)
    FEASIBLE = extract_feasibility(result_file_path)

    return [INSTANCE, mode, GAP, SOLVING_TIME, FEASIBLE]

def fetchresult(main_dir):
    """Main function to iterate through directories and collect data."""
    data = []

    for mode in os.listdir(main_dir):
        mode_path = os.path.join(main_dir, mode)
        if os.path.isdir(mode_path):
            for folder_name in os.listdir(mode_path):
                folder_path = os.path.join(mode_path, folder_name)
                if os.path.isdir(folder_path):
                    folder_data = process_folder(folder_path, folder_name, mode)
                    if folder_data:
                        data.append(folder_data)

    df = pd.DataFrame(data, columns=['INSTANCE', 'MODE', 'GAP', 'TIME', 'FEASIBLE'])
    df['GAP'] = df['GAP'].astype(float)

    return df
    # Find and print pairs that a

In [2]:
main_dir = os.path.join(os.getcwd(), 'Data')
df = fetchresult(main_dir)

In [3]:
df[['INSTANCE', 'MODE']].drop_duplicates()

Unnamed: 0,INSTANCE,MODE
0,neos-3381206-awhea,vpcwsubscip
1,roll3000,vpcwsubscip
2,p200x1188c,vpcwsubscip
3,exp-1-500-5-5,vpcwsubscip
4,neos-3216931-puriri,vpcwsubscip
...,...,...
121,neos-3627168-kasai,vpc
122,seymour1,vpc
123,reblock115,vpc
124,uct-subprob,vpc


In [4]:
grouped_df = df.groupby(['INSTANCE', 'MODE']).size().reset_index(name='count')

# Filter pairs that appear more than once
result = grouped_df[grouped_df['count'] > 1]
result

Unnamed: 0,INSTANCE,MODE,count


In [5]:
# Pivot the DataFrame to the desired format
df_pivot = df.pivot(index='INSTANCE', columns='MODE', values=['GAP', 'TIME','FEASIBLE'])

# Flatten the multi-level columns
df_pivot.columns = ['_'.join(col).strip() for col in df_pivot.columns.values]

gap_columns = [col for col in df_pivot.columns if 'GAP' in col]
df_pivot[gap_columns] = df_pivot[gap_columns].round(2)*100

In [6]:
df_pivot

Unnamed: 0_level_0,GAP_gomory,GAP_vpc,GAP_vpcwsubscip,TIME_gomory,TIME_vpc,TIME_vpcwsubscip,FEASIBLE_gomory,FEASIBLE_vpc,FEASIBLE_vpcwsubscip
INSTANCE,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
30n20b8,9.68264,7.70445,0.0,17.11,2252.83,3700.0,False,False,False
beasleyC3,1.17961,1.17961,1.17961,0.1,71.08,41.47,True,True,True
binkar10_1,6.75932,6.69134,6.69134,0.05,24.19,16.22,False,False,False
bnatt400,0.0,0.0,0.0,1.14,2167.38,3700.0,True,True,False
cod105,0.865617,0.437558,0.437558,2.42,105.3,134.9,True,True,True
cost266-UUE,27.2712,18.6341,21.7121,0.23,159.41,67.67,True,True,True
csched007,8.92488,3.38965,3.42123,0.16,142.22,169.84,False,False,False
exp-1-500-5-5,23.1601,21.7312,21.7312,0.04,20.23,14.98,True,True,True
gen-ip002,2.14825,0.906481,0.906481,0.01,0.11,3.22,True,True,True
glass4,0.0,-0.0,0.0,0.06,1.53,4.44,False,False,False


In [8]:
df_pivot.to_csv("results.csv")

In [7]:
df_rounded

NameError: name 'df_rounded' is not defined