In [177]:
import pandas as pd
import plotly.graph_objs as go
import plotly.io as pio
# Path of the log file used by default; the same literal is the default `file_name` argument of plot_graph.
file_name = """229r/task_date_{'x': 10, 'y': 10}_{'NOT': 1.0, 'NAND': 1.0, 'AND': 2.0, 'ORN': 2.0, 'OR': 3.0, 'ANDN': 3.0, 'NOR': 4.0, 'XOR': 4.0, 'EQU': 5.0}_03-06_20-38-48.txt"""

def plot_graph(file_name="""229r/task_date_{'x': 10, 'y': 10}_{'NOT': 1.0, 'NAND': 1.0, 'AND': 2.0, 'ORN': 2.0, 'OR': 3.0, 'ANDN': 3.0, 'NOR': 4.0, 'XOR': 4.0, 'EQU': 5.0}_03-06_20-38-48.txt""", title='Average Fitness over Time', width = 800,    height = 600):
    """Plot the 'Fit' values of a log file against its 'UD' values and save the figure as a PNG.

    Only lines starting with 'UD:' are used. Each such line is split on
    whitespace and tokens 1, 3, 5 and 7 are read as UD (int), Gen (float),
    Fit (float) and Orgs (int).

    Args:
        file_name: Path of the log file to read.
        title: Title shown on the figure.
        width: Figure width, used for both the layout and the exported image.
        height: Figure height, used for both the layout and the exported image.

    Raises:
        ValueError: If the file contains no line starting with 'UD:'.

    Side effects:
        Displays the figure and writes ``<directory of file_name>/plots/<name>.png``
        (export scale 2), creating the ``plots`` directory when it is missing.
    """
    import os  # local import: the cell defining this function does not import os

    # Parse the 'UD:' lines into one record per line
    records = []
    with open(file_name, 'r') as f:
        for line in f:
            if line.startswith('UD:'):
                values = line.strip().split()
                records.append({
                    'UD': int(values[1]),
                    'Gen': float(values[3]),
                    'Fit': float(values[5]),
                    'Orgs': int(values[7])
                })

    if not records:
        raise ValueError("No lines starting with 'UD:' found in {!r}".format(file_name))

    # Create dataframe from list of dictionaries
    df = pd.DataFrame(records)

    # Define x and y data: 'UD' is the first column and serves as the x-axis
    x_data = df['UD']
    y_data = df[["Fit"]]

    # Create one plotly trace per plotted column
    traces = [
        go.Scatter(x=x_data, y=y_data[column], mode='lines', name=column)
        for column in y_data.columns
    ]

    # Define plot layout
    layout = go.Layout(title=title, xaxis=dict(title='# of updates'), yaxis=dict(title='average fitness'), margin=dict(t=40, b=20, l=20, r=20), autosize=False, width=width, height=height)

    # Create and display the plotly figure
    fig = go.Figure(data=traces, layout=layout)
    fig.show()

    # Build the output path with os.path: splitting the name on the first '.'
    # truncated file names that contain decimals (such as '1.0' in the default
    # name), and indexing the result of split('/') broke on paths that do not
    # have exactly one directory level.
    log_dir, log_name = os.path.split(file_name)
    stem = os.path.splitext(log_name)[0]
    plots_dir = os.path.join(log_dir, 'plots')
    os.makedirs(plots_dir, exist_ok=True)
    pio.write_image(fig, os.path.join(plots_dir, stem + '.png'), width=width, height=height, scale=2)


In [178]:
import os
import pandas as pd
import re

# Directory containing the log files (their last line starts with 'UD:')
dir_path = '229r/all1 copy 2'

# Directory containing the data files; each log is paired with the data file of the same name
dat_dir_path = 'data/229r/all1 copy 2'

# Regular expression pattern for the log file names. Capture groups:
#   1: index, 2: max_count flag, 3: bracketed int list ("values"),
#   4 and 5: joined with '_' into experiment_start_time_string,
#   6: bracketed int list ("xy_values")
pattern = r"(\d+)_mxc_(\w+)_(\[.*\])_date_(\d{2}-\d{2})_(\d{2}-\d{2}-\d{2})_xydict_values\((\[.*\])\)\.txt"

# Column names for the first nine fields of a data file's last line, in file order;
# the tenth field is stored as "Equals"
task_columns = ["Update", "Not", "Nand", "And", "OrNot", "Or", "AndNot", "Nor", "Xor"]


def _read_last_line(path):
    """Return the last non-blank line of `path`, stripped of surrounding whitespace.

    Raises ValueError (naming the file) when the file has no non-blank line,
    instead of the bare IndexError that indexing an empty readlines() result gives.
    """
    last = None
    with open(path, 'r') as handle:
        for raw in handle:
            stripped = raw.strip()
            if stripped:
                last = stripped
    if last is None:
        raise ValueError("File has no non-blank lines: {}".format(path))
    return last


def _parse_int_list(text):
    """Turn a string such as '[1, 2, 3]' into the tuple (1, 2, 3)."""
    return tuple(int(i) for i in text.strip('[]').split(', '))


# One dictionary per log file; turned into the dataframe after the loop
data_list = []

# Loop through all the text files in the log directory
for filename in os.listdir(dir_path):
    if not filename.endswith('.txt'):
        continue

    # Validate the file name before touching any file contents
    match = re.search(pattern, filename)
    if not match:
        raise ValueError("No match found for pattern in file name: {}".format(filename))

    # Final state of the run, taken from the last line of the log
    last_line = _read_last_line(os.path.join(dir_path, filename))
    if not last_line.startswith('UD:'):
        raise ValueError("Last line does not start with UD: {}".format(filename))
    last_line_string = last_line.split()

    # Final counters, taken from the last line of the matching data file
    numbers = _read_last_line(os.path.join(dat_dir_path, filename)).split()
    if len(numbers) < len(task_columns) + 1:
        raise ValueError("Last data line has fewer than {} fields: {}".format(len(task_columns) + 1, filename))

    # Key order fixes the column order of the dataframe: 'Equals' first, then the
    # file-name / log fields, then the remaining counters.
    row = {"Equals": int(numbers[9])}
    row.update({
        'max_count': match.group(2) == 'True',
        'values': _parse_int_list(match.group(3)),
        'last_line_string': last_line_string,
        'experiment_start_time_string': match.group(4) + "_" + match.group(5),
        'xy_values': _parse_int_list(match.group(6)),
        'Last_UD': int(last_line_string[1]),
        'Gen': float(last_line_string[3]),
        'Fit': float(last_line_string[5]),
        'Orgs': int(last_line_string[7]),
        'index': int(match.group(1)),
    })
    row.update(zip(task_columns, (int(number) for number in numbers[:len(task_columns)])))

    data_list.append(row)

# Create the DataFrame from the data list
df = pd.DataFrame(data_list)

# Show the dataframe
df


Unnamed: 0,Equals,max_count,values,last_line_string,experiment_start_time_string,xy_values,Last_UD,Gen,Fit,Orgs,index,Update,Not,Nand,And,OrNot,Or,AndNot,Nor,Xor
0,1826,True,"(2, 3, 1, 4, 5)","[UD:, 4400, Gen:, 1017.648, Fit:, 5.304728e+72...",04-13_20-35-15,"(120, 120)",4400,1017.64800,5.304728e+72,14345,0,4400,12397,13294,11875,13104,49,542,9074,0
1,0,False,"(1, 2, 3, 4, 6)","[UD:, 10000, Gen:, 1331.495, Fit:, 1.073964e+1...",04-13_15-10-40,"(120, 120)",10000,1331.49500,1.073964e+158,14259,0,10000,11353,404,12,10494,11233,121,2247,0
2,0,False,"(3, 1, 2, 4, 5)","[UD:, 2200, Gen:, 1359.863, Fit:, 1.065862e+14...",04-14_13-31-51,"(120, 120)",2200,1359.86300,1.065862e+146,14153,2,2200,13518,4609,13,74,0,0,0,0
3,0,False,"(4, 1, 3, 2, 5)","[UD:, 500, Gen:, 45.97935, Fit:, 1.826429e+12,...",04-14_15-58-51,"(120, 120)",500,45.97935,1.826429e+12,3583,2,500,542,290,0,6,0,0,0,0
4,0,True,"(2, 1, 4, 3, 5)","[UD:, 10000, Gen:, 2259.621, Fit:, 16367.07, O...",04-14_03-56-46,"(120, 120)",10000,2259.62100,1.636707e+04,14389,1,10000,13162,13658,6,12150,12124,13590,12012,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
162,0,False,"(2, 4, 1, 3, 5)","[UD:, 2000, Gen:, 1114.611, Fit:, 6.554231e+35...",04-13_21-27-16,"(120, 120)",2000,1114.61100,6.554231e+35,11157,0,2000,4501,5817,936,5859,85,50,32,0
163,0,True,"(3, 4, 1, 2, 5)","[UD:, 10000, Gen:, 1960.274, Fit:, 9032.465, O...",04-14_14-32-54,"(120, 120)",10000,1960.27400,9.032465e+03,14382,2,10000,13366,13129,12441,13459,97,11975,7,0
164,0,False,"(1, 2, 4, 3, 5)","[UD:, 1500, Gen:, 128.6599, Fit:, 9.142351e+24...",04-14_10-08-54,"(120, 120)",1500,128.65990,9.142351e+244,14373,2,1500,1294,107,1,705,3,0,0,0
165,0,True,"(3, 1, 4, 2, 5)","[UD:, 10000, Gen:, 2230.110, Fit:, 39000.65, O...",04-14_13-33-19,"(120, 120)",10000,2230.11000,3.900065e+04,14347,2,10000,13569,12583,10077,11806,13063,11254,12363,0


In [179]:
import pandas as pd
import pickle

# Persist the results dataframe so later sessions can reload it without re-parsing the logs
with open('229r/results/all1_copy_fitness_and_counts_df.pickle', mode='wb') as pickle_file:
    pickle.dump(df, pickle_file)


In [180]:
import pandas as pd
import pickle

# Restore the results dataframe from its pickle.
# NOTE: unpickling executes code stored in the file, so only load pickles you trust.
with open('229r/results/all1_copy_fitness_and_counts_df.pickle', mode='rb') as pickle_file:
    df = pickle.load(pickle_file)

# Render the restored dataframe
display(df)


Unnamed: 0,Equals,max_count,values,last_line_string,experiment_start_time_string,xy_values,Last_UD,Gen,Fit,Orgs,index,Update,Not,Nand,And,OrNot,Or,AndNot,Nor,Xor
0,1826,True,"(2, 3, 1, 4, 5)","[UD:, 4400, Gen:, 1017.648, Fit:, 5.304728e+72...",04-13_20-35-15,"(120, 120)",4400,1017.64800,5.304728e+72,14345,0,4400,12397,13294,11875,13104,49,542,9074,0
1,0,False,"(1, 2, 3, 4, 6)","[UD:, 10000, Gen:, 1331.495, Fit:, 1.073964e+1...",04-13_15-10-40,"(120, 120)",10000,1331.49500,1.073964e+158,14259,0,10000,11353,404,12,10494,11233,121,2247,0
2,0,False,"(3, 1, 2, 4, 5)","[UD:, 2200, Gen:, 1359.863, Fit:, 1.065862e+14...",04-14_13-31-51,"(120, 120)",2200,1359.86300,1.065862e+146,14153,2,2200,13518,4609,13,74,0,0,0,0
3,0,False,"(4, 1, 3, 2, 5)","[UD:, 500, Gen:, 45.97935, Fit:, 1.826429e+12,...",04-14_15-58-51,"(120, 120)",500,45.97935,1.826429e+12,3583,2,500,542,290,0,6,0,0,0,0
4,0,True,"(2, 1, 4, 3, 5)","[UD:, 10000, Gen:, 2259.621, Fit:, 16367.07, O...",04-14_03-56-46,"(120, 120)",10000,2259.62100,1.636707e+04,14389,1,10000,13162,13658,6,12150,12124,13590,12012,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
162,0,False,"(2, 4, 1, 3, 5)","[UD:, 2000, Gen:, 1114.611, Fit:, 6.554231e+35...",04-13_21-27-16,"(120, 120)",2000,1114.61100,6.554231e+35,11157,0,2000,4501,5817,936,5859,85,50,32,0
163,0,True,"(3, 4, 1, 2, 5)","[UD:, 10000, Gen:, 1960.274, Fit:, 9032.465, O...",04-14_14-32-54,"(120, 120)",10000,1960.27400,9.032465e+03,14382,2,10000,13366,13129,12441,13459,97,11975,7,0
164,0,False,"(1, 2, 4, 3, 5)","[UD:, 1500, Gen:, 128.6599, Fit:, 9.142351e+24...",04-14_10-08-54,"(120, 120)",1500,128.65990,9.142351e+244,14373,2,1500,1294,107,1,705,3,0,0,0
165,0,True,"(3, 1, 4, 2, 5)","[UD:, 10000, Gen:, 2230.110, Fit:, 39000.65, O...",04-14_13-33-19,"(120, 120)",10000,2230.11000,3.900065e+04,14347,2,10000,13569,12583,10077,11806,13063,11254,12363,0


In [181]:
# Summary statistics (count, mean, std, min, quartiles, max) of df's numeric columns
df.describe()

Unnamed: 0,Equals,Last_UD,Gen,Fit,Orgs,index,Update,Not,Nand,And,OrNot,Or,AndNot,Nor,Xor
count,167.0,167.0,167.0,167.0,167.0,167.0,167.0,167.0,167.0,167.0,167.0,167.0,167.0,167.0,167.0
mean,1757.353293,6705.45509,1619.253895,2.637427e+298,12972.688623,0.958084,6704.790419,8672.994012,8194.946108,3808.035928,8179.335329,5303.467066,5343.071856,3820.766467,901.047904
std,3539.120356,3810.073641,1205.486804,inf,2408.951061,0.809234,3810.556175,5044.171468,5358.130037,5140.448757,5187.876417,5457.021185,5594.086335,5038.037999,2726.881172
min,0.0,500.0,45.97935,0.4338221,3583.0,0.0,500.0,118.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.0,2200.0,502.71015,69439.93,12728.0,0.0,2200.0,3287.0,2066.0,6.5,2757.0,18.5,8.5,0.0,0.0
50%,0.0,10000.0,1685.649,1.367684e+30,14224.0,1.0,10000.0,11231.0,10422.0,165.0,11160.0,2617.0,1903.0,108.0,0.0
75%,887.5,10000.0,2296.2935,5.329313e+145,14361.5,2.0,10000.0,13133.0,13257.5,10071.5,12774.0,11634.0,11608.5,9328.0,1.0
max,12723.0,10000.0,7114.001,3.391337e+300,14391.0,2.0,10000.0,13989.0,14174.0,13296.0,14077.0,13722.0,13855.0,12857.0,12445.0


In [182]:
# List the column labels of df
df.columns

Index(['Equals', 'max_count', 'values', 'last_line_string',
       'experiment_start_time_string', 'xy_values', 'Last_UD', 'Gen', 'Fit',
       'Orgs', 'index', 'Update', 'Not', 'Nand', 'And', 'OrNot', 'Or',
       'AndNot', 'Nor', 'Xor'],
      dtype='object')

In [183]:
# Keep the rows whose max_count flag is off and whose last recorded update is 10000
filtered_df = df.loc[~df['max_count'] & df['Last_UD'].eq(10000)]

In [184]:
# Show the rows with max_count == False and Last_UD == 10000
filtered_df

Unnamed: 0,Equals,max_count,values,last_line_string,experiment_start_time_string,xy_values,Last_UD,Gen,Fit,Orgs,index,Update,Not,Nand,And,OrNot,Or,AndNot,Nor,Xor
1,0,False,"(1, 2, 3, 4, 6)","[UD:, 10000, Gen:, 1331.495, Fit:, 1.073964e+1...",04-13_15-10-40,"(120, 120)",10000,1331.495,1.073964e+158,14259,0,10000,11353,404,12,10494,11233,121,2247,0
19,0,False,"(3, 2, 4, 1, 5)","[UD:, 10000, Gen:, 706.1867, Fit:, 5.579337e+1...",04-14_06-14-08,"(120, 120)",10000,706.1867,5.579337e+119,12930,1,10000,4741,140,0,1956,3589,10,21,0
20,0,False,"(3, 4, 1, 2, 5)","[UD:, 10000, Gen:, 1314.804, Fit:, 7.214520e+3...",04-14_14-50-55,"(120, 120)",10000,1314.804,7.214519999999999e+35,9911,2,10000,2132,6866,3238,6854,20,11,4,10
24,0,False,"(3, 2, 1, 4, 5)","[UD:, 10000, Gen:, 749.8986, Fit:, 4.651467e+1...",04-14_14-11-05,"(120, 120)",10000,749.8986,4.651467e+166,9318,2,10000,3391,37,0,4,0,0,0,0
30,0,False,"(1, 4, 2, 3, 5)","[UD:, 10000, Gen:, 3841.312, Fit:, 3.357075e+9...",04-14_10-35-02,"(120, 120)",10000,3841.312,3.357075e+94,13780,2,10000,9730,238,2,11735,19,10099,0,0
41,0,False,"(1, 2, 3, 4, 3)","[UD:, 10000, Gen:, 4381.775, Fit:, 3.019758e+2...",04-14_08-28-00,"(120, 120)",10000,4381.775,3.019758e+282,14046,2,10000,981,187,13,1861,12644,1458,11342,0
61,2,False,"(1, 2, 4, 3, 5)","[UD:, 10000, Gen:, 2458.093, Fit:, 7.271464e+4...",04-14_02-37-03,"(120, 120)",10000,2458.093,7.271464000000001e+42,14063,1,10000,3967,667,714,13846,13722,12852,4573,2
62,0,False,"(2, 3, 1, 4, 5)","[UD:, 10000, Gen:, 2841.018, Fit:, 9.692172e+1...",04-14_12-04-29,"(120, 120)",10000,2841.018,9.692172000000001e+180,14052,2,10000,12492,400,19,10754,74,139,75,0
68,0,False,"(4, 1, 2, 3, 5)","[UD:, 10000, Gen:, 3737.718, Fit:, 1.775644e+9...",04-14_15-33-46,"(120, 120)",10000,3737.718,1.775644e+93,13406,2,10000,118,12367,20,206,3,1,0,0
69,0,False,"(2, 4, 1, 3, 5)","[UD:, 10000, Gen:, 3283.730, Fit:, 4.517058e+1...",04-14_12-47-23,"(120, 120)",10000,3283.73,4.517058e+123,13594,2,10000,11231,13472,4,13561,15,0,0,0


In [185]:
# Order all rows by their 'Equals' count, largest first
sorted_EQU_df = df.sort_values(by="Equals", ascending=False)

In [186]:
# Show the rows ordered by descending 'Equals'
sorted_EQU_df

Unnamed: 0,Equals,max_count,values,last_line_string,experiment_start_time_string,xy_values,Last_UD,Gen,Fit,Orgs,index,Update,Not,Nand,And,OrNot,Or,AndNot,Nor,Xor
160,12723,True,"(4, 1, 2, 3, 5)","[UD:, 10000, Gen:, 2169.567, Fit:, 5.271931e+2...",04-14_15-15-07,"(120, 120)",10000,2169.5670,5.271931e+27,14260,2,10000,13532,13302,649,13460,13100,5681,11592,12445
11,11855,True,"(4, 3, 1, 2, 5)","[UD:, 10000, Gen:, 2338.887, Fit:, 1185905., O...",04-14_00-00-48,"(120, 120)",10000,2338.8870,1.185905e+06,14309,0,10000,12938,13563,12421,12757,411,342,13,0
35,11343,True,"(4, 1, 3, 2, 5)","[UD:, 10000, Gen:, 2283.541, Fit:, 2.116421e+1...",04-14_15-42-47,"(120, 120)",10000,2283.5410,2.116421e+14,14387,2,10000,13764,12981,11442,12922,11911,346,154,2
72,11307,True,"(3, 1, 2, 4, 5)","[UD:, 4000, Gen:, 1542.077, Fit:, 4.576376e+24...",04-13_21-41-21,"(120, 120)",4000,1542.0770,4.576376e+243,14375,0,4000,12621,12923,66,12904,40,381,36,10871
147,11068,True,"(3, 4, 2, 1, 5)","[UD:, 9700, Gen:, 2469.033, Fit:, 6.678235e+21...",04-13_22-48-30,"(120, 120)",9700,2469.0330,6.678235e+213,14346,0,9700,13110,12500,12583,13091,11245,410,108,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
80,0,False,"(2, 3, 4, 1, 5)","[UD:, 1700, Gen:, 228.5847, Fit:, 7.726926e+20...",04-13_21-13-20,"(120, 120)",1700,228.5847,7.726926e+20,10661,0,1700,5309,3743,1,4309,398,1,1,0
25,0,False,"(4, 3, 1, 2, 5)","[UD:, 1500, Gen:, 101.7994, Fit:, 1.296117e+10...",04-14_07-37-48,"(120, 120)",1500,101.7994,1.296117e+10,6116,1,1500,337,764,1,19,0,0,0,0
26,0,True,"(1, 2, 3, 4, 3)","[UD:, 10000, Gen:, 2120.008, Fit:, 3950.506, O...",04-14_08-13-03,"(120, 120)",10000,2120.0080,3.950506e+03,14383,2,10000,12876,13762,90,12448,12540,13354,11878,0
27,0,True,"(2, 3, 4, 1, 5)","[UD:, 10000, Gen:, 1780.615, Fit:, 8302.947, O...",04-14_12-15-51,"(120, 120)",10000,1780.6150,8.302947e+03,14373,2,10000,13058,13812,39,12719,12150,13326,38,0


In [187]:
# Keep only the rows whose last recorded update is 10000, preserving the 'Equals' ordering
cleaned_df = sorted_EQU_df.loc[sorted_EQU_df['Last_UD'].eq(10000)]
display(cleaned_df)

Unnamed: 0,Equals,max_count,values,last_line_string,experiment_start_time_string,xy_values,Last_UD,Gen,Fit,Orgs,index,Update,Not,Nand,And,OrNot,Or,AndNot,Nor,Xor
160,12723,True,"(4, 1, 2, 3, 5)","[UD:, 10000, Gen:, 2169.567, Fit:, 5.271931e+2...",04-14_15-15-07,"(120, 120)",10000,2169.567,5.271931e+27,14260,2,10000,13532,13302,649,13460,13100,5681,11592,12445
11,11855,True,"(4, 3, 1, 2, 5)","[UD:, 10000, Gen:, 2338.887, Fit:, 1185905., O...",04-14_00-00-48,"(120, 120)",10000,2338.887,1.185905e+06,14309,0,10000,12938,13563,12421,12757,411,342,13,0
35,11343,True,"(4, 1, 3, 2, 5)","[UD:, 10000, Gen:, 2283.541, Fit:, 2.116421e+1...",04-14_15-42-47,"(120, 120)",10000,2283.541,2.116421e+14,14387,2,10000,13764,12981,11442,12922,11911,346,154,2
31,10881,True,"(3, 2, 1, 4, 5)","[UD:, 10000, Gen:, 2657.456, Fit:, 1.444145e+0...",04-14_05-44-38,"(120, 120)",10000,2657.456,1.444145e+09,14325,1,10000,13334,13758,11366,13632,29,511,12219,11650
78,10856,True,"(3, 2, 4, 1, 5)","[UD:, 10000, Gen:, 3900.183, Fit:, 5.582985e+1...",04-14_05-59-34,"(120, 120)",10000,3900.183,5.582985e+139,14201,1,10000,13695,13254,94,11243,12991,8617,249,8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1,0,False,"(1, 2, 3, 4, 6)","[UD:, 10000, Gen:, 1331.495, Fit:, 1.073964e+1...",04-13_15-10-40,"(120, 120)",10000,1331.495,1.073964e+158,14259,0,10000,11353,404,12,10494,11233,121,2247,0
82,0,True,"(1, 4, 3, 2, 5)","[UD:, 10000, Gen:, 1933.172, Fit:, 11834.84, O...",04-14_10-47-40,"(120, 120)",10000,1933.172,1.183484e+04,14365,2,10000,13179,13084,10962,12802,11557,11839,25,0
26,0,True,"(1, 2, 3, 4, 3)","[UD:, 10000, Gen:, 2120.008, Fit:, 3950.506, O...",04-14_08-13-03,"(120, 120)",10000,2120.008,3.950506e+03,14383,2,10000,12876,13762,90,12448,12540,13354,11878,0
27,0,True,"(2, 3, 4, 1, 5)","[UD:, 10000, Gen:, 1780.615, Fit:, 8302.947, O...",04-14_12-15-51,"(120, 120)",10000,1780.615,8.302947e+03,14373,2,10000,13058,13812,39,12719,12150,13326,38,0


In [188]:
# LaTeX summary (mean, min, quartiles, max) of 'Equals' and 'Fit' for rows with max_count == True
d1 = (
    cleaned_df.loc[cleaned_df["max_count"], ["Equals", "Fit"]]
    .describe()
    .drop(index=["count", "std"])
)
print(d1.to_latex(index=True))

\begin{tabular}{lrr}
\toprule
{} &        Equals &            Fit \\
\midrule
mean &   2912.730159 &  4.474351e+189 \\
min  &      0.000000 &   6.751146e+01 \\
25\%  &      0.000000 &   1.451615e+04 \\
50\%  &      0.000000 &   3.900065e+04 \\
75\%  &   6710.000000 &   8.001358e+09 \\
max  &  12723.000000 &  2.818841e+191 \\
\bottomrule
\end{tabular}




In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality.



In [189]:
# LaTeX summary (mean, min, quartiles, max) of 'Equals' and 'Fit' for rows with max_count == False
d1 = (
    cleaned_df.loc[~cleaned_df["max_count"], ["Equals", "Fit"]]
    .describe()
    .drop(index=["count", "std"])
)
print(d1.to_latex(index=True))

\begin{tabular}{lrr}
\toprule
{} &    Equals &            Fit \\
\midrule
mean &  0.181818 &  1.372617e+281 \\
min  &  0.000000 &   4.261628e+21 \\
25\%  &  0.000000 &   1.998810e+90 \\
50\%  &  0.000000 &  2.793190e+119 \\
75\%  &  0.000000 &  8.054730e+157 \\
max  &  2.000000 &  3.019758e+282 \\
\bottomrule
\end{tabular}




In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality.



In [190]:
# Narrow cleaned_df to the columns used in the comparison tables
cleaned_df = cleaned_df.loc[:, ["values", "max_count", "Equals", "Fit", "index"]]

In [191]:
# Order the rows by their 'values' tuple so runs sharing the same tuple sit together
diff_vals_df = cleaned_df.sort_values(by="values")

In [192]:
# Emit the table sorted by 'values' as LaTeX source, without the row index
print(diff_vals_df.to_latex(index=False))

\begin{tabular}{llrrr}
\toprule
         values &  max\_count &  Equals &           Fit &  index \\
\midrule
(1, 2, 3, 4, 1) &       True &       0 &  3.881085e+03 &      1 \\
(1, 2, 3, 4, 1) &       True &       0 &  3.653363e+03 &      2 \\
(1, 2, 3, 4, 1) &       True &      19 &  1.712673e+04 &      0 \\
(1, 2, 3, 4, 2) &       True &       0 &  1.602652e+04 &      2 \\
(1, 2, 3, 4, 2) &       True &       0 &  1.487709e+04 &      0 \\
(1, 2, 3, 4, 2) &       True &       2 &  1.533196e+04 &      1 \\
(1, 2, 3, 4, 3) &      False &       0 & 3.019758e+282 &      2 \\
(1, 2, 3, 4, 3) &      False &       0 & 3.754598e+177 &      0 \\
(1, 2, 3, 4, 3) &       True &       0 &  1.415521e+04 &      1 \\
(1, 2, 3, 4, 3) &       True &       0 &  3.950506e+03 &      2 \\
(1, 2, 3, 4, 3) &       True &   10419 &  8.829428e+21 &      0 \\
(1, 2, 3, 4, 4) &       True &       1 &  1.006989e+05 &      1 \\
(1, 2, 3, 4, 4) &      False &       0 & 1.508958e+135 &      1 \\
(1, 2, 3, 4, 4) &   


In future versions `DataFrame.to_latex` is expected to utilise the base implementation of `Styler.to_latex` for formatting and rendering. The arguments signature may therefore change. It is recommended instead to use `DataFrame.style.to_latex` which also contains additional functionality.



In [193]:
# Render the table sorted by 'values'
display(diff_vals_df)

Unnamed: 0,values,max_count,Equals,Fit,index
21,"(1, 2, 3, 4, 1)",True,0,3.881085e+03,1
161,"(1, 2, 3, 4, 1)",True,0,3.653363e+03,2
93,"(1, 2, 3, 4, 1)",True,19,1.712673e+04,0
113,"(1, 2, 3, 4, 2)",True,0,1.602652e+04,2
91,"(1, 2, 3, 4, 2)",True,0,1.487709e+04,0
...,...,...,...,...,...
53,"(4, 2, 3, 1, 5)",True,0,1.676153e+04,0
138,"(4, 3, 1, 2, 5)",False,0,1.032054e+137,0
11,"(4, 3, 1, 2, 5)",True,11855,1.185905e+06,0
97,"(4, 3, 2, 1, 5)",True,0,1.022214e+05,1


In [197]:
# Average the remaining columns over every (values, max_count) combination
gdf = diff_vals_df.groupby(by=["values", "max_count"]).mean()
gdf

Unnamed: 0_level_0,Unnamed: 1_level_0,Equals,Fit,index
values,max_count,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
"(1, 2, 3, 4, 1)",True,6.333333,8220.393,1.0
"(1, 2, 3, 4, 2)",True,0.666667,15411.86,1.0
"(1, 2, 3, 4, 3)",False,0.0,1.509879e+282,1.0
"(1, 2, 3, 4, 3)",True,3473.0,2.943143e+21,1.0
"(1, 2, 3, 4, 4)",False,0.0,1.508958e+135,1.0
"(1, 2, 3, 4, 4)",True,686.0,3.677703e+33,1.0
"(1, 2, 3, 4, 5)",False,0.0,1.301802e+75,1.0
"(1, 2, 3, 4, 5)",True,4958.0,34569.19,1.0
"(1, 2, 3, 4, 6)",False,0.5,1.0109900000000001e+271,0.5
"(1, 2, 3, 4, 6)",True,3722.0,133614400000.0,0.0


In [199]:
# Flatten the per-(values, max_count) means into ordinary columns and order them by 'Equals'.
# The frame is rebuilt from diff_vals_df rather than transforming gdf in place:
# reset_index() on an already-flattened gdf would push the previous row index into
# the frame as an extra column on every re-run, so the original cell was not idempotent.
gdf = (
    diff_vals_df
    .groupby(["values", "max_count"])
    .mean()
    .reset_index()
    .sort_values("Equals")
)

In [201]:
# Grouped means for the groups whose max_count flag is False
gdf.loc[gdf["max_count"].eq(False)]

Unnamed: 0,values,max_count,Equals,Fit,index
26,"(2, 4, 1, 3, 5)",False,0.0,4.517058e+123,2.0
29,"(3, 1, 2, 4, 5)",False,0.0,4.261628e+21,1.0
19,"(2, 1, 3, 4, 5)",False,0.0,7.043458e+116,0.0
31,"(3, 2, 1, 4, 5)",False,0.0,4.651467e+166,2.0
17,"(1, 4, 3, 2, 5)",False,0.0,1.466622e+130,0.0
33,"(3, 2, 4, 1, 5)",False,0.0,5.579337e+119,1.0
15,"(1, 4, 2, 3, 5)",False,0.0,3.357075e+94,2.0
35,"(3, 4, 1, 2, 5)",False,0.0,7.214519999999999e+35,2.0
13,"(1, 3, 4, 2, 5)",False,0.0,3.954544e+99,0.0
24,"(2, 3, 4, 1, 5)",False,0.0,9.685310999999999e+40,2.0


In [202]:
# Grouped means for the groups whose max_count flag is True
gdf.loc[gdf["max_count"].eq(True)]

Unnamed: 0,values,max_count,Equals,Fit,index
11,"(1, 2, 4, 3, 5)",True,0.0,21904.11,1.0
12,"(1, 3, 2, 4, 5)",True,0.0,13266.21,1.0
36,"(3, 4, 1, 2, 5)",True,0.0,14289.01,1.0
37,"(3, 4, 2, 1, 5)",True,0.0,74377.47,2.0
42,"(4, 2, 3, 1, 5)",True,0.0,16761.53,0.0
25,"(2, 3, 4, 1, 5)",True,0.0,44885.4,1.0
1,"(1, 2, 3, 4, 2)",True,0.666667,15411.86,1.0
45,"(4, 3, 2, 1, 5)",True,1.0,113822.6,0.5
0,"(1, 2, 3, 4, 1)",True,6.333333,8220.393,1.0
5,"(1, 2, 3, 4, 4)",True,686.0,3.677703e+33,1.0
