Generating a New SBFL Formula Based on a Hybridized Systematic Search and Metric Combination

In [15]:
import sympy as sp
#Method to simplify string type algebraic expression
def simplify_expression(expression):
    # Parse the input expression into a SymPy expression
    expr = sp.sympify(expression)
    # Simplify the expression
    simplified_expr = sp.simplify(expr)
    return simplified_expr

Using pattern in Equation 1 as (x*Ef + y * Ep)/wEf + z£p, where Ef = A, eP=B 

In [16]:
import itertools
# Define the set of possible values for x, y, w, z
#try the subset of values and operators and compare to check the repetition of denominator and inverse formulas
values = [-1,0, 1]
operators=['*','/','+','-']
#possible values of EP and FA
values2 = [-1,0,1]
# Generate all possible combinations of x, y, w, z
combinations = list(itertools.product(values, repeat=4))
print(f'{len(combinations)} possibile metrics')
# Define the variables
#n00=np, n01= nf, n10=ep, n11=ef
#Basic SBFL and MECO terms define in context of Gzoltar variables for easy incorporation 
A, B, C, D, E, F, P, Z  = 'n11', 'n10', 'n11', 'n10', 'n11+n10','n11','(n11+n00)/(n00+n01+n10+n11)','n11+n00'

#A, B, C, D, E, F, P, Z  = 'ef', 'ep', 'ef', 'ep', 'ef+ep','ef','(ef+np)/(ef+nf+np+ep)','ef+np'
# Initialize a list to store the valid expressions 
 
#pattern in eqn 1: method to generate all possible metrics while filtering the metrics with numerator =0 or denominator =0
def term1():
    valid_expressions = []
    for (x, y, w, z) in combinations:
        numerator = f"({x}*{A} + {y}*{B})"
        denominator = f"({w}*{C} + {z}*{D})"
        #cover all possible subsets of MECO terms
        meco_term = f"(({w}*{E}) * ({x}*{P}))+(({y}*{F}) * ({z}*{A}))"
        # Form the numerator and denominator
        # Exclude metrics with 0 as denominator or numerator
        if not (w == 0 and z == 0):
            for operator in operators:
                # To avoid division by zero
                # Form the full expression   {simplify_expression(meco_term)}
                expression = f"({w}*{E}*{z}*{P}){operator}({w}*{F} * {z}*{A})"
                # Append the valid expression to the list
                valid_expressions.append(expression)
                #simplified_expressions.append(expression)
    return valid_expressions
#function for the proposed MECO formula
def meco():
    return f'({E}*{P}) + ({F}*{Z})'
#simplify_list_of_algebraic_expression and remove constants (sloae) -Algebraic simplification of list of formulas/metrics 
def sloae(list):
    simplified_metrics=[]
    for i in list:
        i = simplify_expression(i)
        if not i.is_number:
            simplified_metrics.append(i)
    return simplified_metrics        

    

#Method to create java file for each formula in the list for inclusion into Gzoltar
def javaFiles(list):
    #list.sort()
    for i in range(len(list)):
        #generate java programs
        k=list[i]
        java_code='''/**
 * Copyright (C) 2020 GZoltar contributors.
 * 
 * This file is part of GZoltar.
 * 
 * GZoltar is free software: you can redistribute it and/or modify it under the terms of the GNU
 * Lesser General Public License as published by the Free Software Foundation, either version 3 of
 * the License, or (at your option) any later version.
 * 
 * GZoltar is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even
 * the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
 * General Public License for more details.
 * 
 * You should have received a copy of the GNU Lesser General Public License along with GZoltar. If
 * not, see <https://www.gnu.org/licenses/>.
 */
package com.gzoltar.sfl.formulas;

/**
 * Implementation of Ochiai coefficient from <i>Zoogeographic studies on the soleoid fishes found in
 * Japan and its neighbouring regions<i>.
 * 
 * @author José Campos
 */ '''
        java_code+=f'\n\n'
        java_code+=f"public final class Fo{i+1} extends AbstractSFLFormula"+"{\n"
        java_code+=f"\t @Override"
        java_code+="\n \t public String getName() {"
        java_code+=f'\n \t return "Fo{i+1}";'+"\n \t}"
        java_code+=f"\n \t @Override"
        java_code+="\n \t public double compute(final double n00, final double n01, final double n10, final double n11) {"
        
        #formulas with denominator(s)
        java_code+="\n \t \t try { \n \t \t " 
        java_code+=f"return {k}; \n \t \t"+ " }"        
        java_code+="\n \t \t catch(ArithmeticException e){\n \t \t \t return 0.0; \n \t \t }"
        
        java_code+=f"\n \t \t \n \t"+ " }"
        java_code+="\n}"
        with open(f"Fo{i+1}.java", "a") as f:
            print(f'{java_code}',file=f)
            f.close
#method to print the list of formulas
def printExternal(list, fil):
    #list.sort()
    for i,k in enumerate(list):
        #Algebric simplification and print of the generated formulas/metrics 
        with open(f"{fil}.txt", "a") as f:
            print(f'{i+1} - {k}',file=f)
def printList(list):
    #list.sort()
    for i,k in enumerate(list):
        #Algebric simplification and print of the generated formulas/metrics 
        print(f'{i+1} - {k}')
#Method to remove duplicate members of list  
def remove_duplicates(lst):
    return list(set(lst))
def remove_duplicate(lst):
    unique_list = []
    for item in lst:
       if item not in unique_list:
            unique_list.append(item)
    return unique_list     
# remove duplicates considering absolute values
def remove_opposite(lst):
    unique_list = []
    for item in remove_duplicates(lst):
        item2=simplify_expression(f'-1*{item}')
        if item2 not in unique_list:
            unique_list.append(item)
    return unique_list     
   
#
def eqn1_metric():
    #preprocess the generated metrics by removing constants 
    new_metrics = remove_opposite(sloae(term1()))
    for i in new_metrics:
        try:
            new_metric.remove(int(i))
        except:
            print(i)

#list all possible metrics from term1



81 possibile metrics


In [17]:
print('List of all possible formulas generated Excluding 0 denominator and numerator')
printList(term1())

List of all possible formulas generated Excluding 0 denominator and numerator
1 - (-1*ef+ep*-1*(ef+np)/(ef+nf+np+ep))*(-1*ef * -1*ef)
2 - (-1*ef+ep*-1*(ef+np)/(ef+nf+np+ep))/(-1*ef * -1*ef)
3 - (-1*ef+ep*-1*(ef+np)/(ef+nf+np+ep))+(-1*ef * -1*ef)
4 - (-1*ef+ep*-1*(ef+np)/(ef+nf+np+ep))-(-1*ef * -1*ef)
5 - (-1*ef+ep*0*(ef+np)/(ef+nf+np+ep))*(-1*ef * 0*ef)
6 - (-1*ef+ep*0*(ef+np)/(ef+nf+np+ep))/(-1*ef * 0*ef)
7 - (-1*ef+ep*0*(ef+np)/(ef+nf+np+ep))+(-1*ef * 0*ef)
8 - (-1*ef+ep*0*(ef+np)/(ef+nf+np+ep))-(-1*ef * 0*ef)
9 - (-1*ef+ep*1*(ef+np)/(ef+nf+np+ep))*(-1*ef * 1*ef)
10 - (-1*ef+ep*1*(ef+np)/(ef+nf+np+ep))/(-1*ef * 1*ef)
11 - (-1*ef+ep*1*(ef+np)/(ef+nf+np+ep))+(-1*ef * 1*ef)
12 - (-1*ef+ep*1*(ef+np)/(ef+nf+np+ep))-(-1*ef * 1*ef)
13 - (0*ef+ep*-1*(ef+np)/(ef+nf+np+ep))*(0*ef * -1*ef)
14 - (0*ef+ep*-1*(ef+np)/(ef+nf+np+ep))/(0*ef * -1*ef)
15 - (0*ef+ep*-1*(ef+np)/(ef+nf+np+ep))+(0*ef * -1*ef)
16 - (0*ef+ep*-1*(ef+np)/(ef+nf+np+ep))-(0*ef * -1*ef)
17 - (0*ef+ep*1*(ef+np)/(ef+nf+np+ep))*(0*e

In [18]:
print('~~~~~~~~~~~~~~List of fomulas from eqnt 1 after simplfying and removing constants~~~~~~~~~')
printList(sloae(term1()))  

~~~~~~~~~~~~~~List of fomulas from eqnt 1 after simplfying and removing constants~~~~~~~~~
1 - ef**2*(-ef*(ef + ep + nf + np) - ep*(ef + np))/(ef + ep + nf + np)
2 - (-ef*(ef + ep + nf + np) - ep*(ef + np))/(ef**2*(ef + ep + nf + np))
3 - (ef*(ef - 1)*(ef + ep + nf + np) - ep*(ef + np))/(ef + ep + nf + np)
4 - (ef*(-ef - 1)*(ef + ep + nf + np) - ep*(ef + np))/(ef + ep + nf + np)
5 - zoo*ef
6 - -ef
7 - -ef
8 - ef**2*(ef*(ef + ep + nf + np) - ep*(ef + np))/(ef + ep + nf + np)
9 - (ef*(ef + ep + nf + np) - ep*(ef + np))/(ef**2*(ef + ep + nf + np))
10 - (-ef*(ef + 1)*(ef + ep + nf + np) + ep*(ef + np))/(ef + ep + nf + np)
11 - (ef*(ef - 1)*(ef + ep + nf + np) + ep*(ef + np))/(ef + ep + nf + np)
12 - zoo*ep*(ef + np)/(ef + ep + nf + np)
13 - -ep*(ef + np)/(ef + ep + nf + np)
14 - -ep*(ef + np)/(ef + ep + nf + np)
15 - zoo*ep*(ef + np)/(ef + ep + nf + np)
16 - ep*(ef + np)/(ef + ep + nf + np)
17 - ep*(ef + np)/(ef + ep + nf + np)
18 - ef**2*(-ef*(ef + ep + nf + np) + ep*(ef + np))/(ef + ep +

We tend to extend and hybridize the systematic formula search work by Qusay Idrees Sarhan et al (2023) and the Adekunle Ajibode et al (2022) metric combination.

In [19]:
#remove duplicates
printList(remove_duplicate(sloae(term1())))


1 - ef**2*(-ef*(ef + ep + nf + np) - ep*(ef + np))/(ef + ep + nf + np)
2 - (-ef*(ef + ep + nf + np) - ep*(ef + np))/(ef**2*(ef + ep + nf + np))
3 - (ef*(ef - 1)*(ef + ep + nf + np) - ep*(ef + np))/(ef + ep + nf + np)
4 - (ef*(-ef - 1)*(ef + ep + nf + np) - ep*(ef + np))/(ef + ep + nf + np)
5 - zoo*ef
6 - -ef
7 - ef**2*(ef*(ef + ep + nf + np) - ep*(ef + np))/(ef + ep + nf + np)
8 - (ef*(ef + ep + nf + np) - ep*(ef + np))/(ef**2*(ef + ep + nf + np))
9 - (-ef*(ef + 1)*(ef + ep + nf + np) + ep*(ef + np))/(ef + ep + nf + np)
10 - (ef*(ef - 1)*(ef + ep + nf + np) + ep*(ef + np))/(ef + ep + nf + np)
11 - zoo*ep*(ef + np)/(ef + ep + nf + np)
12 - -ep*(ef + np)/(ef + ep + nf + np)
13 - ep*(ef + np)/(ef + ep + nf + np)
14 - ef**2*(-ef*(ef + ep + nf + np) + ep*(ef + np))/(ef + ep + nf + np)
15 - (-ef*(ef + ep + nf + np) + ep*(ef + np))/(ef**2*(ef + ep + nf + np))
16 - (ef*(1 - ef)*(ef + ep + nf + np) - ep*(ef + np))/(ef + ep + nf + np)
17 - (ef*(ef + 1)*(ef + ep + nf + np) - ep*(ef + np))/(ef + e

In [20]:
# Print all valid expressions
#preprocess the generate metrics by removing 
printList(remove_duplicate(sloae(term1())))
#list all possible metrics from term1
#print the list of the formulas in an external txt file
printExternal(remove_duplicate(sloae(term1())),'formulas')
#print the list of the formulas in an external txt file
javaFiles(remove_duplicate(sloae(term1())))


1 - ef**2*(-ef*(ef + ep + nf + np) - ep*(ef + np))/(ef + ep + nf + np)
2 - (-ef*(ef + ep + nf + np) - ep*(ef + np))/(ef**2*(ef + ep + nf + np))
3 - (ef*(ef - 1)*(ef + ep + nf + np) - ep*(ef + np))/(ef + ep + nf + np)
4 - (ef*(-ef - 1)*(ef + ep + nf + np) - ep*(ef + np))/(ef + ep + nf + np)
5 - zoo*ef
6 - -ef
7 - ef**2*(ef*(ef + ep + nf + np) - ep*(ef + np))/(ef + ep + nf + np)
8 - (ef*(ef + ep + nf + np) - ep*(ef + np))/(ef**2*(ef + ep + nf + np))
9 - (-ef*(ef + 1)*(ef + ep + nf + np) + ep*(ef + np))/(ef + ep + nf + np)
10 - (ef*(ef - 1)*(ef + ep + nf + np) + ep*(ef + np))/(ef + ep + nf + np)
11 - zoo*ep*(ef + np)/(ef + ep + nf + np)
12 - -ep*(ef + np)/(ef + ep + nf + np)
13 - ep*(ef + np)/(ef + ep + nf + np)
14 - ef**2*(-ef*(ef + ep + nf + np) + ep*(ef + np))/(ef + ep + nf + np)
15 - (-ef*(ef + ep + nf + np) + ep*(ef + np))/(ef**2*(ef + ep + nf + np))
16 - (ef*(1 - ef)*(ef + ep + nf + np) - ep*(ef + np))/(ef + ep + nf + np)
17 - (ef*(ef + 1)*(ef + ep + nf + np) - ep*(ef + np))/(ef + e

In [21]:
term1_metrics = remove_opposite(sloae(term1()))
term1_metrics

[ef**2*(ef*(ef + ep + nf + np) - ep*(ef + np))/(ef + ep + nf + np),
 (ef*(ef - 1)*(ef + ep + nf + np) + ep*(ef + np))/(ef + ep + nf + np),
 -ep*(ef + np)/(ef + ep + nf + np),
 (ef*(ef - 1)*(ef + ep + nf + np) - ep*(ef + np))/(ef + ep + nf + np),
 ef,
 zoo*ep*(ef + np)/(ef + ep + nf + np),
 (ef*(ef + 1)*(ef + ep + nf + np) + ep*(ef + np))/(ef + ep + nf + np),
 (-ef*(ef + 1)*(ef + ep + nf + np) + ep*(ef + np))/(ef + ep + nf + np),
 (ef*(ef + ep + nf + np) + ep*(ef + np))/(ef**2*(ef + ep + nf + np)),
 zoo*ef,
 ef**2*(ef*(ef + ep + nf + np) + ep*(ef + np))/(ef + ep + nf + np),
 (ef*(ef + ep + nf + np) - ep*(ef + np))/(ef**2*(ef + ep + nf + np))]