In [2]:
import numpy as np
import json
from benlp.llms import Chat
from scipy.optimize import linprog

In [24]:
def linear_optimization(c, A_ub=None, b_ub=None, A_eq=None, b_eq=None, bounds=None):
    """
    Solve a linear program with scipy.optimize.linprog using the 'highs' method.

    The problem solved is: minimize c @ x subject to A_ub @ x <= b_ub,
    A_eq @ x == b_eq and the per-variable bounds. To maximize an objective,
    pass its negation as c.

    Parameters:
    c (list): The coefficients of the linear objective function to be minimized.
    A_ub (list, optional): The inequality constraint matrix. Each row represents a constraint.
    b_ub (list, optional): The inequality constraint vector. Each element is the upper bound of the corresponding row of A_ub @ x.
    A_eq (list, optional): The equality constraint matrix. Each row represents a constraint.
    b_eq (list, optional): The equality constraint vector. Each element is the required value of the corresponding row of A_eq @ x.
    bounds (list, optional): A list of bounds for each variable in the form (min, max).

    Returns:
    dict: The following fields copied from the linprog result:
        'x'       - values of the decision variables
        'fun'     - value of the objective function c @ x
        'slack'   - slack of the inequality constraints, b_ub - A_ub @ x
        'success' - True when an optimal solution was found
        'message' - text description of the exit status
        'nit'     - number of iterations performed
    """
    # A_ub / b_ub default to None (as in linprog itself) so equality-only
    # problems can be passed without dummy inequality arguments.
    result = linprog(c, A_ub=A_ub, b_ub=b_ub, A_eq=A_eq, b_eq=b_eq, bounds=bounds, method='highs')

    # Callers should look at 'success' before trusting 'x' and 'fun'.
    return {
        'x': result.x,
        'fun': result.fun,
        'slack': result.slack,
        'success': result.success,
        'message': result.message,
        'nit': result.nit
    }

In [30]:
def args_parser(json_string):
    """
    Turn a JSON object describing a linear program into linear_optimization arguments.

    Parameters:
    json_string (str): JSON text with the keys "c", "A_ub", "b_ub", "A_eq",
        "b_eq" and "bounds". A key that is absent or set to null yields None.

    Returns:
    tuple: (c, A_ub, b_ub, A_eq, b_eq, bounds) in that order. Each entry is a
        numpy array built from the JSON value, or None when the value is
        missing/null. Nested nulls (e.g. an open upper bound in "bounds") are
        kept as None inside an object array.

    Raises:
    json.JSONDecodeError: If json_string is not valid JSON.
    """
    data = json.loads(json_string)

    def _to_array(key):
        # Every key gets the same null handling; wrapping a missing value
        # with np.array would produce a 0-d object array instead of None.
        value = data.get(key)
        return None if value is None else np.array(value)

    return tuple(_to_array(key) for key in ("c", "A_ub", "b_ub", "A_eq", "b_eq", "bounds"))

In [19]:
# Plain-text copy of the linear_optimization source, meant to be shown to the
# model as context (see the {code_string} placeholder in prompt2).
# The docstring's triple quotes are left out because they cannot nest inside
# this triple-quoted literal, so the text is not valid Python on its own.
# This copy is maintained by hand; keep it in sync with the real function.
code_string = """
def linear_optimization(c, A_ub, b_ub, A_eq=None, b_eq=None, bounds=None):

    Solves a linear programming optimization problem using the highs algorithm.

    Parameters:
    c (list): The coefficients of the linear objective function to be minimized.
    A_ub (list): The inequality constraint matrix. Each row represents a constraint.
    b_ub (list): The inequality constraint vector. Each element represents the upper bound of the corresponding constraint.
    A_eq (list, optional): The equality constraint matrix. Each row represents a constraint.
    b_eq (list, optional): The equality constraint vector. Each element represents the required value of the corresponding constraint.
    bounds (list, optional): A list of bounds for each variable in the form (min, max).

    Returns:
    dict: A dictionary containing the optimal solution, the minimized objective function value, and the status of the optimization.

    result = linprog(c, A_ub=A_ub, b_ub=b_ub, A_eq=A_eq, b_eq=b_eq, bounds=bounds, method='highs')
    
    return {
        'x': result.x,
        'fun': result.fun,
        'slack': result.slack,
        'success': result.success,
        'message': result.message,
        'nit': result.nit
    }
"""

In [63]:
docs = """
scipy.optimize.linprog
scipy.optimize.linprog(c, A_ub=None, b_ub=None, A_eq=None, b_eq=None, bounds=None, method='highs', callback=None, options=None, x0=None, integrality=None)[source]
Linear programming: minimize a linear objective function subject to linear equality and inequality constraints.

Linear programming solves problems of the following form:

    minimize    c^T x
    subject to  A_ub x <= b_ub,
                A_eq x  = b_eq,
                l <= x <= u,

where x is a vector of decision variables; c, b_ub, b_eq, l, and u are vectors; and A_ub and A_eq are matrices.

Alternatively, that’s:

minimize:

c @ x
such that:

A_ub @ x <= b_ub
A_eq @ x == b_eq
lb <= x <= ub
Note that by default lb = 0 and ub = None unless specified with bounds.

Parameters:
c1-D array
The coefficients of the linear objective function to be minimized.

A_ub2-D array, optional
The inequality constraint matrix. Each row of A_ub specifies the coefficients of a linear inequality constraint on x.

b_ub1-D array, optional
The inequality constraint vector. Each element represents an upper bound on the corresponding value of A_ub @ x.

A_eq2-D array, optional
The equality constraint matrix. Each row of A_eq specifies the coefficients of a linear equality constraint on x.

b_eq1-D array, optional
The equality constraint vector. Each element of A_eq @ x must equal the corresponding element of b_eq.

boundssequence, optional
A sequence of (min, max) pairs for each element in x, defining the minimum and maximum values of that decision variable. Use None to indicate that there is no bound. By default, bounds are (0, None) (all decision variables are non-negative). If a single tuple (min, max) is provided, then min and max will serve as bounds for all decision variables.

methodstr, optional
The algorithm used to solve the standard form problem. ‘highs’ (default), ‘highs-ds’, ‘highs-ipm’, ‘interior-point’ (legacy), ‘revised simplex’ (legacy), and ‘simplex’ (legacy) are supported. The legacy methods are deprecated and will be removed in SciPy 1.11.0.

callbackcallable, optional
If a callback function is provided, it will be called at least once per iteration of the algorithm. The callback function must accept a single scipy.optimize.OptimizeResult consisting of the following fields:

x1-D array
The current solution vector.

funfloat
The current value of the objective function c @ x.

successbool
True when the algorithm has completed successfully.

slack1-D array
The (nominally positive) values of the slack, b_ub - A_ub @ x.

con1-D array
The (nominally zero) residuals of the equality constraints, b_eq - A_eq @ x.

phaseint
The phase of the algorithm being executed.

statusint
An integer representing the status of the algorithm.

0 : Optimization proceeding nominally.

1 : Iteration limit reached.

2 : Problem appears to be infeasible.

3 : Problem appears to be unbounded.

4 : Numerical difficulties encountered.

nitint
The current iteration number.

messagestr
A string descriptor of the algorithm status.

Callback functions are not currently supported by the HiGHS methods.

optionsdict, optional
A dictionary of solver options. All methods accept the following options:

maxiterint
Maximum number of iterations to perform. Default: see method-specific documentation.

dispbool
Set to True to print convergence messages. Default: False.

presolvebool
Set to False to disable automatic presolve. Default: True.

All methods except the HiGHS solvers also accept:

tolfloat
A tolerance which determines when a residual is “close enough” to zero to be considered exactly zero.

autoscalebool
Set to True to automatically perform equilibration. Consider using this option if the numerical values in the constraints are separated by several orders of magnitude. Default: False.

rrbool
Set to False to disable automatic redundancy removal. Default: True.

rr_methodstring
Method used to identify and remove redundant rows from the equality constraint matrix after presolve. For problems with dense input, the available methods for redundancy removal are:

“SVD”:
Repeatedly performs singular value decomposition on the matrix, detecting redundant rows based on nonzeros in the left singular vectors that correspond with zero singular values. May be fast when the matrix is nearly full rank.

“pivot”:
Uses the algorithm presented in [5] to identify redundant rows.

“ID”:
Uses a randomized interpolative decomposition. Identifies columns of the matrix transpose not used in a full-rank interpolative decomposition of the matrix.

None:
Uses “svd” if the matrix is nearly full rank, that is, the difference between the matrix rank and the number of rows is less than five. If not, uses “pivot”. The behavior of this default is subject to change without prior notice.

Default: None. For problems with sparse input, this option is ignored, and the pivot-based algorithm presented in [5] is used.

For method-specific options, see show_options('linprog').

x01-D array, optional
Guess values of the decision variables, which will be refined by the optimization algorithm. This argument is currently used only by the ‘revised simplex’ method, and can only be used if x0 represents a basic feasible solution.

integrality1-D array or int, optional
Indicates the type of integrality constraint on each decision variable.

0 : Continuous variable; no integrality constraint.

1 : Integer variable; decision variable must be an integer within bounds.

2 : Semi-continuous variable; decision variable must be within bounds or take value 0.

3 : Semi-integer variable; decision variable must be an integer within bounds or take value 0.

By default, all variables are continuous.

For mixed integrality constraints, supply an array of shape c.shape. To infer a constraint on each decision variable from shorter inputs, the argument will be broadcasted to c.shape using np.broadcast_to.

This argument is currently used only by the 'highs' method and ignored otherwise.

Returns:
resOptimizeResult
A scipy.optimize.OptimizeResult consisting of the fields below. Note that the return types of the fields may depend on whether the optimization was successful, therefore it is recommended to check OptimizeResult.status before relying on the other fields:

x1-D array
The values of the decision variables that minimizes the objective function while satisfying the constraints.

funfloat
The optimal value of the objective function c @ x.

slack1-D array
The (nominally positive) values of the slack variables, b_ub - A_ub @ x.

con1-D array
The (nominally zero) residuals of the equality constraints, b_eq - A_eq @ x.

successbool
True when the algorithm succeeds in finding an optimal solution.

statusint
An integer representing the exit status of the algorithm.

0 : Optimization terminated successfully.

1 : Iteration limit reached.

2 : Problem appears to be infeasible.

3 : Problem appears to be unbounded.

4 : Numerical difficulties encountered.

nitint
The total number of iterations performed in all phases.

messagestr
A string descriptor of the exit status of the algorithm.

See also

show_options
Additional options accepted by the solvers.

Notes

This section describes the available solvers that can be selected by the ‘method’ parameter.

‘highs-ds’ and ‘highs-ipm’ are interfaces to the HiGHS simplex and interior-point method solvers [13], respectively. ‘highs’ (default) chooses between the two automatically. These are the fastest linear programming solvers in SciPy, especially for large, sparse problems; which of these two is faster is problem-dependent. The other solvers (‘interior-point’, ‘revised simplex’, and ‘simplex’) are legacy methods and will be removed in SciPy 1.11.0.

Method highs-ds is a wrapper of the C++ high performance dual revised simplex implementation (HSOL) [13], [14]. Method highs-ipm is a wrapper of a C++ implementation of an interior-point method [13]; it features a crossover routine, so it is as accurate as a simplex solver. Method highs chooses between the two automatically. For new code involving linprog, we recommend explicitly choosing one of these three method values.

New in version 1.6.0.

Method interior-point uses the primal-dual path following algorithm as outlined in [4]. This algorithm supports sparse constraint matrices and is typically faster than the simplex methods, especially for large, sparse problems. Note, however, that the solution returned may be slightly less accurate than those of the simplex methods and will not, in general, correspond with a vertex of the polytope defined by the constraints.

New in version 1.0.0.

Method revised simplex uses the revised simplex method as described in [9], except that a factorization [11] of the basis matrix, rather than its inverse, is efficiently maintained and used to solve the linear systems at each iteration of the algorithm.

New in version 1.3.0.

Method simplex uses a traditional, full-tableau implementation of Dantzig’s simplex algorithm [1], [2] (not the Nelder-Mead simplex). This algorithm is included for backwards compatibility and educational purposes.

New in version 0.15.0.

Before applying interior-point, revised simplex, or simplex, a presolve procedure based on [8] attempts to identify trivial infeasibilities, trivial unboundedness, and potential problem simplifications. Specifically, it checks for:

rows of zeros in A_eq or A_ub, representing trivial constraints;

columns of zeros in A_eq and A_ub, representing unconstrained variables;

column singletons in A_eq, representing fixed variables; and

column singletons in A_ub, representing simple bounds.

If presolve reveals that the problem is unbounded (e.g. an unconstrained and unbounded variable has negative cost) or infeasible (e.g., a row of zeros in A_eq corresponds with a nonzero in b_eq), the solver terminates with the appropriate status code. Note that presolve terminates as soon as any sign of unboundedness is detected; consequently, a problem may be reported as unbounded when in reality the problem is infeasible (but infeasibility has not been detected yet). Therefore, if it is important to know whether the problem is actually infeasible, solve the problem again with option presolve=False.

If neither infeasibility nor unboundedness are detected in a single pass of the presolve, bounds are tightened where possible and fixed variables are removed from the problem. Then, linearly dependent rows of the A_eq matrix are removed, (unless they represent an infeasibility) to avoid numerical difficulties in the primary solve routine. Note that rows that are nearly linearly dependent (within a prescribed tolerance) may also be removed, which can change the optimal solution in rare cases. If this is a concern, eliminate redundancy from your problem formulation and run with option rr=False or presolve=False.

Several potential improvements can be made here: additional presolve checks outlined in [8] should be implemented, the presolve routine should be run multiple times (until no further simplifications can be made), and more of the efficiency improvements from [5] should be implemented in the redundancy removal routines.

After presolve, the problem is transformed to standard form by converting the (tightened) simple bounds to upper bound constraints, introducing non-negative slack variables for inequality constraints, and expressing unbounded variables as the difference between two non-negative variables. Optionally, the problem is automatically scaled via equilibration [12]. The selected algorithm solves the standard form problem, and a postprocessing routine converts the result to a solution to the original problem.


"""

In [65]:
# Sample word problems; prompt1 below uses the first one.
# Each problem is its own list element (a missing comma previously caused the
# second and third strings to be concatenated into a single entry).
word_problems = [
    "The minimum daily req for vitamin A and b are 100 and 200 respectively. pizza has 10 vitamin A and 30 vitamin B. root beer has 20 vitamin A and 10 vitamin b. pizza costs $10 and root beet costs $5. Maximize profit",
    ". A farming cooperative mixes two brands of cattle feed. Brand X costs $25 per bag and contains 2 units of nutritional element A, 2 units of element B, and 2 units of element C. Brand Y costs $20 per bag and contains 1 unit of nutritional element A, 9 units of element B, and 3 units of element C. Find the number of bags of each brand that should be mixed to produce a mixture having a minimum cost per bag. The minimum requirements of nutrients A, B, and C are 12 units, 36 units, and 24 units, respectively.",
    "A local family-owned plastic cup manufacturer wants to optimize their production mix in order to maximize their profit. They produce personalized beer mugs and champagne glasses. The profit on a case of beer mugs is $25 while the profit on a case of champagne glasses is $20. The cups are manufactured with a machine called a plastic extruder which feeds on plastic resins. Each case of beer mugs requires 20 lbs. of plastic resins to produce while champagne glasses require 12 lbs. per case. The daily supply of plastic resins is limited to at most 1800 pounds. About 15 cases of either product can be produced per hour. At the moment the family wants to limit their work day to 8 hours.",
]

# Step 1 prompt: ask the model to break the word problem into its LP parts.
prompt1 = f"""Given the word problem: {word_problems[0]}, identify the following:
1. The type of problem (maximization or minimization)
2. The objective function.
3. The constraints
4. The variables
5. The bounds

Output the data above.
"""

# Step 2 prompt: reference docs + the wrapper's source + the required JSON format.
# The first two segments are f-strings so that docs and code_string are actually
# interpolated; the last segment stays a plain string because it contains
# literal JSON braces.
prompt2 = f"""Here's some docs:\n{docs}""" + f"""Now, heres some code
{code_string}""" + """
Output a json object with the required arguments to run the function.
Output valid json only. Output rules:

EXAMPLE OUTPUT:
{
    "c": [3, 5],
    "A_ub": [[1, 2], [3, 2]],
    "b_ub": [6, 12],
    "A_eq": null,
    "b_eq": null,
    "bounds": [[0, null], [0, null]]
}
"""

In [49]:
# Earlier draft of the reasoning steps and JSON output rules for the
# argument-extraction prompt. Not referenced by any cell visible in this notebook.
# NOTE(review): prompt2 ends its instructions with "Output rules:" but lists
# none — presumably these rules used to follow it; confirm before deleting.
old = """
1. Decide if the problem is a maximization or minimization problem. (e.g. maximize profit, minimize cost)
2. Decide if a non-negative constraint is needed due to context. (e.g. can't have negative number of pizzas)
3. Decide if a non-0 constraing is needed due to context. (e.g. can't have 0 pizzas)
4. Do everything else.

1. Must be valid JSON (e.g. {"a": 1, "b": 2})
2. no tuples (e.g. [0, 1, (2, 3)])
3. no trailing commas (e.g. [0, 1, 2,])
4. use null instead of None (e.g. [0, null]) (e.g. {"a": null})
5. no operations, instead evaluate it and output the result. (e.g. 1+1 -> 2, 1*2 -> 2, 1/2 -> 0.5) CORRECT: (2, 2, 0.5) INCORRECT: (1+1, 1*2, 1/2)
6. If the problem is a maximization problem, make sure to negate (c), the objective function.(c => -c OR -c => c)
7. Do not negate anything except for the objective function.
"""

In [67]:
# End-to-end run: analyse the word problem, extract solver arguments as JSON,
# solve the linear program, then ask the model to explain the outcome.
print("Analyzing problem and extracting arguments...")

# TODO: results are sometimes better at temperature 0 and sometimes at 1;
# the reason has not been worked out yet.
chat = Chat(model="gpt-4", temperature=0)

# Step 1: problem analysis.
analysis_reply = chat(prompt1)
intermediate = analysis_reply['response'].strip()
print("Intermediate Response:\n", intermediate)

# Step 2: argument extraction. prompt2 does not restate the word problem, so
# this presumably relies on `chat` keeping the previous turn — TODO confirm.
extraction_reply = chat(prompt2)
unparsed_args = extraction_reply['response'].strip()
print("Unparsed args:\n", unparsed_args)

# Step 3: JSON text -> positional arguments for the solver wrapper.
print("Parsing args...")
parsed_args = args_parser(unparsed_args)

# Step 4: solve.
print("Running linear optimization...")
result = linear_optimization(*parsed_args)
print("Result:", result)

# Step 5: hand the raw result dict back to the model for interpretation.
print("Interpreting result...")
interpretation_prompt = f"Result: {result} \n\n INSTRUCTION: Interperet the result in the context of the problem. Only use data from the above result, do not infer any other data."
interpretation_reply = chat(interpretation_prompt)
res = interpretation_reply['response'].strip()
print("Final Response:\n\n", res)

Analyzing problem and extracting arguments...
Intermediate Response:
 1. The type of problem: Maximization (since the goal is to maximize profit)

2. The objective function: P = 10x + 5y (where P is the profit, x is the number of pizzas, and y is the number of root beers)

3. The constraints: 
   a. 10x + 20y >= 100 (minimum daily requirement for vitamin A)
   b. 30x + 10y >= 200 (minimum daily requirement for vitamin B)

4. The variables: 
   a. x: number of pizzas
   b. y: number of root beers

5. The bounds: 
   a. x >= 0 (cannot have a negative number of pizzas)
   b. y >= 0 (cannot have a negative number of root beers)
Unparsed args:
 {
    "c": [10, 5],
    "A_ub": [[-10, -20], [-30, -10]],
    "b_ub": [-100, -200],
    "A_eq": null,
    "b_eq": null,
    "bounds": [[0, null], [0, null]]
}
Parsing args...
Running linear optimization...
Result: {'x': array([6., 2.]), 'fun': 70.0, 'slack': array([0., 0.]), 'success': True, 'message': 'Optimization terminated successfully. (HiGHS St

In [58]:
# Follow-up question in the same chat about the signs the model chose.
negation_reply = chat("Why did you negate the A_ub and b_ub?")
print(negation_reply['response'].strip())

In linear programming, when solving a maximization problem, the standard form of the problem requires the objective function coefficients to be negated. This is done to convert the maximization problem into a minimization problem, which is the standard form for linear programming solvers.

Similarly, when specifying the inequality constraints (A_ub and b_ub), the sign of the constraints should be the same as the original problem. In this case, since the original problem is a maximization problem, the inequality constraints should be written as less than or equal to (≤) constraints. However, linear programming solvers typically expect the constraints to be in the form of greater than or equal to (≥) constraints. To maintain consistency, the coefficients in A_ub and the values in b_ub are negated to convert the original ≤ constraints to ≥ constraints.


In [62]:
# Hand-written arguments for comparison with the model's output above: here the
# objective coefficients are negated and the constraint rows are left positive.
manual_args_json = """{
    "c": [-10, -5],
    "A_ub": [[10, 20], [30, 10]],
    "b_ub": [100, 200],
    "A_eq": null,
    "b_eq": null,
    "bounds": [[0, null], [0, null]]
}"""
argg = args_parser(manual_args_json)

print(linear_optimization(*argg))



{'x': array([6., 2.]), 'fun': -70.0, 'slack': array([0., 0.]), 'success': True, 'message': 'Optimization terminated successfully. (HiGHS Status 7: Optimal)', 'nit': 2}
