In [1]:
# NOTICE:
# Write the variable 'E' as 'e' in every equation string: sympy parses 'E' as
# Euler's number (sympy.E), not as a free symbol.
##################

import sympy as sp
import pandas as pd

# Define custom operations using subclassing of sympy.Function
class Add(sp.Function):
    """Two-argument addition written in function-call form: ``Add(x, y)``."""

    @classmethod
    def eval(cls, x, y):
        """Return ``x + y``; sympy uses this value as the result of the call."""
        total = x + y
        return total

class Sub(sp.Function):
    """Two-argument subtraction written in function-call form: ``Sub(x, y)``."""

    @classmethod
    def eval(cls, x, y):
        """Return ``x - y``; sympy uses this value as the result of the call."""
        difference = x - y
        return difference

class Mul(sp.Function):
    """Two-argument multiplication written in function-call form: ``Mul(x, y)``."""

    @classmethod
    def eval(cls, x, y):
        """Return ``x * y``; sympy uses this value as the result of the call."""
        product = x * y
        return product

class Div(sp.Function):
    """Protected two-argument division written in function-call form.

    The result is a simplified ``Piecewise`` that yields 1 when the divisor
    equals 0 or complex infinity (``sp.zoo``) and ``x / y`` otherwise.
    """

    @classmethod
    def eval(cls, x, y):
        """Build the guarded quotient of ``x`` and ``y``.

        If anything raises while building or simplifying the expression, the
        error is printed and the constant 1000 is returned instead.
        """
        try:
            zero_divisor = (1, sp.Eq(y, 0))           # y == 0        -> 1
            infinite_divisor = (1, sp.Eq(y, sp.zoo))  # y == zoo      -> 1
            regular = (x / y, True)                   # anything else -> x / y
            guarded = sp.Piecewise(zero_divisor, infinite_divisor, regular)
            # Simplify so that branches with decidable conditions collapse.
            return guarded.simplify()
        except Exception as e:
            print(f"Division error: {e}")
            # NOTE(review): the fallback is the number 1000, not NaN.
            return 1000
        
# Plain, unprotected division; judging by its name it is meant for displaying
# equations rather than for evaluating them.
class Div_for_visualization(sp.Function):
    """Plain two-argument division written in function-call form.

    Unlike ``Div`` there is no guard here: the quotient is returned as-is,
    without any ``Piecewise`` handling of a zero divisor.
    """

    @classmethod
    def eval(cls, x, y):
        """Return ``x / y`` unchanged."""
        quotient = x / y
        return quotient

class Neg(sp.Function):
    """Unary negation written in function-call form: ``Neg(x)``."""

    @classmethod
    def eval(cls, x):
        """Return ``-x``; sympy uses this value as the result of the call."""
        negated = -x
        return negated
    

# Name -> class table handed to sympify(locals=...): maps the lowercase
# operator names used in the equation strings to the custom classes, with
# 'div' bound to the protected Div.
converter = dict(
    add=Add,
    sub=Sub,
    mul=Mul,
    div=Div,
    neg=Neg,
)

# Same table as `converter`, except that 'div' is bound to the unprotected
# Div_for_visualization. Overriding an existing key keeps the key order.
converter_vis = {**converter, 'div': Div_for_visualization}

# Function to apply the equation to each row of the DataFrame
def apply_equation(row, expression=None):
    """Evaluate a symbolic equation for one DataFrame row.

    Parameters
    ----------
    row : mapping (e.g. a pandas Series)
        Must provide the keys 'Pr', 'La', 'Gd', 'Nd', 'Ce', 'Sm', 'Y', 'EN',
        'R', 'IE', 'LAS', 'M' and 'E'. A missing key raises ``KeyError``.
    expression : sympy expression, optional
        Equation to evaluate. When omitted, the module-level ``expr`` is used,
        so existing ``df.apply(apply_equation, axis=1)`` calls keep working.

    Returns
    -------
    float
        The numeric value of the equation for this row, or NaN when the
        substitution, numeric evaluation or float conversion fails (the
        failure is printed).
    """
    if expression is None:
        # Backward-compatible default: the notebook rebinds the global `expr`
        # before each df.apply call.
        expression = expr

    # Symbol name -> value for this row. The column 'E' is bound to the symbol
    # 'e' because the equation strings spell that variable in lowercase.
    local_env = {
        'Pr': row['Pr'],
        'La': row['La'],
        'Gd': row['Gd'],
        'Nd': row['Nd'],
        'Ce': row['Ce'],
        'Sm': row['Sm'],
        'Y' : row['Y'],
        "EN": row['EN'],
        'R' : row['R'],
        'IE': row['IE'],
        'LAS': row['LAS'],
        'M' : row['M'],
        'e' : row['E'],
    }
    try:
        result = expression.subs(local_env)
        # Force numeric evaluation if the substitution left a sympy expression.
        if isinstance(result, sp.Expr):
            result = result.evalf()
        # Convert inside the try block: a non-real result (e.g. complex
        # infinity) makes float() raise, and that must count as a failed row.
        return float(result)
    except Exception as e:
        print(f"Error evaluating row: {local_env}")
        print(f"Error message: {str(e)}")
        # Previously this path ended in float(None), which raised TypeError and
        # defeated the error handling. NaN marks the row as failed instead.
        return float('nan')

In [2]:
df = pd.read_excel('./data_pred_Pr.xlsx')
# Alternative input kept from the original notebook:
# df = pd.read_excel('./all_rp650.xlsx')

# Round float/complex columns to 4 decimals; other dtypes pass through as-is.
df = df.apply(lambda col: round(col, 4) if col.dtype.kind in 'fc' else col)

# Output column -> equation string. Insertion order decides the order in which
# the columns are appended; a later cell selects the last seven columns by
# position, so do not reorder.
equations = {
    'pred_all': "add(div(sub(mul(EN, div(Nd, Y)), Y), La), add(div(sub(Pr, e), 3.470), e))",
    'pred_La': "add(div(6.424, add(Sm, div(0.466, sub(La, R)))), EN)",
    'pred_Gd': "add(div(sub(Pr, Y), 3.488), add(div(div(sub(EN, IE), sub(EN, Sm)), Y), e))",
    'pred_Nd': "add(div(sub(6.163, Y), div(mul(sub(EN, Y), 7.642), sub(Y, La))), add(e, 0.212))",
    'pred_Ce': "add(div(sub(e, M), 4.288), add(div(div(Gd, 4.886), mul(sub(LAS, M), add(La, La))), M))",
    'pred_Sm': "add(div(3.325, mul(sub(R, Y), mul(Nd, IE))), add(div(sub(M, e), add(La, -2.843)), e))",
    'pred_Y': "add(div(sub(e, add(sub(5.698, Ce), Y)), mul(sub(La, IE), Nd)), e)",
}

for column, equ in equations.items():
    # apply_equation reads the module-level `expr`, so it has to be rebound
    # before every df.apply call.
    expr = sp.sympify(equ, locals=converter, evaluate=True)
    df[column] = df.apply(apply_equation, axis=1)

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import KFold
import numpy as np
import matplotlib.pyplot as plt
import pickle

In [None]:
# Round float/complex columns to 4 decimals again (this now includes the
# prediction columns) and save the per-equation predictions.
df = df.apply(lambda col: round(col, 4) if col.dtype.kind in 'fc' else col)
df.to_excel("pred_individual.xlsx")

# 0/1 mask over the columns at positions 1..7: 1 where the value differs from
# 5.1907, 0 where it equals it.
# NOTE(review): 5.1907 looks like a placeholder value for an unused input;
# confirm its meaning against the data preparation step.
mask = df.iloc[:, 1:8].ne(5.1907).astype(int)

# The last seven columns, cast to float.
inputs = df.iloc[:, -7:].astype(float)

# Element-wise product: entries whose mask value is 0 become 0.
inputs_masked = inputs.values * mask.values

In [None]:
df_output = {}

X = inputs_masked
Y = np.ones(X.shape[0])  # only consumed by the disabled fit() call below

# Linear model whose weights are assigned directly instead of being fitted.
model = LinearRegression()
# model.fit(X,Y)
model.coef_ = np.array([0.53366074, 0.03944002, 0.06086653, 0.06427795, 0.05694237, 0.07845619, 0.0229135])
model.intercept_ = 1.8960468810261488
Y_pred = model.predict(X)

# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only load pickles that come from a trusted source.
with open('../label_scaler.pickle', 'rb') as handle:
    scalers = pickle.load(handle)
mean, std = scalers.mean_[2], scalers.scale_[2]

with open('../scaler.pickle', 'rb') as handle:
    scalers = pickle.load(handle)
scaler, add = scalers["scaler"], scalers["add"]

# Map the prediction back through the label scaler statistics at index 2.
# NOTE(review): the 0.01 offset is unexplained in this notebook; confirm.
Y_pred_ = (Y_pred - 0.01 - add) * std + mean

# Undo the input scaling for the seven element columns (scaler index follows
# the listed order) and round each to two decimals.
df_output.update({
    element: ((df[element] - add) * scaler.scale_[idx] + scaler.mean_[idx]).round(2)
    for idx, element in enumerate(["Pr", "La", "Gd", "Nd", "Ce", "Sm", "Y"])
})

df_output["pred"] = Y_pred
df_output["pred_original"] = Y_pred_
df_output = pd.DataFrame(df_output)
# Rank 1 corresponds to the smallest prediction; ties share their mean rank.
df_output["rank"] = df_output["pred"].rank(ascending=True, method='average')
df_output.to_excel('pred.xlsx', index=False)

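See the scikit-learn notes on the security and maintainability limitations of pickle-based persistence: https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations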
