In [None]:
from kan import *
import numpy as np
import matplotlib.pyplot as plt
import sympy as sp
from sklearn.datasets import make_moons
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OrdinalEncoder
# Seed NumPy's global random number generator so NumPy-driven randomness
# repeats from run to run.
np.random.seed(0)

### Usando fórmula

In [None]:
# Two-moons toy problem, stored under the four keys that this notebook later
# hands to `model.fit`: train_input / train_label / test_input / test_label.
dataset = {}
train_input, train_label = make_moons(n_samples=1000, shuffle=True, noise=0.1, random_state=0)
# The test split must use a different seed than the training split: with
# identical arguments `make_moons` returns exactly the same points, so the
# reported "test" accuracy would merely repeat the training accuracy.
test_input, test_label = make_moons(n_samples=1000, shuffle=True, noise=0.1, random_state=1)

dtype = torch.get_default_dtype()
dataset['train_input'] = torch.from_numpy(train_input).type(dtype)
dataset['test_input'] = torch.from_numpy(test_input).type(dtype)
# `[:, None]` adds a trailing axis, so labels are stored with shape (n_samples, 1).
dataset['train_label'] = torch.from_numpy(train_label[:, None]).type(dtype)
dataset['test_label'] = torch.from_numpy(test_label[:, None]).type(dtype)

X = dataset['train_input']
y = dataset['train_label']

# One scatter plot per split, colored by class label.
for split_name, split_title in (('train', 'TRAIN'), ('test', 'TEST')):
    split_points = dataset[f'{split_name}_input']
    split_labels = dataset[f'{split_name}_label'][:, 0]
    plt.figure()
    plt.scatter(split_points[:, 0], split_points[:, 1], c=split_labels)
    plt.title(split_title)
    plt.show()

In [None]:
# `width` notation:
#   [2,5,1]     -> 2 inputs, 5 hidden addition neurons, 1 output
#   [2,[5,2],1] -> 2 inputs, 5 hidden addition neurons plus 2 hidden
#                  multiplication neurons, 1 output
# `grid` is the "resolution" of the spline.
# `k` is the polynomial order of the spline.
model = KAN(width=[2, 1], grid=3, k=3)


def train_acc():
    """Return the fraction of training samples whose rounded model output equals the label."""
    predicted = torch.round(model(dataset['train_input'])[:, 0])
    expected = dataset['train_label'][:, 0]
    matches = (predicted == expected).type(dtype)
    return torch.mean(matches)


def test_acc():
    """Return the fraction of test samples whose rounded model output equals the label."""
    predicted = torch.round(model(dataset['test_input'])[:, 0])
    expected = dataset['test_label'][:, 0]
    matches = (predicted == expected).type(dtype)
    return torch.mean(matches)


# Both accuracy functions are handed to `fit` through its `metrics` argument.
model_results = model.fit(
    dataset,
    opt="LBFGS",
    steps=20,
    metrics=(train_acc, test_acc),
)

In [None]:
# Render the fitted two-moons model with the model's own plot method.
model.plot()

In [None]:
# Candidate function names passed to `auto_symbolic` through its `lib` argument.
lib = [
    'x', 'x^2', 'x^3', 'x^4',
    'exp', 'log', 'sqrt', 'tanh',
    'sin', 'tan', 'abs', 'cos',
]
model.auto_symbolic(lib=lib)

# Element [0] of the result holds the output formulas; this model has a
# single output, so keep the first (and only) one.
symbolic_outputs = model.symbolic_formula()[0]
formula = symbolic_outputs[0]

In [None]:
# Compile the symbolic formula into a NumPy-backed callable of (x_1, x_2).
expr_func = sp.lambdify(['x_1', 'x_2'], formula, "numpy")

# Evaluate it on the two feature columns of the test split.
moons_test_points = dataset['test_input']
first_feature = moons_test_points[:, 0]
second_feature = moons_test_points[:, 1]
results = expr_func(first_feature, second_feature)

# Color each test point by the raw formula value; use np.round(results, 0)
# as the color instead to show hard class assignments.
plt.figure()
plt.scatter(first_feature, second_feature, c=results)
plt.title('PREDICTION')
plt.show()

### Usando Dataset

In [None]:
# Load the semicolon-separated red-wine quality CSV; the relative path means
# the file is looked up in the notebook's working directory.
df = pd.read_csv('winequality-red.csv', sep=";")
# Bare expression: the notebook displays the frame as the cell output.
df

In [None]:
# Features: every column except 'quality'. Target: the 'quality' column.
# NOTE: this rebinds the X / y names used earlier for the moons tensors.
X = df.drop(columns=['quality'])
y = df['quality']

In [None]:
# Hold out 33% of the rows for testing; the fixed random_state keeps the
# split the same on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=1)

In [None]:
# Convert the wine train/test split into the tensor dict consumed by `model.fit`.
dtype = torch.get_default_dtype()

dataset = {}
dataset['train_input'] = torch.from_numpy(X_train.values).type(dtype)
dataset['test_input'] = torch.from_numpy(X_test.values).type(dtype)

# Map the raw quality scores to integer class codes. The encoder is fitted on
# the training labels only and then reused, unchanged, for the test labels.
encoder = OrdinalEncoder()
train_codes = encoder.fit_transform(y_train.values.reshape(-1, 1)).flatten()
# The test labels must come from y_test (not y_train) so that they line up
# with `test_input`.
# NOTE(review): with OrdinalEncoder's default settings, `transform` raises if
# the test split contains a quality value never seen during fitting.
test_codes = encoder.transform(y_test.values.reshape(-1, 1)).flatten()

dataset['train_label'] = torch.from_numpy(train_codes).type(torch.LongTensor)
dataset['test_label'] = torch.from_numpy(test_codes).type(torch.LongTensor)

# Every input row needs exactly one label.
assert dataset['train_input'].shape[0] == dataset['train_label'].shape[0]
assert dataset['test_input'].shape[0] == dataset['test_label'].shape[0]

In [None]:
# One input per feature column, then 6 hidden neurons and 6 outputs.
# NOTE(review): the 6 outputs presumably correspond to the encoded quality
# classes scored by CrossEntropyLoss - confirm against the encoder's categories.
n_features = X_train.shape[1]
model = KAN(width=[n_features, 6, 6], grid=3, k=3)

classification_loss = torch.nn.CrossEntropyLoss()
model_results = model.fit(
    dataset,
    opt="LBFGS",
    steps=100,
    loss_fn=classification_loss,
)

In [None]:
# Render the fitted wine-quality model with the model's own plot method.
model.plot()

In [None]:
# Per pykan's documented usage (`model = model.prune()`), `prune()` returns the
# pruned network rather than modifying the receiver, so the result has to be
# rebound. Otherwise this plot and every later cell keep using the unpruned model.
model = model.prune()
model.plot()

In [None]:
# Candidate function names passed to `auto_symbolic` through its `lib` argument.
lib = [
    'x', 'x^2', 'x^3', 'x^4',
    'exp', 'log', 'sqrt', 'tanh',
    'sin', 'tan', 'abs', 'cos',
]
model.auto_symbolic(lib=lib)

In [None]:
def evaluate_symbolic_outputs(formulas, variables, inputs):
    """Evaluate each symbolic output formula on a batch of input rows.

    Parameters
    ----------
    formulas : sequence of sympy expressions, one per model output.
    variables : sequence of sympy symbols in input-column order, i.e.
        ``variables[j]`` stands for column ``j`` of ``inputs``.
    inputs : torch tensor or array-like of shape (n_samples, n_features).

    Returns
    -------
    list of 1-D numpy arrays, one per formula, each of length n_samples.

    Raises
    ------
    ValueError
        If the number of variables differs from the number of input columns.
    """
    if torch.is_tensor(inputs):
        inputs = inputs.detach().cpu().numpy()
    inputs = np.asarray(inputs)
    n_samples, n_features = inputs.shape

    variables = list(variables)
    if len(variables) != n_features:
        raise ValueError(
            f"expected {len(variables)} input columns, got {n_features}"
        )

    columns = [inputs[:, j] for j in range(n_features)]
    outputs = []
    for output_formula in formulas:
        # Compile THIS output's formula. Every input variable is declared as an
        # argument, in column order, so each column reaches the matching symbol
        # even when the formula uses only some of the inputs.
        evaluate = sp.lambdify(variables, output_formula, "numpy")
        values = np.asarray(evaluate(*columns))
        if values.ndim == 0:
            # A formula that reduced to a constant yields a scalar; expand it
            # so every output has one value per sample.
            values = np.full(n_samples, values)
        outputs.append(values)
    return outputs


# NOTE(review): relies on `symbolic_formula()` returning the output formulas
# first and the list of input variables second (pykan API) - confirm for the
# installed version.
symbolic_outputs, symbolic_inputs = model.symbolic_formula()[:2]

# Individual names are kept for the six outputs of this model.
func1, func2, func3, func4, func5, func6 = symbolic_outputs
results1, results2, results3, results4, results5, results6 = evaluate_symbolic_outputs(
    symbolic_outputs, symbolic_inputs, dataset['test_input']
)


In [None]:
# Stack the six per-output score vectors into shape (6, n_samples), take the
# index of the largest score for every sample, and map that index back to the
# original label value through the fitted encoder.
class_scores = np.array([results1, results2, results3, results4, results5, results6])
predicted_codes = np.argmax(class_scores, axis=0)
y_pred = encoder.inverse_transform(predicted_codes.reshape(-1, 1)).flatten()

In [None]:
# Overlay the true test labels (drawn first) and the predicted labels (drawn
# second) against the sample position in the test split.
sample_positions = np.arange(y_test.shape[0])

fig, ax = plt.subplots(figsize=(15, 5))
ax.scatter(sample_positions, y_test)
ax.scatter(sample_positions, y_pred)
ax.set_title('PREDICTION')
plt.show()