## 1. Execute imports

In [1]:
import pandas as pd
import numpy as np
from patsy import ModelDesc, dmatrices, dmatrix, demo_data
import re
import pprint
import json

## 2. Create complex operations dict

In [2]:
# TODO: add more complex operations from numpy
COMPLEX_OPERATIONS = {
    'arcsin': 'np.arcsin',
    'cos': 'np.cos',
    'tan': 'np.tan',
    'log': 'np.log',
    'log10': 'np.log10',
    'log2': 'np.log2'
}

class bcolors:
    HEADER = '\033[95m'
    OKBLUE = '\033[94m'
    OKCYAN = '\033[96m'
    OKGREEN = '\033[92m'
    WARNING = '\033[93m'
    FAIL = '\033[91m'
    ENDC = '\033[0m'
    BOLD = '\033[1m'
    UNDERLINE = '\033[4m'

## 3. Execute functions

In [3]:
def add_blank_spaces_to_formula(formula: str) -> str:
    new = ''
    for index, element in enumerate(formula):
        next_idx = index + 1
        if next_idx < len(formula):
            if not re.match('\w', formula[index+1]):
                new += element + ' '
            else:
                new += element
        else:
            new += element + ' '
    return new

In [4]:
def clean_formula(formula: str) -> str:
    result = formula
    for operation in COMPLEX_OPERATIONS:
        if(operation in formula):
            result = formula.replace(operation, "")
    return result

def get_formula_variables(formula: str):
  '''
  Returns a list of every variable (non repeated) from the formula
  '''
  cleaned_formula = clean_formula(formula)
  return sorted(list(set("".join(re.findall("[a-zA-Z]+", cleaned_formula)))))

def group_columns(formula: str, data: pd.DataFrame):
  # get number of variables inside formula
  # convert string to set that only holds unique elements
  characters = get_formula_variables(formula=formula)
  
  # get dataset number of columns
  columns = len(data.columns)
  columns_lst = list(data.columns)
  characters_len = len(characters)

  result = []
  
  # column by column
  for i in range(0, columns):  
    # current column + 1 and substract 1 from characters so we don't count current character
    for j in range(i+1, columns, characters_len-1):
      column_variables = [columns_lst[i]]
      column_variables.extend(columns_lst[j:j+(characters_len-1)])
      # compare numbers and group columns by number of variables inside the formula
      if(len(column_variables) == characters_len):
        result.append(column_variables)
  return result # grouped columns

In [5]:
def get_formula_by_columns(formula: str, columns: list) -> dict:
  '''
  Mapping every single formula's variable to a column.
  '''
  to_replace = {}

  # formula variables
  variables = get_formula_variables(formula=formula)
  # iterate over grouped columns
  for cidx, column_group in enumerate(columns):
    formula_grouped = {}
    # iterate over variables
    for idx, variable in enumerate(variables):
      # variable paired to column name
      formula_grouped[variable] = column_group[idx]
    # every column group represents a key
    to_replace[cidx] = formula_grouped
  return to_replace

In [6]:
def parse_formula(formula: str, formula_columns: dict) -> list:
  '''
  Parses, effectively, every grouped column to a real formula. 
  In simple words, replaces every formula variable for its paired column.
  '''
  result = []
  formula_variables = re.findall(r'\w+', formula)

  for variables_paired in formula_columns.values():
        new_formula = formula
        for variable in formula_variables:
            if variable in variables_paired:
                # we need to put a blank space after a single character, 
                # so we can identify it then with the regex
                replace_regex = f'{variable}(?:[^\w\*\\\+\(\)])'
                new_formula = re.sub(replace_regex, variables_paired[variable], new_formula)
            elif variable in COMPLEX_OPERATIONS:
                new_formula = new_formula.replace(variable, COMPLEX_OPERATIONS[variable])
        new_formula = new_formula.replace(" ", "")

        result.append(new_formula)
  
  return result

In [7]:
def execute_formula(formula_by_columns: list, data: pd.DataFrame) -> pd.DataFrame:
  '''
  Take every real formula and executes it via patsy dmatrix.
  Saves every formula result inside a new dataframe's column.
  '''
  new_df = data.copy()
     
  for formula_columns in formula_by_columns:
    result_items = []
    add_data = True
    try:
        formula = "I("+formula_columns+")-1"
        result = dmatrix(formula, data, NA_action='raise')
        for item in result:
            result_items.append(item.item())
    except:
        # Ignore Patsy error.
        add_data = False
        
    if add_data:
        if "np." in formula_columns:
            new_df[formula_columns.replace('np.', '')] = result_items
        else:
            new_df[formula_columns] = result_items
    else:
        print(f"{bcolors.WARNING}Your data has some invalid values. Script will ignore them and their possible result.{bcolors.ENDC}")
        
  return new_df

In [8]:
def execute(formula_input: str, data: pd.DataFrame) -> pd.DataFrame:
    
    formula = add_blank_spaces_to_formula(formula_input.lower())
    grouped_columns = group_columns(formula, data)
    
    replaceable_result = get_formula_by_columns(formula, grouped_columns)

    executable_formulas = parse_formula(formula, replaceable_result)
    new_data = execute_formula(executable_formulas, data)
    
    return new_data

## 4. Play around
Just execute the function `execute` that receives a formula and a pandas' dataframe

In [9]:
df = pd.read_csv('winequality-white.csv', delimiter=';')
x = df.drop(['quality'], axis=1)
x.columns = x.columns.str.replace(' ','_')

execute(formula_input="(a*cos(b))/c", data=x).head(10)

Unnamed: 0,fixed_acidity,volatile_acidity,citric_acid,residual_sugar,chlorides,free_sulfur_dioxide,total_sulfur_dioxide,density,pH,sulphates,...,(residual_sugar*cos(pH))/sulphates,(chlorides*cos(free_sulfur_dioxide))/total_sulfur_dioxide,(chlorides*cos(density))/pH,(chlorides*cos(sulphates))/alcohol,(free_sulfur_dioxide*cos(total_sulfur_dioxide))/density,(free_sulfur_dioxide*cos(pH))/sulphates,(total_sulfur_dioxide*cos(density))/pH,(total_sulfur_dioxide*cos(sulphates))/alcohol,(density*cos(pH))/sulphates,(pH*cos(sulphates))/alcohol
0,7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.001,3.0,0.45,...,-45.539655,0.000139,0.008092,0.004605,42.167596,-98.99925,30.569432,17.395001,-2.202183,0.306971
1,6.3,0.3,0.34,1.6,0.049,14.0,132.0,0.994,3.3,0.49,...,-3.224424,5.1e-05,0.008097,0.004551,14.064649,-28.213708,21.813655,12.259783,-2.003173,0.306495
2,8.1,0.28,0.4,6.9,0.05,30.0,97.0,0.9951,3.26,0.44,...,-15.572015,8e-05,0.00835,0.004479,-27.891092,-67.704412,16.198971,8.689199,-2.245755,0.292029
3,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,...,-21.225108,-0.000309,0.009891,0.005396,-37.694323,-117.36236,31.719095,17.304782,-2.486084,0.296786
4,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,...,-21.225108,-0.000309,0.009891,0.005396,-37.694323,-117.36236,31.719095,17.304782,-2.486084,0.296786
5,8.1,0.28,0.4,6.9,0.05,30.0,97.0,0.9951,3.26,0.44,...,-15.572015,8e-05,0.00835,0.004479,-27.891092,-67.704412,16.198971,8.689199,-2.245755,0.292029
6,6.2,0.32,0.16,7.0,0.045,30.0,136.0,0.9949,3.18,0.47,...,-14.882633,5.1e-05,0.007706,0.004179,-18.470647,-63.782715,23.290503,12.630551,-2.115247,0.295332
7,7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.001,3.0,0.45,...,-45.539655,0.000139,0.008092,0.004605,42.167596,-98.99925,30.569432,17.395001,-2.202183,0.306971
8,6.3,0.3,0.34,1.6,0.049,14.0,132.0,0.994,3.3,0.49,...,-3.224424,5.1e-05,0.008097,0.004551,14.064649,-28.213708,21.813655,12.259783,-2.003173,0.306495
9,8.1,0.22,0.43,1.5,0.044,28.0,129.0,0.9938,3.22,0.45,...,-3.323092,-0.000328,0.007454,0.003602,-27.642337,-62.031058,21.854243,10.559789,-2.201659,0.263585


In [10]:
execute(formula_input="(a*cos(b))/c", data=x).head(10)

Unnamed: 0,fixed_acidity,volatile_acidity,citric_acid,residual_sugar,chlorides,free_sulfur_dioxide,total_sulfur_dioxide,density,pH,sulphates,...,(residual_sugar*cos(pH))/sulphates,(chlorides*cos(free_sulfur_dioxide))/total_sulfur_dioxide,(chlorides*cos(density))/pH,(chlorides*cos(sulphates))/alcohol,(free_sulfur_dioxide*cos(total_sulfur_dioxide))/density,(free_sulfur_dioxide*cos(pH))/sulphates,(total_sulfur_dioxide*cos(density))/pH,(total_sulfur_dioxide*cos(sulphates))/alcohol,(density*cos(pH))/sulphates,(pH*cos(sulphates))/alcohol
0,7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.001,3.0,0.45,...,-45.539655,0.000139,0.008092,0.004605,42.167596,-98.99925,30.569432,17.395001,-2.202183,0.306971
1,6.3,0.3,0.34,1.6,0.049,14.0,132.0,0.994,3.3,0.49,...,-3.224424,5.1e-05,0.008097,0.004551,14.064649,-28.213708,21.813655,12.259783,-2.003173,0.306495
2,8.1,0.28,0.4,6.9,0.05,30.0,97.0,0.9951,3.26,0.44,...,-15.572015,8e-05,0.00835,0.004479,-27.891092,-67.704412,16.198971,8.689199,-2.245755,0.292029
3,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,...,-21.225108,-0.000309,0.009891,0.005396,-37.694323,-117.36236,31.719095,17.304782,-2.486084,0.296786
4,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.9956,3.19,0.4,...,-21.225108,-0.000309,0.009891,0.005396,-37.694323,-117.36236,31.719095,17.304782,-2.486084,0.296786
5,8.1,0.28,0.4,6.9,0.05,30.0,97.0,0.9951,3.26,0.44,...,-15.572015,8e-05,0.00835,0.004479,-27.891092,-67.704412,16.198971,8.689199,-2.245755,0.292029
6,6.2,0.32,0.16,7.0,0.045,30.0,136.0,0.9949,3.18,0.47,...,-14.882633,5.1e-05,0.007706,0.004179,-18.470647,-63.782715,23.290503,12.630551,-2.115247,0.295332
7,7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.001,3.0,0.45,...,-45.539655,0.000139,0.008092,0.004605,42.167596,-98.99925,30.569432,17.395001,-2.202183,0.306971
8,6.3,0.3,0.34,1.6,0.049,14.0,132.0,0.994,3.3,0.49,...,-3.224424,5.1e-05,0.008097,0.004551,14.064649,-28.213708,21.813655,12.259783,-2.003173,0.306495
9,8.1,0.22,0.43,1.5,0.044,28.0,129.0,0.9938,3.22,0.45,...,-3.323092,-0.000328,0.007454,0.003602,-27.642337,-62.031058,21.854243,10.559789,-2.201659,0.263585


In [11]:
df2 = pd.DataFrame(np.array([[0, 2, 3], [-1, -1, 6], [7, 8, 9]]), columns=['c1', 'c2', 'c3'])
df2

Unnamed: 0,c1,c2,c3
0,0,2,3
1,-1,-1,6
2,7,8,9


In [12]:
execute(formula_input="LOG(b+a)", data=df2)

[93mYour data has some invalid values. Script will ignore them and their possible result.[0m


  result = getattr(ufunc, method)(*inputs, **kwargs)


Unnamed: 0,c1,c2,c3,log(c3+c1),log(c3+c2)
0,0,2,3,1.098612,1.609438
1,-1,-1,6,1.609438,1.609438
2,7,8,9,2.772589,2.833213


In [13]:
data = pd.read_csv('glass.csv', delimiter=',')
data

Unnamed: 0,RI,Na,Mg,Al,Si,K,Ca,Ba,Fe,Type
0,1.52101,13.64,4.49,1.10,71.78,0.06,8.75,0.00,0.0,1
1,1.51761,13.89,3.60,1.36,72.73,0.48,7.83,0.00,0.0,1
2,1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0.00,0.0,1
3,1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0.00,0.0,1
4,1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0.00,0.0,1
...,...,...,...,...,...,...,...,...,...,...
209,1.51623,14.14,0.00,2.88,72.61,0.08,9.18,1.06,0.0,7
210,1.51685,14.92,0.00,1.99,73.06,0.00,8.40,1.59,0.0,7
211,1.52065,14.36,0.00,2.02,73.42,0.00,8.44,1.64,0.0,7
212,1.51651,14.38,0.00,1.94,73.61,0.00,8.48,1.57,0.0,7


In [14]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import cross_val_score

In [15]:
data.isnull().sum().sum()

0

In [16]:
X = data.drop('Type', axis=1)
y = data['Type']

In [17]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from sklearn import metrics
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 42)

In [18]:
reg_knn = KNeighborsClassifier()
reg_knn.fit(X_train, y_train)
y_pred = reg_knn.predict(X_test)
print(metrics.classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           1       0.56      0.82      0.67        11
           2       0.69      0.64      0.67        14
           3       0.50      0.33      0.40         3
           5       1.00      0.25      0.40         4
           6       0.50      0.33      0.40         3
           7       0.78      0.88      0.82         8

    accuracy                           0.65        43
   macro avg       0.67      0.54      0.56        43
weighted avg       0.68      0.65      0.63        43



In [19]:
y_predict = reg_knn.predict(X_test)
accuracy_score(y_test, y_predict)

0.6511627906976745

In [20]:
print('Accuracy of K-NN classifier on set: ', reg_knn.score(X, y))

Accuracy of K-NN classifier on set:  0.7102803738317757


In [21]:
print(cross_val_score(reg_knn, X, y, cv=10).mean())

0.6454545454545455




In [22]:
from sklearn.linear_model import LogisticRegressionCV

In [23]:
reg_log = LogisticRegressionCV(cv=10)
reg_log.fit(X_train, y_train)
y_pred = reg_log.predict(X_test)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

In [24]:
print(metrics.classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           1       0.69      0.82      0.75        11
           2       0.59      0.71      0.65        14
           3       0.00      0.00      0.00         3
           5       1.00      0.50      0.67         4
           6       1.00      0.67      0.80         3
           7       0.78      0.88      0.82         8

    accuracy                           0.70        43
   macro avg       0.68      0.60      0.61        43
weighted avg       0.68      0.70      0.67        43



  _warn_prf(average, modifier, msg_start, len(result))


In [25]:
y_predict = reg_log.predict(X_test)
accuracy_score(y_predict, y_test)

0.6976744186046512

In [26]:
print(cross_val_score(reg_log, X, y, cv=10).mean())

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

0.6071428571428572


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [27]:
from sklearn.svm import SVC

In [28]:
reg_svc = SVC(kernel='linear')
reg_svc.fit(X_train, y_train)
y_pred = reg_svc.predict(X_test)
print(metrics.classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           1       0.69      0.82      0.75        11
           2       0.67      0.71      0.69        14
           3       0.00      0.00      0.00         3
           5       0.80      1.00      0.89         4
           6       1.00      0.67      0.80         3
           7       0.88      0.88      0.88         8

    accuracy                           0.74        43
   macro avg       0.67      0.68      0.67        43
weighted avg       0.70      0.74      0.72        43



  _warn_prf(average, modifier, msg_start, len(result))


In [29]:
y_predict = reg_svc.predict(X_test)
accuracy_score(y_predict, y_test)

0.7441860465116279

In [None]:
print(cross_val_score(reg_svc, X, y, cv=10).mean())

In [None]:
from sklearn.ensemble import RandomForestClassifier

In [None]:
reg_rf = RandomForestClassifier()
reg_rf.fit(X_train, y_train)
y_pred = reg_rf.predict(X_test)
print(metrics.classification_report(y_test, y_pred))

In [None]:
y_predict = reg_rf.predict(X_test)
accuracy_score(y_predict, y_test)

In [None]:
print(cross_val_score(reg_rf, X, y, cv=10).mean())

In [46]:
new_dt = execute(formula_input="COS(A+B)", data=X)

In [47]:
new_dt

Unnamed: 0,RI,Na,Mg,Al,Si,K,Ca,Ba,Fe,cos(RI+Na),...,cos(Si+K),cos(Si+Ca),cos(Si+Ba),cos(Si+Fe),cos(K+Ca),cos(K+Ba),cos(K+Fe),cos(Ca+Ba),cos(Ca+Fe),cos(Ba+Fe)
0,1.52101,13.64,4.49,1.10,71.78,0.06,8.75,0.00,0.0,-0.854113,...,-0.914457,0.407201,-0.888546,-0.888546,-0.816902,0.998201,0.998201,-0.780846,-0.780846,1.000000
1,1.51761,13.89,3.60,1.36,72.73,0.48,7.83,0.00,0.0,-0.955232,...,-0.578939,0.434414,-0.890037,-0.890037,-0.440377,0.886995,0.886995,0.023979,0.023979,1.000000
2,1.51618,13.53,3.55,1.54,72.99,0.39,7.78,0.00,0.0,-0.788898,...,-0.432648,0.612633,-0.742924,-0.742924,-0.310785,0.924909,0.924909,0.073914,0.073914,1.000000
3,1.51766,13.21,3.69,1.29,72.61,0.57,8.22,0.00,0.0,-0.556771,...,-0.603136,0.658924,-0.938212,-0.938212,-0.805203,0.841901,0.841901,-0.357900,-0.357900,1.000000
4,1.51742,13.27,3.62,1.24,73.08,0.55,8.07,0.00,0.0,-0.605388,...,-0.196147,0.862094,-0.679754,-0.679754,-0.693271,0.852525,0.852525,-0.214342,-0.214342,1.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
209,1.51623,14.14,0.00,2.88,72.61,0.08,9.18,1.06,0.0,-0.998662,...,-0.907556,0.994110,-0.156778,-0.938212,-0.986455,0.417595,0.996802,-0.685707,-0.970191,0.488872
210,1.51685,14.92,0.00,1.99,73.06,0.00,8.40,1.59,0.0,-0.745916,...,-0.694286,0.975589,0.732899,-0.694286,-0.519289,-0.019202,1.000000,-0.844470,-0.519289,-0.019202
211,1.52065,14.36,0.00,2.02,73.42,0.00,8.44,1.64,0.0,-0.985127,...,-0.396249,0.984095,0.943346,-0.396249,-0.553048,-0.069148,1.000000,-0.792913,-0.553048,-0.069148
212,1.51651,14.38,0.00,1.94,73.61,0.00,8.48,1.57,0.0,-0.982278,...,-0.215718,0.917682,0.976284,-0.215718,-0.585923,0.000796,1.000000,-0.810833,-0.585923,0.000796
