In [6]:
import pandas as pd
import autograd.numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import learning_curve
from skopt.plots import plot_convergence, plot_objective, plot_evaluations
from sklearn.metrics import mean_absolute_error, mean_squared_error

In [2]:
def r2_score(y_true, y_pred):
    """Return the squared Pearson correlation between ``y_true`` and ``y_pred``.

    The value is the square of the off-diagonal entry of the 2x2 matrix
    produced by ``np.corrcoef(y_true, y_pred)``. Being a correlation, it
    measures linear association only: shifting or rescaling ``y_pred`` does
    not change the result.

    Parameters
    ----------
    y_true, y_pred : array-like
        Two one-dimensional samples of equal length.

    Returns
    -------
    float
        Squared correlation coefficient.
    """
    # Entry [0, 1] of the correlation matrix is corr(y_true, y_pred).
    pearson_r = np.corrcoef(y_true, y_pred)[0, 1]
    return pearson_r ** 2

In [3]:
def ProcessData(df: pd.DataFrame) -> pd.DataFrame:
    """Select and clean the SMILES / Const_Value / group columns of a raw table.

    The input frame is left untouched; all work happens on a copy.

    Steps:

    1. Cast every non-object column to ``str``.
    2. Keep ``SMILES``, ``Const_Value`` and every column from ``CH3`` to the
       end of the frame, in that order (no column is selected twice).
    3. Drop the rows whose ``CH3`` cell is the string ``'No'``.
    4. Parse ``Const_Value`` as float, accepting ``,`` as decimal separator.
    5. Convert ``Const_Value`` and every column after it to numeric; cells
       that cannot be parsed become NaN.

    Parameters
    ----------
    df : pd.DataFrame
        Raw table containing at least the columns ``SMILES``,
        ``Const_Value`` and ``CH3``.

    Returns
    -------
    pd.DataFrame
        New frame with ``SMILES`` first, then the numeric columns. It has the
        same columns but no rows when every row is filtered out.

    Raises
    ------
    KeyError
        If ``SMILES``, ``Const_Value`` or ``CH3`` is not a column of ``df``.
    """
    df_res = df.copy()

    # Stringify non-object columns so the `.str` accessor and the 'No'
    # comparison below see text whatever dtype the loader inferred.
    stringify = {col: 'str' for col in df_res.columns if df_res[col].dtype != 'object'}
    df_res = df_res.astype(stringify)

    smiles_pos = df_res.columns.get_loc('SMILES')
    const_pos = df_res.columns.get_loc('Const_Value')
    first_group_pos = df_res.columns.get_loc('CH3')

    # SMILES and Const_Value lead; skip them in the trailing range so they are
    # not selected twice when they happen to sit at or after CH3.
    leading = [smiles_pos, const_pos]
    trailing = [
        pos
        for pos in range(first_group_pos, len(df_res.columns))
        if pos not in leading
    ]
    df_res = df_res.iloc[:, leading + trailing]

    df_res = df_res[df_res['CH3'] != 'No']

    # Drop columns in which every remaining cell equals the number 0.
    # NOTE(review): cells that went through the str cast above are strings
    # such as '0' and never compare equal to 0, so only columns that were
    # already object dtype and hold real zeros can be removed here. Confirm
    # whether dropping all-zero columns (or null Const_Value rows) is wanted.
    # The guard matters: on an empty frame `.any` is False for every column,
    # which would drop Const_Value too and break the parsing below.
    if not df_res.empty:
        df_res = df_res.loc[:, (df_res != 0).any(axis=0)]

    # Own the data before assigning columns (avoids SettingWithCopyWarning).
    df_res = df_res.copy()

    # Decimal comma -> decimal point; plain literal replacement.
    df_res['Const_Value'] = (
        df_res['Const_Value'].str.replace(',', '.', regex=False).astype(float)
    )

    for column in df_res.loc[:, 'Const_Value':].columns:
        df_res[column] = pd.to_numeric(df_res[column], errors='coerce')

    return df_res

In [4]:
def RemoveNullGroups(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` without the columns whose values sum to zero.

    The input frame is not modified, so the function can be re-run on the
    same frame and always gives the same result.

    Parameters
    ----------
    df : pd.DataFrame
        Frame whose columns are summed with ``DataFrame.sum()``.

    Returns
    -------
    pd.DataFrame
        New frame with the same rows, keeping only the columns whose column
        total is different from 0.
    """
    column_totals = df.sum()

    # Align the totals with the columns: a column that has no total (NaN
    # after reindexing) compares as "not zero" and is therefore kept.
    keep = column_totals.reindex(df.columns).ne(0)

    return df.loc[:, keep].copy()