In [8]:
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.metrics import r2_score,mean_squared_error,mean_absolute_error
import numpy as np
from sklearn.tree import DecisionTreeRegressor
import warnings
warnings.filterwarnings("ignore")

df = pd.read_json('./dataset/data.json')
data = pd.DataFrame(df['features'].tolist())

kf = KFold(n_splits=5, shuffle=True, random_state=42)
model = DecisionTreeRegressor(max_depth=5)
R2 = []
MSE = []
MAE = []
RMSE = []
for train_index, test_index in kf.split(data):
    X_train, X_test = data.iloc[train_index], data.iloc[test_index]
    y_train, y_test = df["LogS"].iloc[train_index], df["LogS"].iloc[test_index]
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print("R2_score:"+str(r2_score(y_test,y_pred)))
    print("MSE:"+str(mean_squared_error(y_test,y_pred)))
    print("MAE:"+str(mean_absolute_error(y_test,y_pred)))
    print("RMSE:"+str(np.sqrt(mean_squared_error(y_test,y_pred))))

    R2.append(r2_score(y_test,y_pred))
    MSE.append(mean_squared_error(y_test,y_pred))
    MAE.append(mean_absolute_error(y_test,y_pred))
    RMSE.append(np.sqrt(mean_squared_error(y_test,y_pred)))

print(f"Average R2: {np.mean(R2)}")
print(f"Average MSE: {np.mean(MSE)}")
print(f"Average MAE: {np.mean(MAE)}")
print(f"Average RMSE: {np.mean(RMSE)}")

R2_score:0.6630617158160229
MSE:1.5520832860403213
MAE:0.9360478452838531
RMSE:1.2458263466632584
R2_score:0.6155787089606821
MSE:1.7120575172044452
MAE:0.9801369367938223
RMSE:1.3084561579221694
R2_score:0.6170166766122316
MSE:1.624362051386452
MAE:0.9404284906634204
RMSE:1.2745046298018898
R2_score:0.6086929357778659
MSE:1.7153570334368147
MAE:0.9804319218797614
RMSE:1.3097163942765682
R2_score:0.6443417383589034
MSE:1.6307495611319955
MAE:0.9633172837171446
RMSE:1.277008050535311
Average R2: 0.6297383551051412
Average MSE: 1.6469218898400058
Average MAE: 0.9600724956676003
Average RMSE: 1.2831023158398396


In [4]:
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.metrics import r2_score,mean_squared_error,mean_absolute_error
import numpy as np
from sklearn.linear_model import Ridge
import warnings
warnings.filterwarnings("ignore")

df = pd.read_json('./dataset/data.json')
data = pd.DataFrame(df['features'].tolist())

kf = KFold(n_splits=5, shuffle=True, random_state=42)
model = Ridge(alpha=1.0, random_state=42)
R2 = []
MSE = []
MAE = []
RMSE = []
for train_index, test_index in kf.split(data):
    X_train, X_test = data.iloc[train_index], data.iloc[test_index]
    y_train, y_test = df["LogS"].iloc[train_index], df["LogS"].iloc[test_index]
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print("R2_score:"+str(r2_score(y_test,y_pred)))
    print("MSE:"+str(mean_squared_error(y_test,y_pred)))
    print("MAE:"+str(mean_absolute_error(y_test,y_pred)))
    print("RMSE:"+str(np.sqrt(mean_squared_error(y_test,y_pred))))

    R2.append(r2_score(y_test,y_pred))
    MSE.append(mean_squared_error(y_test,y_pred))
    MAE.append(mean_absolute_error(y_test,y_pred))
    RMSE.append(np.sqrt(mean_squared_error(y_test,y_pred)))

print(f"Average R2: {np.mean(R2)}")
print(f"Average MSE: {np.mean(MSE)}")
print(f"Average MAE: {np.mean(MAE)}")
print(f"Average RMSE: {np.mean(RMSE)}")

R2_score:0.7360929651000443
MSE:1.215669804126579
MAE:0.8233705742912076
RMSE:1.1025741717120798
R2_score:0.6988852692495577
MSE:1.3410436683371953
MAE:0.8621399281441542
RMSE:1.158034398598416
R2_score:0.6912452349567646
MSE:1.3095335825188819
MAE:0.8439775616279136
RMSE:1.1443485406635874
R2_score:0.6922506557774062
MSE:1.34906841816717
MAE:0.8608908035213649
RMSE:1.1614940456873508
R2_score:0.7188303381696897
MSE:1.2892075120586155
MAE:0.8596823437569946
RMSE:1.1354327421994732
Average R2: 0.7074608926506925
Average MSE: 1.3009045970416884
Average MAE: 0.850012242268327
Average RMSE: 1.1403767797721815


In [6]:
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.metrics import r2_score,mean_squared_error,mean_absolute_error
import numpy as np
from sklearn.cross_decomposition import PLSRegression
from sklearn.model_selection import GridSearchCV
import warnings
warnings.filterwarnings("ignore")

df = pd.read_json('./dataset/data.json')
data = pd.DataFrame(df['features'].tolist())

kf = KFold(n_splits=5, shuffle=True, random_state=42)
pls_model_setup = PLSRegression(scale=True)
param_grid = {'n_components': range(1, 4)}
gsearch = GridSearchCV(pls_model_setup, param_grid)

R2 = []
MSE = []
MAE = []
RMSE = []
for train_index, test_index in kf.split(data):
    X_train, X_test = data.iloc[train_index], data.iloc[test_index]
    y_train, y_test = df["LogS"].iloc[train_index], df["LogS"].iloc[test_index]
    model = gsearch.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print("R2_score:"+str(r2_score(y_test,y_pred)))
    print("MSE:"+str(mean_squared_error(y_test,y_pred)))
    print("MAE:"+str(mean_absolute_error(y_test,y_pred)))
    print("RMSE:"+str(np.sqrt(mean_squared_error(y_test,y_pred))))

    R2.append(r2_score(y_test,y_pred))
    MSE.append(mean_squared_error(y_test,y_pred))
    MAE.append(mean_absolute_error(y_test,y_pred))
    RMSE.append(np.sqrt(mean_squared_error(y_test,y_pred)))

print(f"Average R2: {np.mean(R2)}")
print(f"Average MSE: {np.mean(MSE)}")
print(f"Average MAE: {np.mean(MAE)}")
print(f"Average RMSE: {np.mean(RMSE)}")

R2_score:0.7144299661644977
MSE:1.3154589351088337
MAE:0.8557361499123561
RMSE:1.1469345818785106
R2_score:0.667444047297819
MSE:1.4810702008089933
MAE:0.9144353946483971
RMSE:1.2169922763966061
R2_score:0.6664526112958833
MSE:1.4146875006394006
MAE:0.8759049905329318
RMSE:1.1894063648053177
R2_score:0.683018619711691
MSE:1.3895385232232782
MAE:0.8981732096616625
RMSE:1.1787868862620072
R2_score:0.6958675555830085
MSE:1.3944955136723332
MAE:0.8926494980112767
RMSE:1.1808875956975469
Average R2: 0.6854425600105799
Average MSE: 1.3990501346905677
Average MAE: 0.887379848553325
Average RMSE: 1.1826015410079977


In [12]:
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.metrics import r2_score,mean_squared_error,mean_absolute_error
import numpy as np
from sklearn.decomposition import PCA
from sklearn import linear_model
import warnings
warnings.filterwarnings("ignore")

df = pd.read_json('./dataset/data.json')
data = pd.DataFrame(df['features'].tolist())

kf = KFold(n_splits=5, shuffle=True, random_state=42)
pca = PCA(n_components=300)
data = pd.DataFrame(pca.fit_transform(data))
lm = linear_model.LinearRegression()

R2 = []
MSE = []
MAE = []
RMSE = []
for train_index, test_index in kf.split(data):
    X_train, X_test = data.iloc[train_index], data.iloc[test_index]
    y_train, y_test = df["LogS"].iloc[train_index], df["LogS"].iloc[test_index]
    lm.fit(X_train,y_train)
    y_pred = lm.predict(X_test)
    print("R2_score:"+str(r2_score(y_test,y_pred)))
    print("MSE:"+str(mean_squared_error(y_test,y_pred)))
    print("MAE:"+str(mean_absolute_error(y_test,y_pred)))
    print("RMSE:"+str(np.sqrt(mean_squared_error(y_test,y_pred))))

    R2.append(r2_score(y_test,y_pred))
    MSE.append(mean_squared_error(y_test,y_pred))
    MAE.append(mean_absolute_error(y_test,y_pred))
    RMSE.append(np.sqrt(mean_squared_error(y_test,y_pred)))

print(f"Average R2: {np.mean(R2)}")
print(f"Average MSE: {np.mean(MSE)}")
print(f"Average MAE: {np.mean(MAE)}")
print(f"Average RMSE: {np.mean(RMSE)}")

R2_score:0.7407500685039562
MSE:1.1942171740935181
MAE:0.8090588132435343
RMSE:1.092802440559829
R2_score:0.6944797289376357
MSE:1.3606641695534034
MAE:0.8695863211804977
RMSE:1.166475104557917
R2_score:0.6877553219931025
MSE:1.324335485982116
MAE:0.8427823415824718
RMSE:1.1507977606782678
R2_score:0.6986109526987109
MSE:1.3211870404557953
MAE:0.849005536877517
RMSE:1.1494290062704158
R2_score:0.7233467685216575
MSE:1.2684989622828564
MAE:0.8496901647617188
RMSE:1.1262765922644653
Average R2: 0.7089885681310125
Average MSE: 1.2937805664735378
Average MAE: 0.8440246355291479
Average RMSE: 1.1371561808661788


In [13]:
import pandas as pd
from sklearn.model_selection import KFold
from sklearn.metrics import r2_score,mean_squared_error,mean_absolute_error
import numpy as np
from sklearn.svm import SVR
import warnings
warnings.filterwarnings("ignore")

df = pd.read_json('./dataset/data.json')
data = pd.DataFrame(df['features'].tolist())

kf = KFold(n_splits=5, shuffle=True, random_state=42)
model = SVR(gamma='auto')
R2 = []
MSE = []
MAE = []
RMSE = []
for train_index, test_index in kf.split(data):
    X_train, X_test = data.iloc[train_index], data.iloc[test_index]
    y_train, y_test = df["LogS"].iloc[train_index], df["LogS"].iloc[test_index]
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    print("R2_score:"+str(r2_score(y_test,y_pred)))
    print("MSE:"+str(mean_squared_error(y_test,y_pred)))
    print("MAE:"+str(mean_absolute_error(y_test,y_pred)))
    print("RMSE:"+str(np.sqrt(mean_squared_error(y_test,y_pred))))

    R2.append(r2_score(y_test,y_pred))
    MSE.append(mean_squared_error(y_test,y_pred))
    MAE.append(mean_absolute_error(y_test,y_pred))
    RMSE.append(np.sqrt(mean_squared_error(y_test,y_pred)))

print(f"Average R2: {np.mean(R2)}")
print(f"Average MSE: {np.mean(MSE)}")
print(f"Average MAE: {np.mean(MAE)}")
print(f"Average RMSE: {np.mean(RMSE)}")

R2_score:0.7177412227077957
MSE:1.3002058570886519
MAE:0.7962645566894185
RMSE:1.1402656958308672
R2_score:0.6790123345405044
MSE:1.4295497111881565
MAE:0.8437809362317182
RMSE:1.1956377842758887
R2_score:0.6641814697437481
MSE:1.4243201815561097
MAE:0.8312618586905034
RMSE:1.193448860050614
R2_score:0.693042191260346
MSE:1.3455985959175392
MAE:0.8336943387682657
RMSE:1.159999394791885
R2_score:0.6897697355024885
MSE:1.4224549862691047
MAE:0.8438658026772626
RMSE:1.1926671733007095
Average R2: 0.6887493907509766
Average MSE: 1.3844258664039124
Average MAE: 0.8297734986114337
Average RMSE: 1.1764037816499928
