In [1]:
import numpy as np
import pandas as pd
import matplotlib
from matplotlib import pyplot as plt

import sklearn  
from sklearn.datasets import make_regression
from sklearn import linear_model, datasets
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

from tqdm import tqdm

%matplotlib inline
matplotlib.rcParams.update({'font.size': 22})

In [2]:
# Load the augmented dataset; the path is relative to the notebook's
# working directory.
df = pd.read_csv(filepath_or_buffer="data/augmented_data.csv")

In [3]:
# Feature matrix: everything in df except the two 'Unnamed' columns
# (presumably leftover index columns from earlier CSV writes - TODO confirm)
# and the three target columns. drop() returns a new frame; df is untouched.
X = df.drop(
    columns=[
        'Unnamed: 0',
        'Unnamed: 0.1',
        'Diameter_nm',
        'Absorbance max (nm)',
        'PL max (nm)',
    ]
)

# One target array per property to be predicted.
y_size = df['Diameter_nm'].values          # diameter target
y_abs = df['Absorbance max (nm)'].values   # absorbance-maximum target
y_PL = df['PL max (nm)'].values            # PL-maximum target

# Size prediction

In [4]:
# Train/test split for the diameter target: 25% of the rows are held out,
# shuffled with a fixed seed so the partition is reproducible.
X_train, X_test, y_size_train, y_size_test = train_test_split(
    X,
    y_size,
    test_size=0.25,
    random_state=8,
    shuffle=True,
)

In [18]:
# Linear-kernel support vector regression on the diameter target.
# NOTE(review): X goes into SVR exactly as produced by df.drop(); no scaling
# step is visible in this notebook. SVR is sensitive to feature scale -
# confirm whether the CSV columns are already standardized.
svr = SVR(kernel='linear').fit(X_train, y_size_train)  # fit() returns the estimator
y_size_predict = svr.predict(X_test)

# Score the held-out predictions.
MSE = mean_squared_error(y_true=y_size_test, y_pred=y_size_predict)
R2 = r2_score(y_true=y_size_test, y_pred=y_size_predict)
print("MSE", MSE, "R2", R2)

MSE 1.3470466909895378 R2 0.2559359223408595


In [19]:
# RBF-kernel support vector regression on the diameter target, using the same
# train/test partition as the linear model.
# NOTE: this rebinds y_size_predict, MSE and R2 from the linear-kernel cell.
rbf = SVR(kernel='rbf').fit(X_train, y_size_train)  # fit() returns the estimator
y_size_predict = rbf.predict(X_test)

# Score the held-out predictions.
MSE = mean_squared_error(y_true=y_size_test, y_pred=y_size_predict)
R2 = r2_score(y_true=y_size_test, y_pred=y_size_predict)
print("MSE", MSE, "R2", R2)

MSE 1.2674515046247017 R2 0.29990167298989856


# Absorbance prediction

In [8]:
# Train/test split for the absorbance target: 25% held out, fixed seed.
# X_train / X_test are rebound here; X, test_size and random_state match the
# other split cells, so the row partition is expected to be the same.
X_train, X_test, y_abs_train, y_abs_test = train_test_split(
    X,
    y_abs,
    test_size=0.25,
    random_state=8,
    shuffle=True,
)

In [23]:
# Linear-kernel support vector regression on the absorbance-maximum target.
# NOTE: `svr`, MSE and R2 are rebound here, replacing any earlier values.
svr = SVR(kernel='linear').fit(X_train, y_abs_train)  # fit() returns the estimator
y_abs_predict = svr.predict(X_test)

# Score the held-out predictions.
MSE = mean_squared_error(y_true=y_abs_test, y_pred=y_abs_predict)
R2 = r2_score(y_true=y_abs_test, y_pred=y_abs_predict)
print("MSE", MSE, "R2", R2)

MSE 2918.78117931605 R2 0.4295622701825158


In [21]:
# RBF-kernel support vector regression on the absorbance-maximum target.
# NOTE(review): the saved output of this cell shows a negative R2, i.e. worse
# than predicting the mean. No feature scaling is visible before the fit and
# the SVR hyper-parameters are left at their defaults - worth verifying
# whether either explains the poor score.
rbf = SVR(kernel='rbf').fit(X_train, y_abs_train)  # fit() returns the estimator
y_abs_predict = rbf.predict(X_test)

# Score the held-out predictions.
MSE = mean_squared_error(y_true=y_abs_test, y_pred=y_abs_predict)
R2 = r2_score(y_true=y_abs_test, y_pred=y_abs_predict)
print("MSE", MSE, "R2", R2)

MSE 5311.576254813718 R2 -0.038078332839732054


# PL prediction

In [10]:
# Train/test split for the PL-maximum target: 25% held out, fixed seed.
# X_train / X_test are rebound here; X, test_size and random_state match the
# other split cells, so the row partition is expected to be the same.
X_train, X_test, y_PL_train, y_PL_test = train_test_split(
    X,
    y_PL,
    test_size=0.25,
    random_state=8,
    shuffle=True,
)

In [11]:
# Linear-kernel support vector regression on the PL-maximum target.
# NOTE: `svr`, MSE and R2 are rebound here, replacing any earlier values.
svr = SVR(kernel='linear').fit(X_train, y_PL_train)  # fit() returns the estimator
y_PL_predict = svr.predict(X_test)

# Score the held-out predictions.
MSE = mean_squared_error(y_true=y_PL_test, y_pred=y_PL_predict)
R2 = r2_score(y_true=y_PL_test, y_pred=y_PL_predict)
print("MSE", MSE, "R2", R2)

MSE 1576.6251639257027 R2 0.09903773015710615


In [25]:
# RBF-kernel support vector regression on the PL-maximum target, using the
# same train/test partition as the linear model.
# NOTE: this rebinds y_PL_predict, MSE and R2 from the linear-kernel cell.
rbf = SVR(kernel='rbf').fit(X_train, y_PL_train)  # fit() returns the estimator
y_PL_predict = rbf.predict(X_test)

# Score the held-out predictions.
MSE = mean_squared_error(y_true=y_PL_test, y_pred=y_PL_predict)
R2 = r2_score(y_true=y_PL_test, y_pred=y_PL_predict)
print("MSE", MSE, "R2", R2)

MSE 1638.8864240373987 R2 0.0634585401777713
