In [None]:
import numpy as np
import pandas as pd
import matplotlib
from matplotlib import pyplot as plt

import sklearn  
from sklearn.datasets import make_regression
from sklearn import linear_model, datasets
from sklearn.linear_model import Ridge
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

from tqdm import tqdm

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Raise the default font size for every figure created after this point.
matplotlib.rcParams.update({'font.size': 22})

In [None]:
# Load the dataset; the path is relative to the notebook's working directory.
df = pd.read_csv("data/augmented_data.csv")

In [None]:
# Preview the first rows of the loaded frame.
df.head()

In [None]:
# (rows, columns) of the loaded frame.
df.shape

In [None]:
# Features for the diameter model: everything except the two 'Unnamed' columns
# (presumably saved index columns -- confirm) and the three measured targets.
X_size = df.drop(
    columns=[
        'Unnamed: 0',
        'Unnamed: 0.1',
        'Diameter_nm',
        'Absorbance max (nm)',
        'PL max (nm)',
    ]
)
# Target to predict: the 'Diameter_nm' column as a NumPy array.
y_size = df['Diameter_nm'].values

In [None]:
# Features for the absorbance model: everything except the two 'Unnamed' columns
# (presumably saved index columns -- confirm) and the three measured targets.
X_abs = df.drop(
    columns=[
        'Unnamed: 0',
        'Unnamed: 0.1',
        'Diameter_nm',
        'Absorbance max (nm)',
        'PL max (nm)',
    ]
)
# Target to predict: the 'Absorbance max (nm)' column as a NumPy array.
y_abs = df['Absorbance max (nm)'].values

In [None]:
# Features for the PL model: everything except the two 'Unnamed' columns
# (presumably saved index columns -- confirm) and the three measured targets.
X_PL = df.drop(
    columns=[
        'Unnamed: 0',
        'Unnamed: 0.1',
        'Diameter_nm',
        'Absorbance max (nm)',
        'PL max (nm)',
    ]
)
# Target to predict: the 'PL max (nm)' column as a NumPy array.
y_PL = df['PL max (nm)'].values

In [None]:
# Hold out 25% of the PL data for testing; the fixed seed makes the split repeatable.
X_PL_train, X_PL_test, y_PL_train, y_PL_test = train_test_split(
    X_PL,
    y_PL,
    test_size=0.25,
    random_state=8,
    shuffle=True,
)

In [None]:
def set_splitter(random_state, X=None, y=None, test_size=0.25):
    """Split a feature/target pair into shuffled train and test sets.

    The original version assigned the split to local variables and returned
    nothing, so calling it had no observable effect. The split is now returned.

    Parameters
    ----------
    random_state : int
        Seed passed to ``train_test_split``.
    X : optional
        Feature table to split. Defaults to the notebook-level ``X_size``.
    y : optional
        Target values to split. Defaults to the notebook-level ``y_size``.
    test_size : float or int, default 0.25
        Forwarded to ``train_test_split``.

    Returns
    -------
    list
        ``[X_train, X_test, y_train, y_test]`` as produced by ``train_test_split``.
    """
    # Fall back to the size dataset so existing one-argument calls keep working.
    if X is None:
        X = X_size
    if y is None:
        y = y_size
    return train_test_split(X, y, test_size=test_size, random_state=random_state, shuffle=True)

In [None]:
def ridge_estimator(X_train, X_test, y_train, y_test, alpha):
    """Fit a Ridge model with the given ``alpha`` and score it on the test data.

    Returns the mean squared error between ``y_test`` and the model's
    predictions for ``X_test``.
    """
    # Ridge.fit returns the fitted estimator, so construction and fitting chain.
    model = Ridge(alpha=alpha).fit(X_train, y_train)
    return mean_squared_error(y_test, model.predict(X_test))

In [None]:
def lowest_MSE_calculator(X_train, X_test, y_train, y_test, low, high, step):
    """Grid-search the Ridge ``alpha`` that gives the lowest test-set MSE.

    Every alpha in ``np.arange(low, high, step)`` is scored with
    ``ridge_estimator``; the alpha with the smallest MSE is printed and returned.
    On ties the first alpha in grid order wins.

    Parameters
    ----------
    X_train, X_test, y_train, y_test
        Train/test features and targets, forwarded to ``ridge_estimator``.
    low, high, step : float
        Bounds and spacing of the alpha grid (``high`` is excluded).

    Returns
    -------
    numpy.float64
        The alpha with the lowest MSE. The original version only printed it,
        so the value had to be copied by hand into later cells.

    Raises
    ------
    ValueError
        If the grid defined by ``low``, ``high`` and ``step`` is empty.
    """
    # NOTE(review): alpha is selected on the same data passed as the test set,
    # so the winning MSE is an optimistic estimate of out-of-sample error.
    alphas = np.arange(low, high, step)
    if alphas.size == 0:
        raise ValueError(
            f"empty alpha grid: np.arange({low}, {high}, {step}) produced no values"
        )

    MSE_dict = {
        alpha: ridge_estimator(X_train, X_test, y_train, y_test, alpha)
        for alpha in alphas
    }
    best_alpha = min(MSE_dict, key=MSE_dict.get)

    print("best alpha = ", best_alpha)
    return best_alpha

In [None]:
def wrap_fn(X_train, X_test, y_train, y_test):
    """Repeat the coarse alpha search over 100 different train/test splits.

    The original loop called ``set_splitter(random_state)`` but ignored its
    result, so all 100 iterations scored the exact same split that was passed
    in. Here the supplied train and test parts are pooled back together and
    re-split with each seed, keeping the same number of test samples.

    Parameters
    ----------
    X_train, X_test
        Feature tables (pandas objects or array-likes) of one existing split.
    y_train, y_test
        Matching target arrays of that split.

    For each seed in ``range(0, 100)`` the best alpha on the grid
    ``np.arange(0, 1, 0.01)`` is printed by ``lowest_MSE_calculator``,
    followed by the seed itself.
    """
    # Rebuild the full dataset from the split so it can be re-partitioned.
    if isinstance(X_train, (pd.DataFrame, pd.Series)):
        X_all = pd.concat([X_train, X_test])
    else:
        X_all = np.concatenate([X_train, X_test])
    y_all = np.concatenate([y_train, y_test])

    # An integer test_size is an absolute sample count; reuse the caller's.
    n_test = len(y_test)

    for random_state in range(0, 100):
        X_tr, X_te, y_tr, y_te = train_test_split(
            X_all, y_all, test_size=n_test, random_state=random_state, shuffle=True
        )
        lowest_MSE_calculator(X_tr, X_te, y_tr, y_te, 0, 1, 0.01)
        print(random_state)

# SIZE PREDICTION

In [None]:
# Build the split inline: X_size_train etc. are only assigned in a later cell,
# so referencing them here fails on a fresh kernel. train_test_split returns
# (X_train, X_test, y_train, y_test), which matches wrap_fn's parameter order.
wrap_fn(*train_test_split(X_size, y_size, test_size=0.25, random_state=8, shuffle=True))

In [None]:
# Hold out 25% of the size data for testing; the fixed seed makes the split repeatable.
X_size_train, X_size_test, y_size_train, y_size_test = train_test_split(
    X_size,
    y_size,
    test_size=0.25,
    random_state=8,
    shuffle=True,
)

In [None]:
# Fine-grained alpha search on the size split: grid from 0.0001 up to (not including) 1.
lowest_MSE_calculator(
    X_size_train, X_size_test, y_size_train, y_size_test,
    low=0.0001, high=1, step=0.0001,
)

In [None]:
# Fit ridge regression on the size training data with a fixed alpha
# (presumably copied from the alpha search output -- confirm).
ridge = Ridge(alpha=0.050800000000000005).fit(X_size_train, y_size_train)
y_size_predict = ridge.predict(X_size_test)
# Test-set MSE, shown as the cell output.
mean_squared_error(y_size_test, y_size_predict)

In [None]:
# Observed vs. predicted diameter for each test sample, by position in the test set.
fig, ax = plt.subplots()
ax.plot(y_size_test, 'x', color='magenta', label='Test')
ax.plot(y_size_predict, 'x', label='Predicted')
ax.set_xlabel('Sample Number')
ax.set_ylabel('Diameter (nm)')
# One legend call only: the original drew a default legend and then
# immediately replaced it with this one, anchored outside the axes.
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left', fontsize='small')
plt.show()

In [None]:
# Predicted diameter plotted against the observed test values.
fig, ax = plt.subplots()
ax.plot(y_size_test, y_size_predict, 'o')
ax.set(
    title="",
    xlabel='Observed Values (nm)',
    ylabel='Predicted Values (nm)',
)
plt.show()

# ABSORBANCE PREDICTION

In [None]:
# Build the split inline: X_abs_train etc. are only assigned in a later cell,
# so referencing them here fails on a fresh kernel. train_test_split returns
# (X_train, X_test, y_train, y_test), which matches wrap_fn's parameter order.
wrap_fn(*train_test_split(X_abs, y_abs, test_size=0.25, random_state=8, shuffle=True))

In [None]:
# Hold out 25% of the absorbance data for testing; the fixed seed makes the split repeatable.
X_abs_train, X_abs_test, y_abs_train, y_abs_test = train_test_split(
    X_abs,
    y_abs,
    test_size=0.25,
    random_state=8,
    shuffle=True,
)

In [None]:
# Fine-grained alpha search on the absorbance split: grid from 0.0001 up to (not including) 1.
lowest_MSE_calculator(
    X_abs_train, X_abs_test, y_abs_train, y_abs_test,
    low=0.0001, high=1, step=0.0001,
)

In [None]:
# Fit ridge regression on the absorbance training data with a fixed alpha
# (presumably copied from the alpha search output -- confirm).
ridge = Ridge(alpha=0.0476).fit(X_abs_train, y_abs_train)
y_abs_predict = ridge.predict(X_abs_test)
# Test-set MSE, shown as the cell output.
mean_squared_error(y_abs_test, y_abs_predict)

In [None]:
# Observed vs. predicted absorbance for each test sample, by position in the test set.
fig, ax = plt.subplots()
ax.plot(y_abs_test, 'x', color='magenta', label='Test')
ax.plot(y_abs_predict, 'x', label='Predicted')
ax.set_xlabel('Sample Number')
ax.set_ylabel('Absorbance (nm)')
# One legend call only: the original drew a default legend and then
# immediately replaced it with this one, anchored outside the axes.
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left', fontsize='small')
plt.show()

In [None]:
# Predicted absorbance plotted against the observed test values.
fig, ax = plt.subplots()
ax.plot(y_abs_test, y_abs_predict, 'o')
ax.set(
    title="",
    xlabel='Observed Values (nm)',
    ylabel='Predicted Values (nm)',
)
plt.show()

# PL PREDICTION

In [None]:
# Run the repeated coarse alpha search on the PL split.
wrap_fn(
    X_train=X_PL_train,
    X_test=X_PL_test,
    y_train=y_PL_train,
    y_test=y_PL_test,
)

In [None]:
# Fine-grained alpha search on the PL split: grid from 0.0001 up to (not including) 1.
lowest_MSE_calculator(
    X_PL_train, X_PL_test, y_PL_train, y_PL_test,
    low=0.0001, high=1, step=0.0001,
)

In [None]:
# Fit ridge regression on the PL training data with a fixed alpha
# (presumably copied from the alpha search output -- confirm).
ridge = Ridge(alpha=0.057800000000000004).fit(X_PL_train, y_PL_train)
y_PL_predict = ridge.predict(X_PL_test)
# Test-set MSE, shown as the cell output.
mean_squared_error(y_PL_test, y_PL_predict)

In [None]:
# Observed vs. predicted PL for each test sample, by position in the test set.
fig, ax = plt.subplots()
ax.plot(y_PL_test, 'x', color='magenta', label='Test')
ax.plot(y_PL_predict, 'x', label='Predicted')
ax.set_xlabel('Sample Number')
ax.set_ylabel('PL (nm)')
# One legend call only: the original drew a default legend and then
# immediately replaced it with this one, anchored outside the axes.
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left', fontsize='small')
plt.show()

In [None]:
# Predicted PL plotted against the observed test values.
fig, ax = plt.subplots()
ax.plot(y_PL_test, y_PL_predict, 'o')
ax.set(
    title="",
    xlabel='Observed Values (nm)',
    ylabel='Predicted Values (nm)',
)
plt.show()