<a href="https://colab.research.google.com/github/daniela-figueroa/CHEMENG177/blob/main/lasso_final1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Lasso Regression for Ionic Conductivity

### Import Libraries

In [34]:
pip install plotly




In [35]:
import numpy as np
import pandas as pd
import plotly

from sklearn.linear_model import Lasso
from sklearn.linear_model import LassoCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import r2_score, mean_squared_error


### Import data

In [36]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [37]:
elyte_conductivity_data = pd.read_csv("/content/drive/My Drive/Pre-processed CALiSol Data.csv")
print(elyte_conductivity_data.columns)

Index(['Unnamed: 0', 'doi', 'k', 'T', 'c', 'salt', 'c units',
       'solvent ratio type', 'EC', 'PC', 'DMC', 'EMC', 'DEC', 'DME', 'DMSO',
       'AN', 'MOEMC', 'TFP', 'EA', 'MA', 'FEC', 'DOL', '2-MeTHF', 'DMM',
       'Freon 11', 'Methylene chloride', 'THF', 'Toluene', 'Sulfolane',
       '2-Glyme', '3-Glyme', '4-Glyme', '3-Me-2-Oxazolidinone',
       '3-MeSulfolane', 'Ethyldiglyme', 'DMF', 'Ethylbenzene',
       'Ethylmonoglyme', 'Benzene', 'g-Butyrolactone', 'Cumene',
       'Propylsulfone', 'Pseudocumeme', 'TEOS', 'm-Xylene', 'o-Xylene'],
      dtype='object')


In [38]:
print(f'Number of solvents: {len(elyte_conductivity_data.columns) - 9}')

Number of solvents: 37


### Arrange data

In [39]:
# elyte_conductivity_data.drop(['c units', 'solvent ratio type'], axis=1)
cols = elyte_conductivity_data.columns[3:]

X = elyte_conductivity_data[cols]
X = X.drop(['c units', 'solvent ratio type'],axis=1)
y = elyte_conductivity_data['k']

lambdas = [.1, 1, 10, 100]


### Select data from most interesting salts
#### LiPF6, LiBF4, LiAsF6, LiBOB

In [40]:
# I wish I could write beautiful, object oriented code here but that just wouldn't be me now would it
# LiPF6
indices_LiPF6 = X[X['salt'] == 'LiPF6'].index
X_LiPF6 = X[X['salt'] == 'LiPF6'].drop(['salt'], axis=1)
y_LiPF6 = y.loc[indices_LiPF6]

# LiBF4
indices_LiBF4 = X[X['salt'] == 'LiBF4'].index
X_LiBF4 = X[X['salt'] == 'LiBF4'].drop(['salt'], axis=1)
y_LiBF4 = y.loc[indices_LiBF4]

# LiAsF6
indices_LiAsF6 = X[X['salt'] == 'LiAsF6'].index
X_LiAsF6 = X[X['salt'] == 'LiAsF6'].drop(['salt'], axis=1)
y_LiAsF6 = y.loc[indices_LiAsF6]

# LiBOB
indices_LiBOB = X[X['salt'] == 'LiBOB'].index
X_LiBOB = X[X['salt'] == 'LiBOB'].drop(['salt'], axis=1)
y_LiBOB = y.loc[indices_LiBOB]

### Function for implementing lasso regression and evaluation

In [41]:
def lassoregress(X, y, a):
    lasso = Lasso(alpha=a)
    # Test-train split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, train_size=0.8, random_state=137)
    lasso.fit(X_train, y_train)

    # Cross-validation for best alpha
    lasso_cv = LassoCV(alphas=lambdas)
    lasso_cv.fit(X_train, y_train)
    best_lambda = lasso_cv.alpha_
    print(f'Best lambda is {best_lambda}')

    # Evaluation
    y_pred = lasso.predict(X_test)
    r2 = r2_score(y_test, y_pred)
    mse = mean_squared_error(y_test, y_pred)
    return r2, mse

r2s = np.zeros(4)
mses = np.zeros(4)

### LiPF6 Lasso Regression ("1")

In [42]:
r2_LiPF6, mse_LiPF6 = lassoregress(X_LiPF6,y_LiPF6,0.1)
print(f"R² Score: {r2_LiPF6:.4f}")
print(f"MSE: {mse_LiPF6:.4f}")
r2s[0] = r2_LiPF6
mses[0] = mse_LiPF6

Best lambda is 0.1
R² Score: 0.7807
MSE: 3.9166


### LiBF4 Lasso Regression ("2")

In [43]:
r2_LiBF4, mse_LiBF4 = lassoregress(X_LiBF4,y_LiBF4,10.0)
print(f"R² Score: {r2_LiBF4:.4f}")
print(f"MSE: {mse_LiBF4:.4f}")
r2s[1] = r2_LiBF4
mses[1] = mse_LiBF4

Best lambda is 0.1
R² Score: 0.6566
MSE: 0.9768


### LiAsF6 Lasso Regression ("3")

In [44]:
r2_LiAsF6, mse_LiAsF6 = lassoregress(X_LiAsF6,y_LiAsF6,10.0)
print(f"R² Score: {r2_LiAsF6:.4f}")
print(f"MSE: {mse_LiAsF6:.4f}")
r2s[2] = r2_LiAsF6
mses[2] = mse_LiAsF6

Best lambda is 0.1
R² Score: 0.4607
MSE: 20.5842


### LiBOB Lasso Regression ("4")

In [45]:
r2_LiBOB, mse_LiBOB = lassoregress(X_LiBOB,y_LiBOB,1.0)
print(f"R² Score: {r2_LiBOB:.4f}")
print(f"MSE: {mse_LiBOB:.4f}")
r2s[3] = r2_LiBOB
mses[3] = mse_LiBOB

Best lambda is 0.1
R² Score: 0.7022
MSE: 4.2779


### Listing Data

In [46]:
print(f'The R² scores in order: {r2s}; and the MSEs: {mses}')

The R² scores in order: [0.78072416 0.65657036 0.46069696 0.7021824 ]; and the MSEs: [ 3.91660642  0.97680389 20.58419572  4.27793072]
