In [2]:
!python install_libraries.py

2024-08-24 16:57:51,947 - INFO - Installation of all libraries completed!


In [2]:
import tensorflow as tf

# `tf.test.is_gpu_available()` is deprecated; TensorFlow's own warning points
# to `tf.config.list_physical_devices('GPU')`. An empty list means no GPU, so
# bool() keeps this cell's True/False output.
bool(tf.config.list_physical_devices('GPU'))

Instructions for updating:
Use `tf.config.list_physical_devices('GPU')` instead.


False

In [3]:
from tensorflow.python.client import device_lib

# Print whatever devices device_lib reports for this machine.
visible_devices = device_lib.list_local_devices()
print(visible_devices)

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 692567347925603406
xla_global_id: -1
]


In [None]:
import pickle
import threading

In [4]:
import os
from config import Config

# Sub-directory handed to Config for each dataset split.
# Validation and test are both pointed at 'test/'.
split_subdirs = {
    'train_data_subdir': 'train/',
    'val_data_subdir': 'test/',
    'test_data_subdir': 'test/',
}
config = Config(data_dir='./data', **split_subdirs)

# Plain name lists; nothing in this cell touches the disk.
# NOTE(review): these look like listings of ./data and its split folders — confirm.
data_dir = ['test', 'train', 'val']

train_data_dir = ['TRAIN_LGHG2@n10degC_to_25degC_Norm_5Inputs.csv']

# The test and validation lists hold the same four per-temperature files.
test_data_dir = [
    '01_TEST_LGHG2@n10degC_Norm_(05_Inputs).csv',
    '02_TEST_LGHG2@0degC_Norm_(05_Inputs).csv',
    '03_TEST_LGHG2@10degC_Norm_(05_Inputs).csv',
    '04_TEST_LGHG2@25degC_Norm_(05_Inputs).csv',
]
val_data_dir = list(test_data_dir)  # independent copy, same contents


In [None]:
import pandas as pd
import numpy as np

## Training Data

In [5]:
# os.listdir() returns entries in arbitrary order, so sort before indexing to
# make the selected file the same on every machine and every run.
train_data_filename = sorted(os.listdir(config.get_train_data_dir()))[0]
train_data_path = os.path.join(config.get_train_data_dir(), train_data_filename)

# Load the training CSV and show its column names as the cell output.
train_data = pd.read_csv(train_data_path)
train_data.columns

Index(['V', 'I', 'Temp', 'V_avg', 'I_avg', 'SOC'], dtype='object')

### Extracting Features for Training
In this cell, we extract the five input feature columns (`V`, `I`, `Temp`, `V_avg`, `I_avg`) from the training data for use in model training.

In [10]:
# Model inputs: every column except the SOC target, in a fixed order.
feature_columns = ['V', 'I', 'Temp', 'V_avg', 'I_avg']
X_train = train_data.loc[:, feature_columns].values
X_train.shape

(669956, 5)

In [11]:
# Show the training feature matrix (NumPy abbreviates the middle rows with "...").
X_train

array([[0.38514793, 0.75102009, 0.30310108, 0.38514793, 0.75102009],
       [0.38515183, 0.75102009, 0.30459129, 0.38514988, 0.75102009],
       [0.38515573, 0.75102009, 0.3060815 , 0.38515183, 0.75102009],
       ...,
       [0.47884278, 0.75102009, 0.00847709, 0.45983939, 0.75102009],
       [0.4789612 , 0.75102009, 0.00847709, 0.45997861, 0.75102009],
       [0.4789612 , 0.75102009, 0.00847709, 0.46011672, 0.75102009]])

### Extracting Target Variable for Training

In this cell, we extract the target variable (`SOC`) from the training data.

In [12]:
# Regression target: the SOC column as a 1-D NumPy array.
y_train = train_data.loc[:, 'SOC'].values
y_train.shape

(669956,)

In [13]:
# Show the training target vector (NumPy abbreviates the middle values with "...").
y_train

array([0.20641667, 0.20641667, 0.20641667, ..., 0.28324333, 0.28324333,
       0.28324333])

## Validation Data

In [14]:
# os.listdir() gives no ordering guarantee, so a bare [3] could select a
# different file on another machine. Sorting first makes the index stable.
# NOTE(review): with the 01_..04_ prefixed names listed in the config cell,
# index 3 of the sorted listing would be the 25 degC file — confirm that the
# directory holds exactly those four files.
VAL_FILE_INDEX = 3
val_data_filenames = sorted(os.listdir(config.get_val_data_dir()))
val_data_filename = val_data_filenames[VAL_FILE_INDEX]
val_data_path = os.path.join(config.get_val_data_dir(), val_data_filename)

# Load the validation CSV and display it as the cell output.
val_data = pd.read_csv(val_data_path)
val_data

Unnamed: 0,V,I,Temp,V_avg,I_avg,SOC
0,0.966960,0.748900,0.920678,0.966960,0.748900,1.000000
1,0.966020,0.746992,0.920677,0.966490,0.747946,0.999990
2,0.965901,0.746992,0.917845,0.966294,0.747628,0.999983
3,0.965783,0.747098,0.917845,0.966166,0.747496,0.999973
4,0.965665,0.746992,0.917845,0.966066,0.747395,0.999963
...,...,...,...,...,...,...
47512,0.298614,0.751020,0.926344,0.292723,0.751020,0.136623
47513,0.298614,0.751020,0.926344,0.292761,0.751020,0.136623
47514,0.298614,0.751020,0.926344,0.292798,0.751020,0.136623
47515,0.298614,0.751020,0.929177,0.292834,0.751020,0.136623


In [15]:
# Same input columns and target column as the training split.
feature_columns = ['V', 'I', 'Temp', 'V_avg', 'I_avg']
X_val = val_data.loc[:, feature_columns].values
y_val = val_data.loc[:, 'SOC'].values

## Data Normalization

In [None]:
import utils

### Normalizing Feature Data

In this cell, we normalize the feature data for training and validation:
- `normalized_X_train = utils.normalize(X_train)` and `normalized_X_val = utils.normalize(X_val)`
    - Applies normalization to the feature sets `X_train` and `X_val` using the `normalize` function from the `utils` module.
    - Normalization typically scales the features to a standard range, improving the performance and convergence of learning algorithms.

This step ensures that the feature data is on a comparable scale, which can be crucial for many learning models.

In [16]:
# Normalize the training features with the project helper utils.normalize.
# NOTE(review): in the recorded output every row spans exactly -1 to 1, which
# suggests the helper rescales each sample (row) rather than each feature
# (column) — confirm against utils.normalize.
normalized_X_train = utils.normalize(X_train)
# Bare last expression: display the normalized array.
normalized_X_train

array([[-0.63365322,  1.        , -1.        , -0.63365322,  1.        ],
       [-0.63908898,  1.        , -1.        , -0.63909772,  1.        ],
       [-0.64456116,  1.        , -1.        , -0.64457869,  1.        ],
       ...,
       [ 0.26690492,  1.        , -1.        ,  0.21572031,  1.        ],
       [ 0.26722387,  1.        , -1.        ,  0.21609528,  1.        ],
       [ 0.26722387,  1.        , -1.        ,  0.21646728,  1.        ]])

In [17]:
# Normalize the validation features with the same helper used for training.
# NOTE(review): this is an independent call on X_val; whether any statistics
# are shared with the training split depends on utils.normalize — confirm.
normalized_X_val = utils.normalize(X_val)
# Bare last expression: display the normalized array.
normalized_X_val

array([[ 1.        , -1.        ,  0.57551079,  1.        , -1.        ],
       [ 0.99571585, -1.        ,  0.58256774,  1.        , -0.99130679],
       [ 0.99642139, -1.        ,  0.55815534,  1.        , -0.99419975],
       ...,
       [-0.98163938,  0.44653221,  1.        , -1.        ,  0.44653221],
       [-0.98183573,  0.44006032,  1.        , -1.        ,  0.44006032],
       [-0.98194943,  0.44002819,  1.        , -1.        ,  0.44002819]])

# Linear Regression

In this cell, we initialize a linear regression model:

- **Create Linear Regression Model**:
  - `lregression_model = linear_model.LinearRegression()`
    - Initializes an instance of the `LinearRegression` class from the `linear_model` module.
    - This model will be used to perform linear regression tasks, predicting a target variable based on input features.

In [18]:
from sklearn import linear_model

# Create an unfitted LinearRegression estimator with default constructor arguments.
lregression_model = linear_model.LinearRegression()
# Bare last expression: show the estimator as the cell output.
lregression_model

In this cell, we train the linear regression model on the training data:

- **Train the Model**:
  - `lregression_model.fit(X=normalized_X_train, y=y_train)`
    - Fits the linear regression model to the normalized feature data (`normalized_X_train`) and the target variable (`y_train`).
    - The `fit` method adjusts the model's parameters to minimize the difference between predicted and actual values.

This step trains the linear regression model, allowing it to learn the relationships between the features and the target variable.

In [19]:
# Fit the linear model on the normalized training features against the SOC targets.
lregression_model.fit(normalized_X_train, y_train)

In this cell, we save the trained linear regression model to a file:

- **Model File Path**:
  - `lregression_model_path = './models/ml/soc_estimation_ml_lregression.sav'`
    - Specifies the file path where the model will be saved.

- **Save the Model**:
  - `pickle.dump(lregression_model, open(lregression_model_path, 'wb'))`
    - Uses the `pickle` module to serialize and save the `lregression_model` to the specified file path.
    - `open(lregression_model_path, 'wb')` opens the file in write-binary mode.
    - `pickle.dump` writes the model to the file, allowing it to be loaded and used later without retraining.

This step ensures that the trained linear regression model is preserved for future use or deployment.

In [20]:
lregression_model_path = './models/ml/soc_estimation_ml_lregression.sav'

# Create the target folder if it is missing so the save works on a fresh checkout.
os.makedirs(os.path.dirname(lregression_model_path), exist_ok=True)

# The context manager closes (and flushes) the file deterministically, even if
# pickling raises part-way through.
with open(lregression_model_path, 'wb') as model_file:
    pickle.dump(lregression_model, model_file)

# Decision Tree

In this cell, we initialize a decision tree regressor model:

- **Create Decision Tree Regressor**:
  - `decisiontree_model = tree.DecisionTreeRegressor()`
    - Initializes an instance of the `DecisionTreeRegressor` class from the `tree` module of `sklearn`.
    - This model will be used for regression tasks using decision tree algorithms.

In [50]:
from sklearn import tree

# Fix the random state: scikit-learn permutes the candidate features at every
# split, so ties between equally good splits would otherwise be broken
# differently from run to run and the saved tree would not be reproducible.
decisiontree_model = tree.DecisionTreeRegressor(random_state=42)
# Bare last expression: show the estimator as the cell output.
decisiontree_model

In this cell, we train the decision tree regressor model on the training data:

- **Train the Model**:
  - `decisiontree_model.fit(X=normalized_X_train, y=y_train)`
    - Fits the decision tree regressor model to the normalized feature data (`normalized_X_train`) and the target variable (`y_train`).
    - The `fit` method builds the decision tree by learning the relationships between the features and the target variable.

This step trains the decision tree model, allowing it to predict the target variable based on the input features.

In [51]:
# Fit the decision tree on the normalized training features against the SOC targets.
decisiontree_model.fit(normalized_X_train, y_train)

In this cell, we save the trained decision tree regressor model to a file:

- **Model File Path**:
  - `decisiontree_model_path = './models/ml/soc_estimation_ml_decisiontree.sav'`
    - Specifies the file path where the model will be saved.

- **Save the Model**:
  - `pickle.dump(decisiontree_model, open(decisiontree_model_path, 'wb'))`
    - Uses the `pickle` module to serialize and save the `decisiontree_model` to the specified file path.
    - `open(decisiontree_model_path, 'wb')` opens the file in write-binary mode.
    - `pickle.dump` writes the model to the file, allowing it to be loaded and used later without retraining.

This step ensures that the trained decision tree regressor model is preserved for future use or deployment.

In [52]:
decisiontree_model_path = './models/ml/soc_estimation_ml_decisiontree.sav'

# Create the target folder if it is missing so the save works on a fresh checkout.
os.makedirs(os.path.dirname(decisiontree_model_path), exist_ok=True)

# The context manager closes (and flushes) the file deterministically, even if
# pickling raises part-way through.
with open(decisiontree_model_path, 'wb') as model_file:
    pickle.dump(decisiontree_model, model_file)

# Nearest Neighbors Regression

In this cell, we initialize a K-Nearest Neighbors (KNN) regressor model:

- **Number of Neighbors**:
  - `n_neighbors = 5`
    - Sets the number of neighbors to be used by the K-Nearest Neighbors algorithm. In this case, the model will consider the 5 nearest neighbors to make predictions.

- **Create K-Nearest Neighbors Regressor**:
  - `nnregression_model = neighbors.KNeighborsRegressor(n_neighbors)`
    - Initializes an instance of the `KNeighborsRegressor` class from the `neighbors` module of `sklearn`.
    - This model will be used for regression tasks based on the K-Nearest Neighbors algorithm.

In [69]:
from sklearn import neighbors

# Neighbourhood size passed to the regressor.
n_neighbors = 5

# Keyword form makes it explicit which constructor argument is being set.
nnregression_model = neighbors.KNeighborsRegressor(n_neighbors=n_neighbors)
nnregression_model

In this cell, we train the K-Nearest Neighbors (KNN) regressor model on the training data:

- **Train the Model**:
  - `nnregression_model.fit(X=normalized_X_train, y=y_train)`
    - Fits the KNN regressor model to the normalized feature data (`normalized_X_train`) and the target variable (`y_train`).
    - The `fit` method trains the model by storing the training data, which will be used to make predictions based on the closest neighbors.

This step trains the KNN regressor model, allowing it to predict the target variable using the specified number of nearest neighbors.


In [70]:
# Fit the KNN regressor on the normalized training features against the SOC targets.
nnregression_model.fit(normalized_X_train, y_train)

In this cell, we save the trained K-Nearest Neighbors (KNN) regressor model to a file:

- **Model File Path**:
  - `nnregression_model_path = './models/ml/soc_estimation_ml_nnregression.sav'`
    - Specifies the file path where the model will be saved.

- **Save the Model**:
  - `pickle.dump(nnregression_model, open(nnregression_model_path, 'wb'))`
    - Uses the `pickle` module to serialize and save the `nnregression_model` to the specified file path.
    - `open(nnregression_model_path, 'wb')` opens the file in write-binary mode.
    - `pickle.dump` writes the model to the file, allowing it to be loaded and used later without retraining.

This step ensures that the trained KNN regressor model is preserved for future use or deployment.

In [71]:
nnregression_model_path = './models/ml/soc_estimation_ml_nnregression.sav'

# Create the target folder if it is missing so the save works on a fresh checkout.
os.makedirs(os.path.dirname(nnregression_model_path), exist_ok=True)

# The context manager closes (and flushes) the file deterministically, even if
# pickling raises part-way through.
with open(nnregression_model_path, 'wb') as model_file:
    pickle.dump(nnregression_model, model_file)