# Single Training Image Generation Using Data Augmentation

## Setup

In [1]:
# Install the pinned fastai release (2.5.3); -q keeps pip quiet, -U upgrades an existing install.
%pip install fastai==2.5.3 -q -U
# List every installed package with its version so the environment is recorded with the notebook.
%pip freeze

Note: you may need to restart the kernel to use updated packages.
anyio @ file:///home/conda/feedstock_root/build_artifacts/anyio_1666191106763/work/dist
argon2-cffi @ file:///home/conda/feedstock_root/build_artifacts/argon2-cffi_1640817743617/work
argon2-cffi-bindings @ file:///home/conda/feedstock_root/build_artifacts/argon2-cffi-bindings_1666850859330/work
attrs @ file:///home/conda/feedstock_root/build_artifacts/attrs_1659291887007/work
backcall @ file:///home/conda/feedstock_root/build_artifacts/backcall_1592338393461/work
backports.functools-lru-cache @ file:///home/conda/feedstock_root/build_artifacts/backports.functools_lru_cache_1618230623929/work
beautifulsoup4 @ file:///home/conda/feedstock_root/build_artifacts/beautifulsoup4_1649463573192/work
bleach @ file:///home/conda/feedstock_root/build_artifacts/bleach_1656355450470/work
blis==0.7.9
catalogue==2.0.8
certifi==2022.12.7
cffi @ file:///home/conda/feedstock_root/build_artifacts/cffi_1666754707314/work
charset-normalizer==

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from fastai.vision.all import *
import sys

# Load EB_ML python libraries
# The following libraries are used in this notebook and should be installed in your local machine before running this notebook.
# eb_colab_utils.py
# eb_ml_battery_lib.py
# eb_ml_utils.py

# Directory from which the external *.py helper files used in this notebook are loaded.
# Note: on a Google Colab virtual machine, copy those files into the "/content" folder
# BEFORE running this cell.
# NOTE(review): this is an absolute, machine-specific path - adjust it when running elsewhere.
external_python_file_path = "/home/studio-lab-user/code"

# Make the helper modules importable by the import statements that follow.
sys.path.append(external_python_file_path)

from ml4measurement.eb_ml_utils import score_model,build_learner
from ml4measurement.eb_ml_colab_utils import get_root_path
from ml4measurement.eb_ml_battery_lib import generate_image_files_from_measure_table,generate_EIS_images_for_experiment

## Experiment Configuration

In [3]:
import yaml

# Location of the YAML file that drives every experiment in this notebook.
config_file = '../config/config.yaml'

# Parse the configuration into `config`, which the following cells read and extend.
# NOTE(review): yaml.FullLoader accepts more than plain scalars/lists/mappings; if the
# configuration only contains plain data, yaml.safe_load would be the safer call - confirm
# before switching.
with open(config_file) as config_stream:
    config = yaml.load(config_stream, Loader=yaml.FullLoader)

In [4]:
# Resolve the root working directory. Per the original note, get_root_path maps the
# working folder to Google Drive when the notebook runs in Google Colab.
config['ROOT_DIR'] = get_root_path(config['working_folder'])

# Random train/validation split: 30% of the items go to validation, fixed seed for repeatability.
config['Splitter'] = RandomSplitter(valid_pct=0.3, seed=41)

# Pattern that captures the number following the last underscore of a PNG file name
# (e.g. the trailing "10" in "..._06_4_10.png").
# The dot is escaped so that only a literal ".png" extension matches; the previous
# pattern used a bare "." which matches any character.
config['rePat'] = r'^.*_(\d+)\.png$'

NOT running on COLAB


# Load Measurement Dataset

In [5]:
from LiBEIS.code.utilities import read_measurement_table

# Load the dataset through LiBEIS. The first returned element is discarded; the wide
# measurement table and the column names are kept for the following cells.
(
    _,
    meas_table_wide,
    battery_id_col_name,
    freq_id_col_name,
    impedance_col_name,
    measure_id_col_name,
    soc_col_name,
) = read_measurement_table(config_file)

# Row positions of the wide table, stored as unsigned integers.
n_measurements = meas_table_wide.shape[0]
indices = np.arange(n_measurements).astype(np.uint)

# Display the wide measurement table.
meas_table_wide

  meas_table_wide = meas_table_tall.pivot(primary_key, freq_id_col_name)


Unnamed: 0_level_0,MEASURE_ID,SOC,BATTERY_ID,IMPEDANCE_VALUE,IMPEDANCE_VALUE,IMPEDANCE_VALUE,IMPEDANCE_VALUE,IMPEDANCE_VALUE,IMPEDANCE_VALUE,IMPEDANCE_VALUE,IMPEDANCE_VALUE,IMPEDANCE_VALUE,IMPEDANCE_VALUE,IMPEDANCE_VALUE,IMPEDANCE_VALUE,IMPEDANCE_VALUE,IMPEDANCE_VALUE
FREQUENCY_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,0,1,2,3,4,5,6,7,8,9,10,11,12,13
0,02_4,10,2,0.117933-0.006453j,0.115688-0.005226j,0.114258-0.004235j,0.113243-0.003809j,0.111026-0.004652j,0.109057-0.005699j,0.106340-0.006653j,0.101733-0.007526j,0.097836-0.007710j,0.094492-0.007356j,0.090119-0.006749j,0.086891-0.006165j,0.084373-0.005057j,0.081658-0.003654j
1,02_4,20,2,0.108914-0.004577j,0.107390-0.003691j,0.106296-0.002381j,0.105806-0.002027j,0.104751-0.002257j,0.103965-0.002888j,0.102727-0.004009j,0.099835-0.005825j,0.096654-0.006610j,0.093300-0.006884j,0.088800-0.006496j,0.086017-0.005872j,0.083273-0.005070j,0.080611-0.003607j
2,02_4,30,2,0.105084-0.003881j,0.103869-0.003199j,0.103098-0.002013j,0.102843-0.001713j,0.101968-0.001720j,0.101440-0.002340j,0.100473-0.003114j,0.098450-0.004741j,0.095612-0.006006j,0.092412-0.006553j,0.088123-0.006485j,0.085025-0.005862j,0.082477-0.005045j,0.079936-0.003489j
3,02_4,40,2,0.104545-0.003870j,0.103324-0.002926j,0.102673-0.002098j,0.102103-0.001555j,0.101310-0.001838j,0.100783-0.002343j,0.100185-0.002937j,0.097745-0.004520j,0.095411-0.005965j,0.092198-0.006490j,0.087758-0.006504j,0.084928-0.005835j,0.082257-0.005009j,0.079709-0.003534j
4,02_4,50,2,0.103768-0.004256j,0.102449-0.003355j,0.101666-0.002424j,0.101108-0.001858j,0.099993-0.001833j,0.099596-0.002300j,0.099106-0.002856j,0.096909-0.004247j,0.094556-0.005593j,0.091448-0.006259j,0.087291-0.006430j,0.084213-0.005777j,0.081852-0.004830j,0.079187-0.003503j
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
235,06_8,60,6,0.105475-0.005807j,0.103037-0.005163j,0.101439-0.003260j,0.100950-0.002266j,0.099744-0.002003j,0.099114-0.002131j,0.098424-0.002288j,0.096977-0.003218j,0.095176-0.004172j,0.092986-0.004747j,0.089744-0.005112j,0.087370-0.004697j,0.085205-0.004107j,0.082964-0.003023j
236,06_8,70,6,0.104604-0.006193j,0.102764-0.004643j,0.101474-0.003235j,0.100668-0.002506j,0.099592-0.001932j,0.098702-0.002253j,0.098120-0.002294j,0.096754-0.003253j,0.095009-0.003961j,0.092899-0.004740j,0.089579-0.004943j,0.087258-0.004694j,0.085021-0.004056j,0.082766-0.003054j
237,06_8,80,6,0.103358-0.005761j,0.101491-0.004417j,0.100160-0.003028j,0.099628-0.002222j,0.098459-0.001881j,0.097915-0.001947j,0.097327-0.002187j,0.096023-0.002886j,0.094421-0.003751j,0.092544-0.004401j,0.089422-0.004757j,0.087084-0.004596j,0.084967-0.004073j,0.082873-0.002947j
238,06_8,90,6,0.103680-0.005234j,0.102111-0.004055j,0.100888-0.002762j,0.100350-0.002017j,0.099374-0.001809j,0.098816-0.001959j,0.098189-0.002165j,0.096783-0.003066j,0.095252-0.004004j,0.093130-0.004682j,0.089758-0.004923j,0.087432-0.004681j,0.085227-0.004055j,0.082948-0.003066j


In [6]:
from itertools import product
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from LiBEIS.code.utilities import read_measurement_table,FeatureExtractionMode,DataNormaliser,get_patterns

# Feature-extraction modes to explore.
# Note: some modes are commented out to avoid exceeding the computation time limit
# of Code Ocean.
pattern_extraction_modes = [
    #FeatureExtractionMode(mode = 'module'),
    #FeatureExtractionMode(mode = 'phase'),
    FeatureExtractionMode(mode='bode'),
    #FeatureExtractionMode(mode = 'real'),
    #FeatureExtractionMode(mode = 'imag'),
    FeatureExtractionMode(mode='real+imag'),
]

# Normalization is not compatible with data augmentation, so only the 'None' normaliser is active.
normalization_modes = [
    DataNormaliser(name='None', model=None),
    #DataNormaliser(name = 'MinMax', model = MinMaxScaler),
    #DataNormaliser(name = 'Z-score', model = StandardScaler)
]

# Data augmentation factors to explore.
data_agumentation_factors = [10]

# Full factorial plan: every combination of
# (feature-extraction mode, normalisation mode, data augmentation factor).
factorial_plan = list(
    product(pattern_extraction_modes, normalization_modes, data_agumentation_factors)
)
num_experiments = len(factorial_plan)

In [7]:
# Unpack the experiment settings from the loaded configuration into notebook-level names.

# Cross-validation layout
cross_validation_list = config['cross_validation_list']
cross_validation_experiment_names = config['cross_validation_experiment_names']
all_batteries = config['all_batteries']

# Acquisitions reserved for the two test sets
test_measures_list = config['test_measure_list']
test_measures_list2 = config['test_measure_list2']

# Column names; these replace the values previously returned by read_measurement_table
measure_id_col_name = config['measure_id_field']
soc_col_name = config['soc_field']

# Naming, image generation switch and training length
experiment_name_prefix = config['experiment_name_prefix']
generate_images = config['generate_images']
n_epochs = config['n_epochs']

# Root folders for the generated training and test images
root_image_files_path = config['root_images_path']
root_test_image_files_path = config['root_test_images_path']

## Test Image Generation

In [9]:
# Test image generation: for every experiment of the factorial plan, generate the images
# of the two test sets ("NEW_BATT" from test_measures_list, "NEW_MEAS" from test_measures_list2).
experiment_name_prefix = "Test_dataset_"
config['generate_images'] = True
# NOTE(review): both assignments above stay in effect after this cell. The leave-one-out
# cell reads `experiment_name_prefix` and config['generate_images'] - confirm that carrying
# these values over is intended.

# Loop-invariant settings, assigned once instead of on every iteration.
#TODO: some classes are not represented in the validation dataset. Use a stratified splitter
config['Splitter'] = RandomSplitter(valid_pct=0.3, seed=41)
# The dot is escaped so that only a literal ".png" extension matches.
config['rePat'] = r'^.*_(\d+)\.png$'

# The test subsets do not depend on the experiment, so they are filtered once.
test_set_condition1 = meas_table_wide[(measure_id_col_name)].isin(test_measures_list)
meas_table_for_testing1 = meas_table_wide[test_set_condition1]

test_set_condition2 = meas_table_wide[(measure_id_col_name)].isin(test_measures_list2)
meas_table_for_testing2 = meas_table_wide[test_set_condition2]


def _generate_test_images(dataset_tag, meas_table, experiment_idx, experiment):
    """Generate the test images of one test set for one experiment.

    Builds the experiment name from `experiment_name_prefix`, `dataset_tag` and
    `experiment_idx`, prints it, stores the matching folder in
    config['TEST_IMAGES_PATH'] and returns whatever
    generate_EIS_images_for_experiment returns.
    """
    experiment_name = experiment_name_prefix + dataset_tag + "_Exp_" + str(experiment_idx)
    print("Experiment name: " + experiment_name)
    config['TEST_IMAGES_PATH'] = config['root_test_images_path'] + "/" + experiment_name

    # 0.0001: presumably the noise standard deviation used for augmentation (the plan-level
    # helper takes noise_std_dev=0.0001) - TODO confirm against generate_EIS_images_for_experiment.
    return generate_EIS_images_for_experiment(
        experiment_name, meas_table, impedance_col_name,
        soc_col_name, measure_id_col_name, root_test_image_files_path,
        0.0001, experiment_idx, experiment)


for experiment_idx, experiment in enumerate(factorial_plan):

    print(f'Running experiment {experiment_idx + 1} of {num_experiments}')

    # NOTE(review): `patterns` and `data_augmentation_factor` are not consumed in this cell.
    # The calls are kept because get_patterns / normalise are defined outside this notebook
    # and may be relied upon - confirm and drop them if they are pure.
    #Compute the patterns
    patterns = get_patterns(meas_table_wide, impedance_col_name,
                            mode=experiment[0].mode,
                            kwargs=experiment[0].params)
    #Perform data normalisation
    patterns = experiment[1].normalise(patterns)
    data_augmentation_factor = experiment[2]

    print("battery data acquisitions for test: ")
    print(test_measures_list)
    print("battery data acquisitions for test 2: ")
    print(test_measures_list2)

    if config['generate_images']:
        image_generation_record_test1 = _generate_test_images(
            "NEW_BATT", meas_table_for_testing1, experiment_idx, experiment)
        image_generation_record_test2 = _generate_test_images(
            "NEW_MEAS", meas_table_for_testing2, experiment_idx, experiment)
        

Running experiment 1 of 2
battery data acquisitions for test: 
['06_4', '06_5', '06_6', '06_7', '06_8']
battery data acquisitions for test 2: 
['05_8']
Experiment name: Test_dataset_NEW_BATT_Exp_0
Data augmentation factor is greater than 1. Augmenting data...
dataset row number: 500
start image file generation. IMAGE_PATH: ../data/images/Test_dataset_NEW_BATT_Exp_0
soc: 10
measure: 06_4
../data/images/Test_dataset_NEW_BATT_Exp_0/Test_dataset_NEW_BATT_Exp_0-06_4_10.png
soc: 20
measure: 06_4
../data/images/Test_dataset_NEW_BATT_Exp_0/Test_dataset_NEW_BATT_Exp_0-06_4_20.png
soc: 30
measure: 06_4
../data/images/Test_dataset_NEW_BATT_Exp_0/Test_dataset_NEW_BATT_Exp_0-06_4_30.png
soc: 40
measure: 06_4
../data/images/Test_dataset_NEW_BATT_Exp_0/Test_dataset_NEW_BATT_Exp_0-06_4_40.png
soc: 50
measure: 06_4
../data/images/Test_dataset_NEW_BATT_Exp_0/Test_dataset_NEW_BATT_Exp_0-06_4_50.png
soc: 60
measure: 06_4
../data/images/Test_dataset_NEW_BATT_Exp_0/Test_dataset_NEW_BATT_Exp_0-06_4_60.png
so

## Leave One Out Image Generation

In [None]:
# Leave-one-out image generation: for every experiment of the factorial plan and every
# cross-validation fold, generate the training and test images and record one summary row.

# Loop-invariant settings, assigned once instead of on every iteration.
#TODO: some classes are not represented in the validation dataset. Use a stratified splitter
config['Splitter'] = RandomSplitter(valid_pct=0.3, seed=41)
# The dot is escaped so that only a literal ".png" extension matches.
config['rePat'] = r'^.*_(\d+)\.png$'

# One dict per (experiment, fold); converted to a DataFrame once, after the loops,
# instead of re-concatenating a growing DataFrame on every iteration.
result_records = []

for experiment_idx, experiment in enumerate(factorial_plan):

    print(f'Running experiment {experiment_idx + 1} of {num_experiments}')

    #Compute the patterns
    patterns = get_patterns(meas_table_wide, impedance_col_name,
                            mode=experiment[0].mode,
                            kwargs=experiment[0].params)
    #Perform data normalisation
    patterns = experiment[1].normalise(patterns)
    data_augmentation_factor = experiment[2]

    # Only referenced by the commented-out scoring step below.
    model_accuracy = {}

    # Train and score model for cross-validation
    for cross_validation_experiment_index, cv_measures_to_hold in enumerate(cross_validation_list):
        cv_exp_name = cross_validation_experiment_names[cross_validation_experiment_index]

        # Held-out acquisitions form the test set; everything else is used for training.
        cv_test_measures_list = list(cv_measures_to_hold)
        cv_train_measures_list = all_batteries.copy()
        for batt_mes in cv_test_measures_list:
            # Raises ValueError if a held-out acquisition is not listed in all_batteries.
            cv_train_measures_list.remove(batt_mes)

        print("battery data acquisitions for training and validation: ")
        print(cv_train_measures_list)
        print("battery data acquisitions for test: ")
        print(cv_test_measures_list)
        experiment_name = experiment_name_prefix + str(cv_exp_name) + "_Exp_" + str(experiment_idx)
        print("model name: " + experiment_name)

        config['ExperimentName'] = experiment_name
        config['IMAGES_PATH'] = config['root_images_path'] + "/" + experiment_name
        config['TEST_IMAGES_PATH'] = config['root_test_images_path'] + "/" + experiment_name

        if config['generate_images']:
            train_set_condition = meas_table_wide[(measure_id_col_name)].isin(cv_train_measures_list)
            meas_table_for_training = meas_table_wide[train_set_condition]

            test_set_condition = meas_table_wide[(measure_id_col_name)].isin(cv_test_measures_list)
            meas_table_for_testing = meas_table_wide[test_set_condition]

            # 0.0001: presumably the noise standard deviation used for augmentation - TODO confirm.
            image_generation_record = generate_EIS_images_for_experiment(
                experiment_name, meas_table_for_training, impedance_col_name,
                soc_col_name, measure_id_col_name, root_image_files_path,
                0.0001, experiment_idx, experiment)

            image_generation_record_test = generate_EIS_images_for_experiment(
                experiment_name, meas_table_for_testing, impedance_col_name,
                soc_col_name, measure_id_col_name, root_test_image_files_path,
                0.0001, experiment_idx, experiment)

        # TRAINING
        #learn= build_and_train_learner(config,n_epochs=n_epochs)
        #SAVE
        #weights_filename=save_model_weights(learn,config["models_path"],experiment_name)
        #filename_pth= weights_filename+".pth"
        #if IN_COLAB:
        #    copy_model_to_google_drive(filename_pth,learn.model_dir,config["models_path"])
        ## SCORE MODEL
        #dl=build_data_loader(config)
        #model_accuracy[experiment_name]=score_model(weights_filename,dl,config["models_path"])

        #Add record to the results
        result_records.append({
            # Zero-based fold index (position in cross_validation_list). This column
            # previously repeated experiment_idx, so folds could not be told apart.
            'Cross_validation_experiment_index': cross_validation_experiment_index,
            'Experiment index': experiment_idx,
            'Feature extraction mode': experiment[0].mode,
            'Feature normalisation mode': experiment[1].name,
            'Data augmentation factor': data_augmentation_factor,
            'Num features': patterns.shape[1],
            #'Model name' : experiment_name,
            #'Model accuracy' : model_accuracy[experiment_name],
            'Training images path': config['IMAGES_PATH'],
            'Test images path': config['TEST_IMAGES_PATH'],
        })

# Most recent record first, matching the row order of the previous implementation
# (which prepended each new record).
df_results = pd.DataFrame(result_records[::-1])
df_results.to_csv(config['results_path'] + '/leave_one_out_generation_results.csv', index=False)

## Single Training Image Generation

In [None]:
from itertools import product
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from LiBEIS.code.utilities import read_measurement_table,FeatureExtractionMode,DataNormaliser,get_patterns

# Feature-extraction modes for the single-training run: all seven modes are enabled here.
pattern_extraction_modes = [
    FeatureExtractionMode(mode='module'),
    FeatureExtractionMode(mode='phase'),
    FeatureExtractionMode(mode='module+phase'),
    FeatureExtractionMode(mode='real'),
    FeatureExtractionMode(mode='imag'),
    FeatureExtractionMode(mode='real+imag'),
    FeatureExtractionMode(mode='bode'),
]

# Normalization is not compatible with data augmentation, so only the 'None' normaliser is active.
normalization_modes = [
    DataNormaliser(name='None', model=None),
    #DataNormaliser(name = 'MinMax', model = MinMaxScaler),
    #DataNormaliser(name = 'Z-score', model = StandardScaler)
]

# Data augmentation factors to explore.
data_agumentation_factors = [10, 20]

# Full factorial plan: every combination of
# (feature-extraction mode, normalisation mode, data augmentation factor).
experiment_plan = product(
    pattern_extraction_modes,
    normalization_modes,
    data_agumentation_factors,
)

# Materialise the plan so it can be counted and iterated more than once.
experiment_runs_list = list(experiment_plan)
num_experiments = len(experiment_runs_list)

In [None]:
# Re-read the experiment settings from the configuration for the single-training run.

# Cross-validation layout
cross_validation_list = config['cross_validation_list']
cross_validation_experiment_names = config['cross_validation_experiment_names']
all_batteries = config['all_batteries']

# Column names of the measurement table
measure_id_col_name = config['measure_id_field']
soc_col_name = config['soc_field']

# Naming, image generation switch and training length
experiment_name_prefix = config['experiment_name_prefix']
generate_images = config['generate_images']
n_epochs = config['n_epochs']

# Root folder for the generated training images
root_image_path = config['root_images_path']


In [None]:
# Report how many entries `all_batteries` holds, and which ones, before the images are generated.
print(f"Generating images for experiment plan using {len(all_batteries)} measurements from batteries: {all_batteries}")

In [None]:
from ml4measurement.eb_ml_battery_lib import generate_EIS_images_for_experiment_plan

# Keep only the rows whose measure id is listed in `all_batteries`.
train_set_condition = meas_table_wide[(measure_id_col_name)].isin(all_batteries)
filtered_meas_table = meas_table_wide[train_set_condition]

# NOTE(review): this name is not read by any visible cell - confirm whether it is still needed.
experiment_run_name = "prova_data_augmentation"

# Generate the images for every run of the experiment plan; the call returns the summary
# table stored in `df_results`.
df_results = generate_EIS_images_for_experiment_plan(
    experiment_name='Paper_MES_single_training_',
    experiment_runs_list=experiment_runs_list,
    meas_table_wide=filtered_meas_table,
    impedance_col_name=impedance_col_name,
    soc_col_name=soc_col_name,
    measure_id_col_name=measure_id_col_name,
    root_image_files_path=root_image_path,
    noise_std_dev=0.0001,
)

# Persist the summary next to the other results.
results_csv_path = config['results_path'] + '/single_train_generation_results.csv'
df_results.to_csv(results_csv_path, index=False)

In [None]:
# Display the summary table held in df_results.
df_results

In [None]:
# Display only the 'Image path' column of df_results.
df_results['Image path']

In [None]:
from ml4measurement.eb_ml_battery_lib import get_image_file_names_for_soc,compute_image_overlay


# For every image folder listed in df_results and every SOC label of the configuration,
# overlay all the images found for that SOC and show the result.
for experiment_image_path in df_results['Image path']:
    for soc_label in config['soc_list']:
        image_files_soc = get_image_file_names_for_soc(experiment_image_path, int(soc_label))

        # f-strings instead of "+" concatenation: the messages are unchanged for string
        # labels, and integer labels (e.g. an unquoted YAML list) no longer raise TypeError.
        print(f'SOC Label: {soc_label}')
        print(f'Number of images: {len(image_files_soc)}')
        print(f'Image path: {experiment_image_path}')
        image_overlay = compute_image_overlay(image_files_soc)

        fig, ax = plt.subplots()
        ax.imshow(image_overlay)
        ax.set_title(f'Overlay of all the images generated for SOC {soc_label}', fontsize=16)
        plt.show()
        # Release the figure so that figures do not accumulate over the nested loops.
        plt.close(fig)