# Notebook for making interim IFE data

_Since we need to relate cycling data to cycle life for each cell, it is practical to have all the data for each cell gathered in separate directories._

__Original data structure:__
- \cycle_data
    - one summary file for every cell
- \curves_data
    - one file for every charge and every discharge cycle for every cell


__Data structure for interim data (in Github repository)__
- \interim
    - \cell 1
        - summary file
        - C20 cycles
            - all C20 charge/discharge cycles (characterization cycles)
        - IR cycles
            - All IR cycles
        - cycle 15 and 115 files, actual and normalized
    - \cell 2
    - ...

\
\
\
Last edit: 06.04.22

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import os, random
import pathlib
import glob
import shutil

# Let pandas display up to 100 columns and 100 rows before truncating output
for display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(display_option, 100)

### Make interim data

In [50]:
# Locations of the raw data: per-cycle curve files and per-cell summary files
curves_path = r"C:\Users\ife12216\OneDrive - Institutt for Energiteknikk\Documents\Masteroppgave\IFEData\LG_JP3\curves_data"
cycles_path = r"C:\Users\ife12216\OneDrive - Institutt for Energiteknikk\Documents\Masteroppgave\IFEData\LG_JP3\cycle_data"

# Root directory that receives one sub-directory per cell
interim_path = r"C:\Users\ife12216\OneDrive - Institutt for Energiteknikk\Documents\Masteroppgave\ML_github\data\IFE\interim"

# Ids of all cells whose summary file passes the row-count filter below
cell_id = []

# Create a directory for every cycled cell and put its summary file in it.
# Summary files with 500 rows or fewer are skipped (shelf-life cells).
for filename in os.listdir(cycles_path):
    f = os.path.join(cycles_path, filename)
    df = pd.read_csv(f, sep='\t', encoding="ISO-8859-1")
    if len(df.index) <= 500:
        continue

    # The first 18 characters of the file name are used as the cell id
    current_id = filename[:18]
    cell_id.append(current_id)
    cell_path = os.path.join(interim_path, current_id)

    # The summary file is only copied when the directory is newly created
    if os.path.exists(cell_path):
        continue
    os.mkdir(cell_path)
    shutil.copy(f, cell_path)


def _first_cycle_after(summary, cycle_column, threshold):
    """Return ``(task, actual_cycle)`` of the first row past *threshold*.

    The first row of *summary* whose *cycle_column* value is strictly greater
    than *threshold* is used. ``(None, None)`` is returned when no row
    qualifies, so the caller can skip the copy instead of failing.
    """
    rows = summary.loc[summary[cycle_column] > threshold]
    if rows.empty:
        return None, None
    # Read each column on its own so the values keep their column dtype;
    # they are turned into file-name parts with str() by the caller.
    return rows['Tasks'].values[0], rows['Cycle - actual'].values[0]


def _copy_discharge_curves(curves_dir, task, actual_cycle, destination):
    """Copy every ``*<task>_<actual_cycle>_dch.txt`` file in *curves_dir* to *destination*.

    Does nothing when *task* is ``None`` (no matching summary row was found).
    """
    if task is None:
        return
    pattern = os.path.join(curves_dir, '*' + str(task) + '_' + str(actual_cycle) + '_dch.txt')
    for curve_file in glob.glob(pattern):
        shutil.copy(curve_file, destination)


# For every cell directory: create C20_cycles with the characterization curves
# (the 3rd cycle of each C20 test) and copy the discharge curves selected for
# cycle 15 and cycle 115, using both the actual and the normalized cycle count.
for foldername in os.listdir(interim_path): # loop through cell directories

    cell_dir = os.path.join(interim_path, foldername)
    if not os.path.isdir(cell_dir):
        continue # stray file in the interim root, not a cell directory
    C20_dir = os.path.join(cell_dir, 'C20_cycles')

    # The summary file is the entry with 'Lot' in its name
    summary_matches = glob.glob(os.path.join(cell_dir, '*' + 'Lot' + '*'))
    if not summary_matches:
        print('No summary file found in ' + cell_dir + ', skipping')
        continue
    summary_file = summary_matches[0]
    summary_df = pd.read_csv(summary_file, encoding = "ISO-8859-1", sep='\t')

    # C20 cycles are the rows with a discharge current of -0.05 C
    summary_c20_df = summary_df.loc[summary_df['Current_dch (C)'] == -0.05]
    tasks = summary_c20_df['Tasks'].unique() # one entry per test, in order of appearance

    # Make C20_cycles directory and copy the C20 cycles to it if it does not exist already
    if not os.path.exists(C20_dir):
        os.mkdir(C20_dir)
        for task in tasks:
            # '_3_' pins the cycle number to exactly 3; a bare '_3*' would also
            # pick up cycles 30-39, 300-399, ...
            for curve_file in glob.glob(os.path.join(curves_path, '*' + str(task) + '_3_*')):
                shutil.copy(curve_file, C20_dir)

    # Regular cycle count: first row with 'Cycle - total' above 15 / 115.
    # NOTE(review): '>' selects a row after cycle 15 / 115 when the counter is
    # integer-valued - confirm this is the intended reference cycle.
    task_c15, c15_actual = _first_cycle_after(summary_df, 'Cycle - total', 15)
    task_c115, c115_actual = _first_cycle_after(summary_df, 'Cycle - total', 115)

    _copy_discharge_curves(curves_path, task_c115, c115_actual, cell_dir)
    _copy_discharge_curves(curves_path, task_c15, c15_actual, cell_dir)

    # Normalized cycle count: same selection on 'Cycle - total - normalized'
    task_c15_norm, c15_actual_norm = _first_cycle_after(summary_df, 'Cycle - total - normalized', 15)
    task_c115_norm, c115_actual_norm = _first_cycle_after(summary_df, 'Cycle - total - normalized', 115)

    _copy_discharge_curves(curves_path, task_c115_norm, c115_actual_norm, cell_dir)
    _copy_discharge_curves(curves_path, task_c15_norm, c15_actual_norm, cell_dir)
    

In [66]:
# Make an IR_cycles directory in every cell directory and fill it with the
# cell's '*R_DC.txt' curve files. Cells that already have the directory are
# left untouched.
interim_path = r"C:\Users\ife12216\OneDrive - Institutt for Energiteknikk\Documents\Masteroppgave\ML_github\data\IFE\interim"
for foldername in os.listdir(interim_path):
    print(foldername)

    cell_path = os.path.join(interim_path, foldername)
    IR_path = os.path.join(cell_path, 'IR_cycles')
    print(IR_path)
    if os.path.exists(IR_path):
        continue

    os.mkdir(IR_path)
    # The directory name is the cell id, which prefixes the curve file names
    ir_pattern = os.path.join(curves_path, foldername + '*R_DC.txt')
    for ir_file in glob.glob(ir_pattern):
        shutil.copy(ir_file, IR_path)
       

Cell_ID_SK04A14442
C:\Users\ife12216\OneDrive - Institutt for Energiteknikk\Documents\Masteroppgave\ML_github\data\IFE\interim\Cell_ID_SK04A14442\IR_cycles
Cell_ID_SK04A20094
C:\Users\ife12216\OneDrive - Institutt for Energiteknikk\Documents\Masteroppgave\ML_github\data\IFE\interim\Cell_ID_SK04A20094\IR_cycles
Cell_ID_SK04A20099
C:\Users\ife12216\OneDrive - Institutt for Energiteknikk\Documents\Masteroppgave\ML_github\data\IFE\interim\Cell_ID_SK04A20099\IR_cycles
Cell_ID_SK04A20110
C:\Users\ife12216\OneDrive - Institutt for Energiteknikk\Documents\Masteroppgave\ML_github\data\IFE\interim\Cell_ID_SK04A20110\IR_cycles
Cell_ID_SK04A22375
C:\Users\ife12216\OneDrive - Institutt for Energiteknikk\Documents\Masteroppgave\ML_github\data\IFE\interim\Cell_ID_SK04A22375\IR_cycles
Cell_ID_SK04A23557
C:\Users\ife12216\OneDrive - Institutt for Energiteknikk\Documents\Masteroppgave\ML_github\data\IFE\interim\Cell_ID_SK04A23557\IR_cycles
Cell_ID_SK04A27576
C:\Users\ife12216\OneDrive - Institutt for En

### Testing stuff

In [24]:
# Insert the missing underscore in the C115 file names of every cell:
#   ...dchC115.txt      -> ...dch_C115.txt
#   ...dchC115_norm.txt -> ...dch_C115_norm.txt
# Raises IndexError for a cell directory that has no such file.
for foldername in os.listdir(interim_path):
    cell_dir = os.path.join(interim_path, foldername)

    # Look up both files before renaming anything
    misnamed = glob.glob(os.path.join(cell_dir, '*dchC115.txt'))[0]
    misnamed_norm = glob.glob(os.path.join(cell_dir, '*dchC115_norm.txt'))[0]

    # Strip the old suffix by its length and append the corrected one
    os.rename(misnamed, misnamed[:-len('C115.txt')] + '_C115.txt')
    os.rename(misnamed_norm, misnamed_norm[:-len('C115_norm.txt')] + '_C115_norm.txt')

In [16]:
curves_path = r"C:\Users\ife12216\OneDrive - Institutt for Energiteknikk\Documents\Masteroppgave\IFEData\LG_JP3\curves_data"
interim_path = r"C:\Users\ife12216\OneDrive - Institutt for Energiteknikk\Documents\Masteroppgave\ML_github\data\IFE\interim"
test_path = r"C:\Users\ife12216\OneDrive - Institutt for Energiteknikk\Documents\test"


def _find_discharge_curve(summary, cycle_column, thresholds):
    """Return the path of the first available discharge curve, or ``None``.

    For each value in *thresholds* (tried in order) the first row of *summary*
    whose *cycle_column* is strictly greater than the value gives a task and
    an actual cycle number; the first ``*<task>_<cycle>_dch.txt`` file found
    in ``curves_path`` for such a pair is returned.
    """
    for threshold in thresholds:
        rows = summary.loc[summary[cycle_column] > threshold]
        if rows.empty:
            continue
        # Read each column on its own so the values keep their column dtype
        task = rows['Tasks'].values[0]
        actual_cycle = rows['Cycle - actual'].values[0]
        matches = glob.glob(os.path.join(curves_path, '*' + str(task) + '_' + str(actual_cycle) + '_dch.txt'))
        if matches:
            return matches[0]
    return None


def _copy_with_suffix(source, destination_dir, suffix):
    """Copy *source* into *destination_dir* and swap its '.txt' ending for *suffix*."""
    copied_path = shutil.copy(source, destination_dir)
    # os.replace overwrites an existing target, so the cell can be re-run
    # (os.rename fails on Windows when the target already exists).
    os.replace(copied_path, copied_path[:-4] + suffix)


# (cycle counter column, thresholds tried in order, suffix of the copied file)
_CURVE_SELECTION = (
    ('Cycle - total', (15, 14, 16, 20), '_C15.txt'),
    ('Cycle - total - normalized', (15, 14, 16, 20), '_C15_norm.txt'),
    ('Cycle - total', (115, 114), '_C115.txt'),
    ('Cycle - total - normalized', (115, 114), '_C115_norm.txt'),
)

for foldername in os.listdir(interim_path): # loop through cell directories
    cell_dir = os.path.join(interim_path, foldername)
    print(cell_dir)

    # Find the summary file by the 'Lot' in its name rather than by its
    # position in the directory listing, which is not a stable order.
    summary_matches = glob.glob(os.path.join(cell_dir, '*' + 'Lot' + '*'))
    if not summary_matches:
        print('No summary file found in ' + cell_dir + ', skipping')
        continue
    summary_file = summary_matches[0]
    summary_df = pd.read_csv(summary_file, encoding = "ISO-8859-1", sep='\t')

    for cycle_column, thresholds, suffix in _CURVE_SELECTION:
        curve_file = _find_discharge_curve(summary_df, cycle_column, thresholds)
        if curve_file is None:
            # Report the gap and carry on with the remaining files and cells
            print('No discharge curve found for ' + suffix + ' in ' + cell_dir)
            continue
        _copy_with_suffix(curve_file, cell_dir, suffix)
    

C:\Users\ife12216\OneDrive - Institutt for Energiteknikk\Documents\Masteroppgave\ML_github\data\IFE\interim\Cell_ID_SK04A14442
C:\Users\ife12216\OneDrive - Institutt for Energiteknikk\Documents\Masteroppgave\ML_github\data\IFE\interim\Cell_ID_SK04A20094
C:\Users\ife12216\OneDrive - Institutt for Energiteknikk\Documents\Masteroppgave\ML_github\data\IFE\interim\Cell_ID_SK04A20099
C:\Users\ife12216\OneDrive - Institutt for Energiteknikk\Documents\Masteroppgave\ML_github\data\IFE\interim\Cell_ID_SK04A20110
C:\Users\ife12216\OneDrive - Institutt for Energiteknikk\Documents\Masteroppgave\ML_github\data\IFE\interim\Cell_ID_SK04A22375
C:\Users\ife12216\OneDrive - Institutt for Energiteknikk\Documents\Masteroppgave\ML_github\data\IFE\interim\Cell_ID_SK04A23557
C:\Users\ife12216\OneDrive - Institutt for Energiteknikk\Documents\Masteroppgave\ML_github\data\IFE\interim\Cell_ID_SK04A27576
C:\Users\ife12216\OneDrive - Institutt for Energiteknikk\Documents\Masteroppgave\ML_github\data\IFE\interim\Cel

In [52]:
# Count the '_R_DC.txt' curve files available for one example cell
ir_pattern = os.path.join(curves_path, 'Cell_ID_SK04A14442*_R_DC.txt')
files = glob.glob(ir_pattern)
print(len(files))

10


In [13]:
# Show the glob patterns that would be used to look up two discharge curves.
# The names used here must already exist in the notebook session.
pattern_c10 = os.path.join(curves_path, '*' + str(task_c10) + '_' + str(cycle10_act) + '_dch.txt')
print(pattern_c10)
# NOTE(review): this pattern combines task_c100 with cycle10_act - confirm
# whether a separate cycle number was intended for the second task.
pattern_c100 = os.path.join(curves_path, '*' + str(task_c100) + '_' + str(cycle10_act) + '_dch.txt')
print(pattern_c100)

C:\Users\ife12216\OneDrive - Institutt for Energiteknikk\Documents\Masteroppgave\IFEData\LG_JP3\curves_data\*15616_2_dch.txt
C:\Users\ife12216\OneDrive - Institutt for Energiteknikk\Documents\Masteroppgave\IFEData\LG_JP3\curves_data\*15732_2_dch.txt


In [48]:
# FOR DELETING CYCLING FILES, DONT RUN AGAIN
#
# Removes the last two directory entries of every cell directory that holds
# exactly four entries, and the last entry of every cell directory that holds
# exactly three. Which entries are "last" depends on the order returned by
# os.listdir, so running this on a different directory state deletes
# different files.

for cell in os.listdir(interim_path):
    cellpath = os.path.join(interim_path, cell)
    filelist = os.listdir(cellpath)

    if len(filelist) == 4:
        doomed = [filelist[-1], filelist[-2]]
    elif len(filelist) == 3:
        doomed = [filelist[-1]]
    else:
        doomed = []

    for entry in doomed:
        os.remove(os.path.join(cellpath, entry))