# Notebook for making interim IFE data

_Since we need to relate cycling data to cycle life for each cell, it is practical to have all the data for each cell gathered in separate directories._

__Original data structure:__
- \cycle_data
    - one summary file for every cell
- \curves_data
    - one file for every charge and every discharge cycle for every cell


__Data structure for interim data (in Github repository)__
- \interim
    - \cell 1
        - summary file
        - C20_cycles
            - all C20 charge/discharge cycles (characterization cycles)
        - IR_cycles
            - all `*R_DC.txt` curve files for the cell
    - \cell 2
    - ...

\
\
\
Last edit: 06.04.22

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import os, random
import pathlib
import glob
import shutil

# Show up to 100 columns and 100 rows when a DataFrame is displayed
for display_option in ('display.max_columns', 'display.max_rows'):
    pd.set_option(display_option, 100)

### Make interim data

In [50]:
# Original paths
curves_path = r"C:\Users\ife12216\OneDrive - Institutt for Energiteknikk\Documents\Masteroppgave\IFEData\LG_JP3\curves_data"
cycles_path = r"C:\Users\ife12216\OneDrive - Institutt for Energiteknikk\Documents\Masteroppgave\IFEData\LG_JP3\cycle_data"

# New path for interim data
interim_path = r"C:\Users\ife12216\OneDrive - Institutt for Energiteknikk\Documents\Masteroppgave\ML_github\data\IFE\interim"

# A summary file must contain more than this many rows for its cell to be
# kept (gets rid of shelf-life cells).
MIN_SUMMARY_ROWS = 500

# The cell id is the first 18 characters of the summary file name,
# e.g. "Cell_ID_SK04A14442".
CELL_ID_LENGTH = 18

# For each cycle-count column, the discharge curve of the first cycle whose
# count exceeds each threshold is copied into the cell directory.
CYCLE_COUNT_COLUMNS = ('Cycle - total', 'Cycle - total - normalized')
CYCLE_THRESHOLDS = (15, 115)


def _read_summary(path):
    """Read a tab-separated, ISO-8859-1 encoded summary file into a DataFrame."""
    return pd.read_csv(path, encoding="ISO-8859-1", sep='\t')


def _first_cycle_after(summary_df, count_column, threshold):
    """Return (task, actual cycle) of the first row where `count_column` > `threshold`.

    Returns None when no row of the summary exceeds the threshold, so the
    caller can skip the cell instead of failing with an IndexError.
    """
    matches = summary_df.loc[summary_df[count_column] > threshold]
    if matches.empty:
        return None
    # Index column-wise so each value keeps the dtype of its own column.
    return matches['Tasks'].values[0], matches['Cycle - actual'].values[0]


def _copy_discharge_curve(task, cycle, destination):
    """Copy the discharge curve file(s) of `task`/`cycle` from curves_path to `destination`."""
    pattern = os.path.join(curves_path, '*' + str(task) + '_' + str(cycle) + '_dch.txt')
    for curve_file in glob.glob(pattern):
        shutil.copy(curve_file, destination)


# List of cell-ids
cell_id = []

# Make a cell directory for every summary file with more than MIN_SUMMARY_ROWS
# rows and add the summary file to that directory.
for filename in os.listdir(cycles_path):
    f = os.path.join(cycles_path, filename)
    if not os.path.isfile(f):
        continue  # only regular files can be summary files
    df = _read_summary(f)
    if len(df.index) <= MIN_SUMMARY_ROWS:
        continue  # shelf-life cell

    cell_name = filename[:CELL_ID_LENGTH]
    cell_id.append(cell_name)  # add cell id to id-list
    cell_path = os.path.join(interim_path, cell_name)

    # Make the cell directory and copy the summary file to it; cells whose
    # directory already exists are left untouched.
    if not os.path.exists(cell_path):
        os.mkdir(cell_path)
        shutil.copy(f, cell_path)


# Fill every cell directory with its C20 cycles and selected discharge curves
for foldername in os.listdir(interim_path):  # loop through cell directories
    cell_dir = os.path.join(interim_path, foldername)
    if not os.path.isdir(cell_dir):
        continue  # stray file in the interim folder, not a cell directory

    # The summary file is the one with 'Lot' in its name
    summary_candidates = glob.glob(os.path.join(cell_dir, '*' + 'Lot' + '*'))
    if not summary_candidates:
        print('Skipping ' + foldername + ': no summary file found')
        continue
    summary_file = summary_candidates[0]
    summary_df = _read_summary(summary_file)

    # Find C20 cycles by looking at the discharge current in the summary file
    # (-0.05 C corresponds to a C/20 discharge).
    summary_c20_df = summary_df.loc[summary_df['Current_dch (C)'] == -0.05]
    tasks = summary_c20_df['Tasks'].unique()  # one task number per test

    # Make the C20_cycles directory and copy the C20 cycles to it, unless it
    # exists already.
    C20_dir = os.path.join(cell_dir, 'C20_cycles')
    if not os.path.exists(C20_dir):
        os.mkdir(C20_dir)
        for task in tasks:
            # It is the 3rd cycle in each test that has the characterization.
            # The underscore after the 3 keeps cycles 30-39, 300, ... out of
            # the match (curve files are named <...><task>_<cycle>_<kind>.txt).
            for curve_file in glob.glob(os.path.join(curves_path, '*' + str(task) + '_3_*')):
                shutil.copy(curve_file, C20_dir)

    # Copy the discharge curves of the first cycle past 15 and past 115, both
    # for the regular and for the normalized cycle count.
    for count_column in CYCLE_COUNT_COLUMNS:
        for threshold in CYCLE_THRESHOLDS:
            found = _first_cycle_after(summary_df, count_column, threshold)
            if found is None:
                print('Skipping ' + foldername + ': no row with ' + count_column
                      + ' > ' + str(threshold))
                continue
            task_found, cycle_found = found
            _copy_discharge_curve(task_found, cycle_found, cell_dir)
    

In [66]:
# Make an IR_cycles directory in every cell directory and copy the cell's
# *R_DC.txt curve files into it (cells that already have one are left as-is).
# NOTE: relies on `curves_path` being defined by an earlier cell.
interim_path = r"C:\Users\ife12216\OneDrive - Institutt for Energiteknikk\Documents\Masteroppgave\ML_github\data\IFE\interim"
for foldername in os.listdir(interim_path):
    cell_path = os.path.join(interim_path, foldername)
    if not os.path.isdir(cell_path):
        # os.listdir also returns plain files; a directory cannot be created
        # beneath them, so skip anything that is not a cell directory.
        continue
    print(foldername)

    IR_path = os.path.join(cell_path, 'IR_cycles')
    print(IR_path)
    if not os.path.exists(IR_path):
        os.mkdir(IR_path)
        # The curve files of a cell start with the cell directory's name
        for ir_file in glob.glob(os.path.join(curves_path, foldername + '*R_DC.txt')):
            shutil.copy(ir_file, IR_path)
       

Cell_ID_SK04A14442
C:\Users\ife12216\OneDrive - Institutt for Energiteknikk\Documents\Masteroppgave\ML_github\data\IFE\interim\Cell_ID_SK04A14442\IR_cycles
Cell_ID_SK04A20094
C:\Users\ife12216\OneDrive - Institutt for Energiteknikk\Documents\Masteroppgave\ML_github\data\IFE\interim\Cell_ID_SK04A20094\IR_cycles
Cell_ID_SK04A20099
C:\Users\ife12216\OneDrive - Institutt for Energiteknikk\Documents\Masteroppgave\ML_github\data\IFE\interim\Cell_ID_SK04A20099\IR_cycles
Cell_ID_SK04A20110
C:\Users\ife12216\OneDrive - Institutt for Energiteknikk\Documents\Masteroppgave\ML_github\data\IFE\interim\Cell_ID_SK04A20110\IR_cycles
Cell_ID_SK04A22375
C:\Users\ife12216\OneDrive - Institutt for Energiteknikk\Documents\Masteroppgave\ML_github\data\IFE\interim\Cell_ID_SK04A22375\IR_cycles
Cell_ID_SK04A23557
C:\Users\ife12216\OneDrive - Institutt for Energiteknikk\Documents\Masteroppgave\ML_github\data\IFE\interim\Cell_ID_SK04A23557\IR_cycles
Cell_ID_SK04A27576
C:\Users\ife12216\OneDrive - Institutt for En

### Testing stuff

In [54]:
curves_path = r"C:\Users\ife12216\OneDrive - Institutt for Energiteknikk\Documents\Masteroppgave\IFEData\LG_JP3\curves_data"


def _lookup_first(summary_df, count_column, threshold):
    """Return (actual cycle, task) of the first row where `count_column` > `threshold`.

    Raises IndexError when no row exceeds the threshold.
    """
    matches = summary_df.loc[summary_df[count_column] > threshold]
    return matches['Cycle - actual'].values[0], matches['Tasks'].values[0]


# Print, for every cell, which actual cycle and task hold the first cycle past
# 115 according to the regular and to the normalized cycle count.
for foldername in os.listdir(interim_path):  # loop through cell directories
    cell_dir = os.path.join(interim_path, foldername)
    if not os.path.isdir(cell_dir):
        continue  # stray file in the interim folder, not a cell directory

    # Locate the summary file by name ('Lot') rather than by its position in
    # os.listdir(), whose order is arbitrary and changes as files are added.
    summary_file = glob.glob(os.path.join(cell_dir, '*' + 'Lot' + '*'))[0]
    summary_df = pd.read_csv(summary_file, encoding="ISO-8859-1", sep='\t')

    #print(summary_df.loc[summary_df['Cycle - total'] == 115]['Ah_dch (Ah)'].values[0])
    #print(summary_df.loc[summary_df['Cycle - total - normalized'] > 115]['Ah_dch (C20 first) (%)'].values[0])

    c15_actual, task_c15 = _lookup_first(summary_df, 'Cycle - total', 15)
    c15_actual_norm, task_c15_norm = _lookup_first(summary_df, 'Cycle - total - normalized', 15)

    c115_actual, task_c115 = _lookup_first(summary_df, 'Cycle - total', 115)
    c115_actual_norm, task_c115_norm = _lookup_first(summary_df, 'Cycle - total - normalized', 115)

    print(c115_actual, c115_actual_norm, task_c115, task_c115_norm)

49 59 15638 15638
59 15 15219 15289
17 53 15382 15382
50 17 15185 15393
62 1 15158 15257
70 1 15629 15758
0 5 15754 15772
109 24 15526 15749
67 1 15074 15212
109 21 15846 16027
3 6 16098 16126
31 1 14403 14482
16 48 14693 14693
20 50 14701 14701
60 13 14940 15075
61 13 14923 15028
4 6 16143 16176
108 8 16072 16264
62 14 15047 15140
18 28 14465 14465
24 26 14949 15100
0 12 14781 14816
74 40 15067 15211
67 3 14731 14873
70 1 13644 13742
22 2 13628 13747
19 3 13629 13744
48 71 13640 13755
50 57 13531 13531
8 27 13752 13752
109 6 16642 16698
98 92 13641 13756
70 1 13610 13685
109 8 14217 14382
4 65 14700 14700
13 4 15797 15972
103 2 14177 14273
109 24 14218 14377
6 22 14122 14122
70 1 13765 13833
19 12 14011 14168
6 19 13751 13751
70 1 13631 13758
66 1 13632 13732
61 3 15732 15808


In [52]:
# Count the *_R_DC.txt curve files available for one example cell
ir_pattern = os.path.join(curves_path, 'Cell_ID_SK04A14442*_R_DC.txt')
files = glob.glob(ir_pattern)
print(len(files))

10


In [13]:
# Show the glob patterns used to look up the discharge curves of cycle 10 and
# cycle 100; each task is paired with its own cycle number.
# NOTE: task_c10, cycle10_act, task_c100 and cycle100_act must already exist
# in the notebook session - they are not defined in any cell shown here.
print(os.path.join(curves_path, '*' + str(task_c10) + '_' + str(cycle10_act) + '_dch.txt'))
print(os.path.join(curves_path, '*' + str(task_c100) + '_' + str(cycle100_act) + '_dch.txt'))

C:\Users\ife12216\OneDrive - Institutt for Energiteknikk\Documents\Masteroppgave\IFEData\LG_JP3\curves_data\*15616_2_dch.txt
C:\Users\ife12216\OneDrive - Institutt for Energiteknikk\Documents\Masteroppgave\IFEData\LG_JP3\curves_data\*15732_2_dch.txt


In [48]:
# WARNING: destructive one-off clean-up for deleting cycling files.
# DON'T RUN AGAIN.
#
# For every entry in interim_path the directory listing is inspected:
#   - exactly 4 entries: the last and then the second-to-last listed entry
#     are removed,
#   - exactly 3 entries: the last listed entry is removed,
#   - anything else: nothing is removed.
# NOTE(review): the entries are chosen purely by their position in
# os.listdir(), whose order is not guaranteed - verify the listing before
# ever re-running this.
for cell in os.listdir(interim_path):
    cellpath = os.path.join(interim_path, cell)
    filelist = os.listdir(cellpath)
    entry_count = len(filelist)

    if entry_count == 4:
        doomed = [filelist[-1], filelist[-2]]
    elif entry_count == 3:
        doomed = [filelist[-1]]
    else:
        doomed = []

    for entry in doomed:
        os.remove(os.path.join(cellpath, entry))