### This notebook takes in the raw MIT data and makes the following new datasets:

Used for Severson et al models:
- Summary data (per cycle)
- Cycles interpolated data for cycle 10 and 100, discharge only
- Difference in discharge capacity between cycle 100 and 10 ($\Delta Q_{100-10}(V)$)

Used for Fei et al models:
- Hei

In [1]:
import pathlib
import matplotlib.pyplot as plt
import pandas as pd
import os, random
import numpy as np
from functions import select_dataset

In [None]:
testfile = pathlib.Path(r"C:\Users\ife12216\OneDrive - Institutt for Energiteknikk\Documents\Masteroppgave\MLData_selected") / "FastCharge_000022_CH24_structure.json"
testfile.is_file()

In [None]:
df = pd.read_json(testfile)

In [None]:
df.head(5)

#### Datasets for Severson et al models

In [None]:
directory = r"C:\Users\ife12216\OneDrive - Institutt for Energiteknikk\Documents\Masteroppgave\MLData"
savepath1 = r"C:\Users\ife12216\OneDrive - Institutt for Energiteknikk\Documents\Masteroppgave\ML_Github\data\interim\summary"
savepath2 = r"C:\Users\ife12216\OneDrive - Institutt for Energiteknikk\Documents\Masteroppgave\ML_Github\data\interim\cycles_interpolated\cycle10_discharge"
savepath3 = r"C:\Users\ife12216\OneDrive - Institutt for Energiteknikk\Documents\Masteroppgave\ML_Github\data\interim\cycles_interpolated\cycle100_discharge"
savepath4 = r"C:\Users\ife12216\OneDrive - Institutt for Energiteknikk\Documents\Masteroppgave\ML_Github\data\interim\cycles_interpolated\DeltaQ100_10_discharge"

i = 0
for filename in os.listdir(directory[101:]):
    f = os.path.join(directory, filename)
    # checking if it is a file
    if os.path.isfile(f):
        print(f)
        
        df = pd.read_json(f)
        newfilename = filename[0:-15] # remove 'structure' from filename
        
        # Make csv files with summary data
        summary = select_dataset(df, 'summary')
        summary.to_csv(os.path.join(savepath1, newfilename + '_summary.csv'))
        
        # Make csv files with cycle 10 and 100 discharge data
        cycles_interpolated = select_dataset(df, 'cycles_interpolated')
        cycles_interpolated['cycle_life'] = cycles_interpolated.index[-1] # practical to have the cycle life in the same dataframe
        
        cycle10 = cycles_interpolated.loc[9,:]
        cycle100 = cycles_interpolated.loc[99,:]
        
        cycle10_discharge = cycle10.loc[cycle10['step_type'] == 'discharge']
        cycle100_discharge = cycle100.loc[cycle100['step_type'] == 'discharge']
        
        cycle10_discharge.to_csv(os.path.join(savepath2, newfilename + '_cycle10.csv'))
        cycle100_discharge.to_csv(os.path.join(savepath3, newfilename + '_cycle100.csv'))
        
        # Make csv files with DeltaQ100-10 values (includes cycle life as well)
        DeltaQ100_10 = pd.DataFrame({'discharge_capacity': cycle100_discharge['discharge_capacity'].values\
                                     - cycle10_discharge['discharge_capacity'].values,\
                                     'cycle_life': cycle10_discharge['cycle_life']})
        
        DeltaQ100_10.to_csv(os.path.join(savepath4, newfilename + '_DeltaQ100_10.csv'), index=False)

#### Datasets for Fei et al

In [53]:
directory = r"C:\Users\ife12216\OneDrive - Institutt for Energiteknikk\Documents\Masteroppgave\MLData"
savepath = r"C:\Users\ife12216\OneDrive - Institutt for Energiteknikk\Documents\Masteroppgave\ML_Github\data\interim\cycles_interpolated\first100cycles"

i = 0
for filename in os.listdir(directory)[7]:
    f = os.path.join(directory, filename)
    # checking if it is a file
    if os.path.isfile(f):
        print(f)
        
        df = pd.read_json(f)
        newfilename = filename[0:-15] # remove 'structure' from filename
        
        # Summary
        summary = select_dataset(df, 'summary')
        
        # Make csv files with first 100 cycles interpolated data
        cycles_interpolated = select_dataset(df, 'cycles_interpolated')
        cycles_interpolated['cycle_life'] = cycles_interpolated.index[-1] # practical to have the cycle life in the same dataframe
        
        # Only first 100 cycles for each cell
        first100cycles = cycles_interpolated.iloc[:200000]
        first100cycles.to_csv(os.path.join(savepath, newfilename + '_first100.csv'))

### Testing

In [46]:
first100cycles

Unnamed: 0_level_0,discharge_capacity,charge_capacity,voltage,internal_resistance,temperature,current,step_type,cycle_life
cycle_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,0.901052,0.457361,2.8,0.01372,33.279846,-2.52252,discharge,667
0,0.993462,0.553091,2.800701,0.01372,33.677429,-2.820274,discharge,667
0,1.085872,0.648821,2.801401,0.01372,34.075008,-3.118028,discharge,667
0,1.178282,0.744551,2.802102,0.01372,34.472591,-3.415782,discharge,667
0,1.270692,0.840281,2.802803,0.01372,34.870174,-3.713536,discharge,667
...,...,...,...,...,...,...,...,...
199,0.000295,1.022584,3.497197,0.013963,33.951366,-4.28894,discharge,667
199,0.000295,1.022584,3.497898,0.013963,33.951443,-4.275331,discharge,667
199,0.000295,1.022584,3.498599,0.013963,33.951523,-4.261723,discharge,667
199,0.000295,1.022584,3.499299,0.013963,33.951599,-4.248114,discharge,667


In [47]:
first100cycles.loc[0]

Unnamed: 0_level_0,discharge_capacity,charge_capacity,voltage,internal_resistance,temperature,current,step_type,cycle_life
cycle_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,0.901052,0.457361,2.8,0.01372,33.279846,-2.52252,discharge,667
0,0.993462,0.553091,2.800701,0.01372,33.677429,-2.820274,discharge,667
0,1.085872,0.648821,2.801401,0.01372,34.075008,-3.118028,discharge,667
0,1.178282,0.744551,2.802102,0.01372,34.472591,-3.415782,discharge,667
0,1.270692,0.840281,2.802803,0.01372,34.870174,-3.713536,discharge,667
...,...,...,...,...,...,...,...,...
0,0.491602,1.060961,3.497197,0.01372,30.999151,-4.137796,discharge,667
0,0.491602,1.060961,3.497898,0.01372,30.999186,-4.119509,discharge,667
0,0.491602,1.060961,3.498599,0.01372,30.99922,-4.101223,discharge,667
0,0.491602,1.060961,3.499299,0.01372,30.999243,-4.078812,discharge,667


In [48]:
cycles_interpolated.loc[0]

Unnamed: 0_level_0,discharge_capacity,charge_capacity,voltage,internal_resistance,temperature,current,step_type,cycle_life
cycle_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,0.901052,0.457361,2.8,0.01372,33.279846,-2.52252,discharge,667
0,0.993462,0.553091,2.800701,0.01372,33.677429,-2.820274,discharge,667
0,1.085872,0.648821,2.801401,0.01372,34.075008,-3.118028,discharge,667
0,1.178282,0.744551,2.802102,0.01372,34.472591,-3.415782,discharge,667
0,1.270692,0.840281,2.802803,0.01372,34.870174,-3.713536,discharge,667
...,...,...,...,...,...,...,...,...
0,,1.754676,,,,,charge,667
0,,1.75644,,,,,charge,667
0,,1.758203,,,,,charge,667
0,,1.759967,,,,,charge,667


In [26]:
cycles_interpolated.loc[cycles_interpolated['step_type'] == 'charge']

Unnamed: 0_level_0,discharge_capacity,charge_capacity,voltage,internal_resistance,temperature,current,step_type,cycle_life
cycle_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,,0.0,,,,,charge,667
0,0.491433,0.001763,2.750942,,30.798277,4.068336,charge,667
0,0.491433,0.003527,2.821381,,30.810385,4.06883,charge,667
0,0.491433,0.00529,2.847158,,30.819374,4.068603,charge,667
0,0.491433,0.007054,2.891067,,30.829554,4.068069,charge,667
...,...,...,...,...,...,...,...,...
667,0.879675,1.754676,3.538074,0.016914,33.842854,0.110018,charge,667
667,0.879675,1.75644,3.555493,0.016914,34.145988,0.110018,charge,667
667,0.879675,1.758203,3.575558,0.016914,34.403564,0.110015,charge,667
667,0.879675,1.759967,3.599326,0.016914,34.706619,0.11002,charge,667


In [43]:
cycles_interpolated

Unnamed: 0_level_0,discharge_capacity,charge_capacity,voltage,internal_resistance,temperature,current,step_type,cycle_life
cycle_index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
0,0.901052,0.457361,2.8,0.01372,33.279846,-2.52252,discharge,667
0,0.993462,0.553091,2.800701,0.01372,33.677429,-2.820274,discharge,667
0,1.085872,0.648821,2.801401,0.01372,34.075008,-3.118028,discharge,667
0,1.178282,0.744551,2.802102,0.01372,34.472591,-3.415782,discharge,667
0,1.270692,0.840281,2.802803,0.01372,34.870174,-3.713536,discharge,667
...,...,...,...,...,...,...,...,...
667,0.879675,1.754676,3.538074,0.016914,33.842854,0.110018,charge,667
667,0.879675,1.75644,3.555493,0.016914,34.145988,0.110018,charge,667
667,0.879675,1.758203,3.575558,0.016914,34.403564,0.110015,charge,667
667,0.879675,1.759967,3.599326,0.016914,34.706619,0.11002,charge,667


In [54]:
summary

NameError: name 'summary' is not defined