### This notebook takes in the raw MIT data and makes the following new datasets:

- Summary data (per cycle)
- Interpolated cycle data for cycles 10 and 100 (discharge only)
- Difference in discharge capacity between cycle 100 and 10 ($\Delta Q_{100-10}(V)$)


In [2]:
import pathlib
import matplotlib.pyplot as plt
import pandas as pd
import os, random
import numpy as np
from functions import select_dataset

In [None]:
# Point at one sample structure file and report whether it exists on disk.
testfile = pathlib.Path(
    r"C:\Users\ife12216\OneDrive - Institutt for Energiteknikk\Documents\Masteroppgave\MLData_selected"
).joinpath("FastCharge_000022_CH24_structure.json")
testfile.is_file()

In [None]:
# Parse the sample JSON file selected above with pandas.
df = pd.read_json(testfile)

In [None]:
# Preview the first five rows of the loaded data.
df.head(5)

#### Make datasets and save them as CSV files

In [None]:
# Input folder with the raw per-cell JSON files, and one output folder per
# derived dataset (summary, cycle 10, cycle 100, and their difference).
directory = r"C:\Users\ife12216\OneDrive - Institutt for Energiteknikk\Documents\Masteroppgave\MLData"
savepath1 = r"C:\Users\ife12216\OneDrive - Institutt for Energiteknikk\Documents\Masteroppgave\ML_Github\data\interim\summary"
savepath2 = r"C:\Users\ife12216\OneDrive - Institutt for Energiteknikk\Documents\Masteroppgave\ML_Github\data\interim\cycles_interpolated\cycle10_discharge"
savepath3 = r"C:\Users\ife12216\OneDrive - Institutt for Energiteknikk\Documents\Masteroppgave\ML_Github\data\interim\cycles_interpolated\cycle100_discharge"
savepath4 = r"C:\Users\ife12216\OneDrive - Institutt for Energiteknikk\Documents\Masteroppgave\ML_Github\data\interim\cycles_interpolated\DeltaQ100_10_discharge"

# Trailing part of each input file name that is dropped when naming the outputs
# (15 characters, e.g. "FastCharge_000022_CH24_structure.json" -> "FastCharge_000022_CH24").
structure_suffix = '_structure.json'

i = 0  # NOTE(review): not used in this cell; kept in case a later cell reads it
# List the same directory that the file paths are joined against below.
# (Slicing the path string before listing pointed os.listdir at the wrong place.)
for filename in os.listdir(directory):
    f = os.path.join(directory, filename)
    # Only regular files are processed; sub-directories are skipped
    if not os.path.isfile(f):
        continue
    print(f)

    df = pd.read_json(f)
    newfilename = filename[0:-len(structure_suffix)]  # strip the trailing '_structure.json'

    # Make csv files with summary data
    summary = select_dataset(df, 'summary')
    summary.to_csv(os.path.join(savepath1, newfilename + '_summary.csv'))

    # Make csv files with cycle 10 and 100 discharge data
    cycles_interpolated = select_dataset(df, 'cycles_interpolated')
    # Practical to have the cycle life in the same dataframe.
    # NOTE(review): assumes the last index label is the cell's final cycle - confirm
    cycles_interpolated['cycle_life'] = cycles_interpolated.index[-1]

    # NOTE(review): labels 9 and 99 are taken to be cycles 10 and 100, i.e. the
    # index is presumably a zero-based cycle number - verify against select_dataset
    cycle10 = cycles_interpolated.loc[9, :]
    cycle100 = cycles_interpolated.loc[99, :]

    # Keep only the discharge rows of each cycle
    cycle10_discharge = cycle10.loc[cycle10['step_type'] == 'discharge']
    cycle100_discharge = cycle100.loc[cycle100['step_type'] == 'discharge']

    cycle10_discharge.to_csv(os.path.join(savepath2, newfilename + '_cycle10.csv'))
    cycle100_discharge.to_csv(os.path.join(savepath3, newfilename + '_cycle100.csv'))

    # Make csv files with DeltaQ100-10 values (includes cycle life as well).
    # The subtraction uses .values, so rows are paired by position rather than
    # by index label; both discharge selections must have the same length.
    DeltaQ100_10 = pd.DataFrame({
        'discharge_capacity': cycle100_discharge['discharge_capacity'].values
                              - cycle10_discharge['discharge_capacity'].values,
        'cycle_life': cycle10_discharge['cycle_life'],
    })

    DeltaQ100_10.to_csv(os.path.join(savepath4, newfilename + '_DeltaQ100_10.csv'), index=False)

### Testing