In [1]:
import pandas as pd
import numpy as np
import os
import json
import glob
import re
from matplotlib import pyplot as plt
from monty.serialization import loadfn
from sklearn.metrics import r2_score 
from scipy import stats

## Creating New Data File

### LFP

In [2]:
data_dir = "./DeltaQFastCharge/"
# glob returns paths in arbitrary, OS-dependent order. Sort them so the row
# order of any frame built from this list is reproducible; a later cell pairs
# these rows with the trajectory rows purely by position.
file_list = sorted(
    f for f in glob.glob(os.path.join(data_dir, '*.json')) if "LFP" in f
)
# Preview the first five paths, like the NCA/NMC cells do
# (the previous slice `[:-5]` listed everything except the last five).
file_list[:5]

['./DeltaQFastCharge\\LFP_15C_0-100_0.5-1C_a_CH00_features_DeltaQFastCharge.json',
 './DeltaQFastCharge\\LFP_15C_0-100_0.5-1C_a_CH01_features_DeltaQFastCharge.json',
 './DeltaQFastCharge\\LFP_15C_0-100_0.5-1C_a_CH02_features_DeltaQFastCharge.json',
 './DeltaQFastCharge\\LFP_15C_0-100_0.5-1C_a_CH03_features_DeltaQFastCharge.json',
 './DeltaQFastCharge\\LFP_15C_0-100_0.5-1C_a_CH04_features_DeltaQFastCharge.json']

In [3]:
# Build one row per DeltaQFastCharge file. Every entry of data['X'] is indexed
# with [0], i.e. only the first element of each stored sequence is kept.
# DataFrame.append was removed in pandas 2.0 (and copied the whole frame on
# every call), so collect plain records and construct the frame once.
records = []
for f in file_list:
    with open(f) as fp:
        data = json.load(fp)
    records.append({k: v[0] for k, v in data['X'].items()})
# The append-based version produced alphabetically ordered, float-valued
# columns (see the recorded output); keep that layout so the saved CSV does
# not change. Assumes every feature is numeric or missing.
delta_df = pd.DataFrame(records).sort_index(axis=1).astype(float)
delta_df.head()

Unnamed: 0,abs_first_discharge_capacity_difference_cycles_2:100,abs_kurtosis_discharge_capacity_difference_cycles_2:100,abs_mean_discharge_capacity_difference_cycles_2:100,abs_min_discharge_capacity_difference_cycles_2:100,abs_skew_discharge_capacity_difference_cycles_2:100,abs_variance_discharge_capacity_difference_cycles_2:100,charge_time_cycles_1:5,discharge_capacity_cycle_100,discharge_capacity_cycle_2,integrated_time_temperature_cycles_1:100,...,intercept_discharge_capacity_cycle_number_91:100,internal_resistance_cycle_2,internal_resistance_difference_cycles_2:100,max_discharge_capacity_difference,max_temperature_cycles_1:100,min_internal_resistance_cycles_2:100,min_temperature_cycles_1:100,nominal_capacity_by_median,slope_discharge_capacity_cycle_number_2:100,slope_discharge_capacity_cycle_number_91:100
0,-3.309824,1.867028,-2.742859,-0.446806,0.584775,-3.166992,5760.0,1.032,1.03,306697.588291,...,1.032,,,1.035,18.459,,15.16,1.033,-0.000595,1.2177920000000002e-17
1,-3.019408,2.349816,-2.648706,-0.208776,0.993881,-3.056759,5785.600098,1.042,1.04,296427.402899,...,1.042,,,1.043,16.705999,,15.281,1.044,-0.000603,-5.691578e-18
2,-2.312774,1.694375,-1.917594,-0.195631,0.655882,-2.478396,5811.200195,1.03,1.03,262556.750559,...,1.03,,,1.029,20.877001,,15.107,1.031,-0.000596,1.727324e-17
3,-2.677882,1.49585,-2.07642,-0.63173,0.057256,-2.981589,5785.600098,1.025,1.029,268508.727464,...,1.025,,,1.026,22.084999,,15.306,1.028,-0.000626,1.9431250000000002e-18
4,-2.005443,0.393445,-1.682799,-0.189399,-0.220132,-1.291194,5785.600098,1.059,1.067,614925.496456,...,1.068364,,,1.074,25.669001,,24.084999,1.067,-0.000724,-9.69697e-05


In [4]:
# The three internal-resistance features show no values in the preview of the
# first rows, so they are removed before modelling.
resistance_columns = [
    'internal_resistance_cycle_2',
    'internal_resistance_difference_cycles_2:100',
    'min_internal_resistance_cycles_2:100',
]
delta_df = delta_df.drop(columns=resistance_columns)
delta_df.shape

(30, 18)

In [5]:
# Number of missing values in each remaining feature column.
delta_df.isnull().sum()

abs_first_discharge_capacity_difference_cycles_2:100       0
abs_kurtosis_discharge_capacity_difference_cycles_2:100    0
abs_mean_discharge_capacity_difference_cycles_2:100        0
abs_min_discharge_capacity_difference_cycles_2:100         0
abs_skew_discharge_capacity_difference_cycles_2:100        0
abs_variance_discharge_capacity_difference_cycles_2:100    0
charge_time_cycles_1:5                                     0
discharge_capacity_cycle_100                               0
discharge_capacity_cycle_2                                 0
integrated_time_temperature_cycles_1:100                   0
intercept_discharge_capacity_cycle_number_2:100            0
intercept_discharge_capacity_cycle_number_91:100           0
max_discharge_capacity_difference                          0
max_temperature_cycles_1:100                               1
min_temperature_cycles_1:100                               1
nominal_capacity_by_median                                 0
slope_discharge_capacity

In [6]:
data_dir2 = "./TrajectoryFastCharge/"
# Sorted so the row order is reproducible; rows of this frame are later paired
# with the DeltaQ feature rows by position.
file_list = sorted(
    f for f in glob.glob(os.path.join(data_dir2, '*.json')) if "LFP" in f
)

# One row per trajectory file, keeping the first element of every entry in
# data['X']. DataFrame.append was removed in pandas 2.0, so the rows are
# gathered in a list and the frame is built in a single call.
rows = []
for f in file_list:
    with open(f) as fp:
        data = json.load(fp)
    rows.append({k: v[0] for k, v in data['X'].items()})
# Alphabetical columns and float dtype reproduce the layout shown in the
# recorded output of the append-based version (e.g. 4550.0, not 4550).
traj_df = pd.DataFrame(rows).sort_index(axis=1).astype(float)

In [7]:
# Preview the first five trajectory rows.
traj_df.head(n=5)

Unnamed: 0,capacity_0.8,capacity_0.83,capacity_0.86,capacity_0.89,capacity_0.92,capacity_0.95,capacity_0.98
0,4550.0,4550.0,4550.0,4550.0,506.0,506.0,506.0
1,3552.0,3552.0,3552.0,3552.0,3552.0,506.0,506.0
2,3760.0,3760.0,3760.0,3760.0,506.0,506.0,506.0
3,3753.0,3753.0,3753.0,3753.0,506.0,506.0,506.0
4,3049.0,3049.0,3049.0,3049.0,207.0,207.0,207.0


In [8]:
# Inner join on the row index: row i of the feature frame is paired with row i
# of the trajectory frame, so both must have been built in the same file order.
full_df = delta_df.merge(traj_df, how="inner", left_index=True, right_index=True)
full_df.head(n=5)

Unnamed: 0,abs_first_discharge_capacity_difference_cycles_2:100,abs_kurtosis_discharge_capacity_difference_cycles_2:100,abs_mean_discharge_capacity_difference_cycles_2:100,abs_min_discharge_capacity_difference_cycles_2:100,abs_skew_discharge_capacity_difference_cycles_2:100,abs_variance_discharge_capacity_difference_cycles_2:100,charge_time_cycles_1:5,discharge_capacity_cycle_100,discharge_capacity_cycle_2,integrated_time_temperature_cycles_1:100,...,nominal_capacity_by_median,slope_discharge_capacity_cycle_number_2:100,slope_discharge_capacity_cycle_number_91:100,capacity_0.8,capacity_0.83,capacity_0.86,capacity_0.89,capacity_0.92,capacity_0.95,capacity_0.98
0,-3.309824,1.867028,-2.742859,-0.446806,0.584775,-3.166992,5760.0,1.032,1.03,306697.588291,...,1.033,-0.000595,1.2177920000000002e-17,4550.0,4550.0,4550.0,4550.0,506.0,506.0,506.0
1,-3.019408,2.349816,-2.648706,-0.208776,0.993881,-3.056759,5785.600098,1.042,1.04,296427.402899,...,1.044,-0.000603,-5.691578e-18,3552.0,3552.0,3552.0,3552.0,3552.0,506.0,506.0
2,-2.312774,1.694375,-1.917594,-0.195631,0.655882,-2.478396,5811.200195,1.03,1.03,262556.750559,...,1.031,-0.000596,1.727324e-17,3760.0,3760.0,3760.0,3760.0,506.0,506.0,506.0
3,-2.677882,1.49585,-2.07642,-0.63173,0.057256,-2.981589,5785.600098,1.025,1.029,268508.727464,...,1.028,-0.000626,1.9431250000000002e-18,3753.0,3753.0,3753.0,3753.0,506.0,506.0,506.0
4,-2.005443,0.393445,-1.682799,-0.189399,-0.220132,-1.291194,5785.600098,1.059,1.067,614925.496456,...,1.067,-0.000724,-9.69697e-05,3049.0,3049.0,3049.0,3049.0,207.0,207.0,207.0


In [9]:
# Persist the combined LFP features and targets without the row index.
full_df.to_csv(path_or_buf="LFPBatteries.csv", index=False)

### NCA

In [4]:
data_dir = "./DeltaQFastCharge/"
# glob returns paths in arbitrary, OS-dependent order. Sort them so the row
# order of any frame built from this list is reproducible; a later cell pairs
# these rows with the trajectory rows purely by position.
file_list = sorted(
    f for f in glob.glob(os.path.join(data_dir, '*.json')) if "NCA" in f
)
file_list[:5]

['./DeltaQFastCharge\\NCA_15C_0-100_0.5-1C_a_CH00_features_DeltaQFastCharge.json',
 './DeltaQFastCharge\\NCA_15C_0-100_0.5-1C_b_CH01_features_DeltaQFastCharge.json',
 './DeltaQFastCharge\\NCA_15C_0-100_0.5-2C_a_CH02_features_DeltaQFastCharge.json',
 './DeltaQFastCharge\\NCA_15C_0-100_0.5-2C_b_CH03_features_DeltaQFastCharge.json',
 './DeltaQFastCharge\\NCA_25C_0-100_0.5-0.5C_a_CH04_features_DeltaQFastCharge.json']

In [5]:
# Build one row per DeltaQFastCharge file. Every entry of data['X'] is indexed
# with [0], i.e. only the first element of each stored sequence is kept.
# DataFrame.append was removed in pandas 2.0 (and copied the whole frame on
# every call), so collect plain records and construct the frame once.
records = []
for f in file_list:
    with open(f) as fp:
        data = json.load(fp)
    records.append({k: v[0] for k, v in data['X'].items()})
# The append-based version produced alphabetically ordered, float-valued
# columns (see the recorded output); keep that layout so the saved CSV does
# not change. Assumes every feature is numeric or missing.
delta_df = pd.DataFrame(records).sort_index(axis=1).astype(float)
delta_df.head()

Unnamed: 0,abs_first_discharge_capacity_difference_cycles_2:100,abs_kurtosis_discharge_capacity_difference_cycles_2:100,abs_mean_discharge_capacity_difference_cycles_2:100,abs_min_discharge_capacity_difference_cycles_2:100,abs_skew_discharge_capacity_difference_cycles_2:100,abs_variance_discharge_capacity_difference_cycles_2:100,charge_time_cycles_1:5,discharge_capacity_cycle_100,discharge_capacity_cycle_2,integrated_time_temperature_cycles_1:100,...,intercept_discharge_capacity_cycle_number_91:100,internal_resistance_cycle_2,internal_resistance_difference_cycles_2:100,max_discharge_capacity_difference,max_temperature_cycles_1:100,min_internal_resistance_cycles_2:100,min_temperature_cycles_1:100,nominal_capacity_by_median,slope_discharge_capacity_cycle_number_2:100,slope_discharge_capacity_cycle_number_91:100
0,-0.581882,0.091227,-0.652902,-0.556555,-0.198558,-2.537839,1920.0,2.619,2.911,358190.7,...,2.878018,,,2.817,30.881001,,16.419001,2.824,-0.004213,-0.002618
1,-0.718078,0.089352,-0.82197,-0.705399,-0.252584,-2.727858,1920.0,2.662,2.895,282382.8,...,2.828491,,,2.795,19.851,,15.94,2.8,-0.003508,-0.001691
2,-0.705683,0.852673,-1.107777,0.148826,0.321565,-1.115799,1971.199951,2.543,2.87,344613.3,...,2.904036,,,2.514,47.819,,16.6,2.799,-0.004496,-0.003636
3,-0.600825,0.183003,-1.070569,0.414443,-0.334105,-0.100452,1996.800049,2.56,2.901,344374.0,...,2.863273,,,2.435,48.041,,16.5,2.798,-0.004436,-0.003073
4,-0.835342,0.101538,-0.537124,0.23044,0.188283,-0.195601,1984.0,2.901,3.161,1130774.0,...,1.242109,,,0.0,32.912998,,27.773001,3.0095,-0.001405,0.014958


In [6]:
# The three internal-resistance features show no values in the preview of the
# first rows, so they are removed before modelling.
resistance_columns = [
    'internal_resistance_cycle_2',
    'internal_resistance_difference_cycles_2:100',
    'min_internal_resistance_cycles_2:100',
]
delta_df = delta_df.drop(columns=resistance_columns)
delta_df.shape

(24, 18)

In [7]:
# Number of missing values in each remaining feature column.
delta_df.isnull().sum()

abs_first_discharge_capacity_difference_cycles_2:100       0
abs_kurtosis_discharge_capacity_difference_cycles_2:100    0
abs_mean_discharge_capacity_difference_cycles_2:100        0
abs_min_discharge_capacity_difference_cycles_2:100         0
abs_skew_discharge_capacity_difference_cycles_2:100        0
abs_variance_discharge_capacity_difference_cycles_2:100    0
charge_time_cycles_1:5                                     0
discharge_capacity_cycle_100                               0
discharge_capacity_cycle_2                                 0
integrated_time_temperature_cycles_1:100                   0
intercept_discharge_capacity_cycle_number_2:100            0
intercept_discharge_capacity_cycle_number_91:100           0
max_discharge_capacity_difference                          0
max_temperature_cycles_1:100                               1
min_temperature_cycles_1:100                               1
nominal_capacity_by_median                                 0
slope_discharge_capacity

In [9]:
data_dir2 = "./TrajectoryFastCharge/"
# Sorted so the row order is reproducible; rows of this frame are later paired
# with the DeltaQ feature rows by position.
file_list = sorted(
    f for f in glob.glob(os.path.join(data_dir2, '*.json')) if "NCA" in f
)

# One row per trajectory file, keeping the first element of every entry in
# data['X']. DataFrame.append was removed in pandas 2.0, so the rows are
# gathered in a list and the frame is built in a single call.
rows = []
for f in file_list:
    with open(f) as fp:
        data = json.load(fp)
    rows.append({k: v[0] for k, v in data['X'].items()})
# Alphabetical columns and float dtype reproduce the layout shown in the
# recorded output of the append-based version (e.g. 521.0, not 521).
traj_df = pd.DataFrame(rows).sort_index(axis=1).astype(float)

In [11]:
# Display the NCA trajectory frame built in the previous cell
# (one row per file in file_list).
traj_df

Unnamed: 0,capacity_0.8,capacity_0.83,capacity_0.86,capacity_0.89,capacity_0.92,capacity_0.95,capacity_0.98
0,521.0,257.0,190.0,144.0,108.0,77.0,38.0
1,257.0,257.0,257.0,232.0,156.0,102.0,49.0
2,255.0,205.0,158.0,119.0,91.0,69.0,47.0
3,783.0,233.0,175.0,130.0,94.0,69.0,45.0
4,4.0,4.0,4.0,4.0,4.0,4.0,4.0
5,134.0,134.0,134.0,134.0,94.0,74.0,44.0
6,653.0,257.0,257.0,232.0,171.0,113.0,49.0
7,521.0,521.0,257.0,216.0,145.0,109.0,49.0
8,521.0,257.0,242.0,190.0,147.0,102.0,54.0
9,257.0,257.0,257.0,202.0,156.0,95.0,51.0


In [12]:
# Preview the first five trajectory rows.
traj_df.head(n=5)

Unnamed: 0,capacity_0.8,capacity_0.83,capacity_0.86,capacity_0.89,capacity_0.92,capacity_0.95,capacity_0.98
0,521.0,257.0,190.0,144.0,108.0,77.0,38.0
1,257.0,257.0,257.0,232.0,156.0,102.0,49.0
2,255.0,205.0,158.0,119.0,91.0,69.0,47.0
3,783.0,233.0,175.0,130.0,94.0,69.0,45.0
4,4.0,4.0,4.0,4.0,4.0,4.0,4.0


In [14]:
# Inner join on the row index: row i of the feature frame is paired with row i
# of the trajectory frame, so both must have been built in the same file order.
full_df = delta_df.merge(traj_df, how="inner", left_index=True, right_index=True)
full_df.head(n=5)

Unnamed: 0,abs_first_discharge_capacity_difference_cycles_2:100,abs_kurtosis_discharge_capacity_difference_cycles_2:100,abs_mean_discharge_capacity_difference_cycles_2:100,abs_min_discharge_capacity_difference_cycles_2:100,abs_skew_discharge_capacity_difference_cycles_2:100,abs_variance_discharge_capacity_difference_cycles_2:100,charge_time_cycles_1:5,discharge_capacity_cycle_100,discharge_capacity_cycle_2,integrated_time_temperature_cycles_1:100,...,nominal_capacity_by_median,slope_discharge_capacity_cycle_number_2:100,slope_discharge_capacity_cycle_number_91:100,capacity_0.8,capacity_0.83,capacity_0.86,capacity_0.89,capacity_0.92,capacity_0.95,capacity_0.98
0,-0.581882,0.091227,-0.652902,-0.556555,-0.198558,-2.537839,1920.0,2.619,2.911,358190.7,...,2.824,-0.004213,-0.002618,521.0,257.0,190.0,144.0,108.0,77.0,38.0
1,-0.718078,0.089352,-0.82197,-0.705399,-0.252584,-2.727858,1920.0,2.662,2.895,282382.8,...,2.8,-0.003508,-0.001691,257.0,257.0,257.0,232.0,156.0,102.0,49.0
2,-0.705683,0.852673,-1.107777,0.148826,0.321565,-1.115799,1971.199951,2.543,2.87,344613.3,...,2.799,-0.004496,-0.003636,255.0,205.0,158.0,119.0,91.0,69.0,47.0
3,-0.600825,0.183003,-1.070569,0.414443,-0.334105,-0.100452,1996.800049,2.56,2.901,344374.0,...,2.798,-0.004436,-0.003073,783.0,233.0,175.0,130.0,94.0,69.0,45.0
4,-0.835342,0.101538,-0.537124,0.23044,0.188283,-0.195601,1984.0,2.901,3.161,1130774.0,...,3.0095,-0.001405,0.014958,4.0,4.0,4.0,4.0,4.0,4.0,4.0


In [15]:
# Persist the combined NCA features and targets without the row index.
full_df.to_csv(path_or_buf="NCABatteries.csv", index=False)

### NMC

In [16]:
data_dir = "./DeltaQFastCharge/"
# glob returns paths in arbitrary, OS-dependent order. Sort them so the row
# order of any frame built from this list is reproducible; a later cell pairs
# these rows with the trajectory rows purely by position.
file_list = sorted(
    f for f in glob.glob(os.path.join(data_dir, '*.json')) if "NMC" in f
)
file_list[:5]

['./DeltaQFastCharge\\NMC_15C_0-100_0.5-1C_a_CH00_features_DeltaQFastCharge.json',
 './DeltaQFastCharge\\NMC_15C_0-100_0.5-1C_b_CH01_features_DeltaQFastCharge.json',
 './DeltaQFastCharge\\NMC_15C_0-100_0.5-2C_a_CH02_features_DeltaQFastCharge.json',
 './DeltaQFastCharge\\NMC_15C_0-100_0.5-2C_b_CH03_features_DeltaQFastCharge.json',
 './DeltaQFastCharge\\NMC_25C_0-100_0.5-0.5C_a_CH04_features_DeltaQFastCharge.json']

In [17]:
# Build one row per DeltaQFastCharge file. Every entry of data['X'] is indexed
# with [0], i.e. only the first element of each stored sequence is kept.
# DataFrame.append was removed in pandas 2.0 (and copied the whole frame on
# every call), so collect plain records and construct the frame once.
records = []
for f in file_list:
    with open(f) as fp:
        data = json.load(fp)
    records.append({k: v[0] for k, v in data['X'].items()})
# The append-based version produced alphabetically ordered, float-valued
# columns (see the recorded output); keep that layout so the saved CSV does
# not change. Assumes every feature is numeric or missing.
delta_df = pd.DataFrame(records).sort_index(axis=1).astype(float)
delta_df.head()

Unnamed: 0,abs_first_discharge_capacity_difference_cycles_2:100,abs_kurtosis_discharge_capacity_difference_cycles_2:100,abs_mean_discharge_capacity_difference_cycles_2:100,abs_min_discharge_capacity_difference_cycles_2:100,abs_skew_discharge_capacity_difference_cycles_2:100,abs_variance_discharge_capacity_difference_cycles_2:100,charge_time_cycles_1:5,discharge_capacity_cycle_100,discharge_capacity_cycle_2,integrated_time_temperature_cycles_1:100,...,intercept_discharge_capacity_cycle_number_91:100,internal_resistance_cycle_2,internal_resistance_difference_cycles_2:100,max_discharge_capacity_difference,max_temperature_cycles_1:100,min_internal_resistance_cycles_2:100,min_temperature_cycles_1:100,nominal_capacity_by_median,slope_discharge_capacity_cycle_number_2:100,slope_discharge_capacity_cycle_number_91:100
0,-0.619562,0.933,-1.086863,0.197719,0.120984,-0.887046,2099.199951,2.296,2.697,311213.419478,...,2.644455,,,2.634,24.450001,,16.409,2.578,-0.00524,-0.003521
1,-0.875078,0.76493,-1.212572,0.099954,-0.164491,-0.962576,2073.600098,2.287,2.579,299337.038784,...,2.640636,,,2.51,24.882999,,16.264,2.5495,-0.004539,-0.00357
2,-0.688966,0.362048,-1.339363,0.033846,-0.055124,-0.844389,2073.600098,2.316,2.724,279772.048771,...,2.652673,,,2.69,31.305,,16.363001,2.5985,-0.005365,-0.003406
3,-0.671967,0.62331,-1.583912,0.002805,0.129267,-1.020248,2073.600098,2.294,2.714,286196.086521,...,2.636782,,,2.676,35.325001,,16.077,2.577,-0.005289,-0.003448
4,-0.882375,0.709305,-1.173814,0.352848,-0.553786,-0.492553,2099.199951,2.661,2.907,590770.753524,...,2.824455,,,2.849,30.141001,,25.106001,2.681,-0.00473,-0.001655


In [18]:
# The three internal-resistance features show no values in the preview of the
# first rows, so they are removed before modelling.
resistance_columns = [
    'internal_resistance_cycle_2',
    'internal_resistance_difference_cycles_2:100',
    'min_internal_resistance_cycles_2:100',
]
delta_df = delta_df.drop(columns=resistance_columns)
delta_df.shape

(18, 18)

In [19]:
# Number of missing values in each remaining feature column.
delta_df.isnull().sum()

abs_first_discharge_capacity_difference_cycles_2:100       0
abs_kurtosis_discharge_capacity_difference_cycles_2:100    0
abs_mean_discharge_capacity_difference_cycles_2:100        0
abs_min_discharge_capacity_difference_cycles_2:100         0
abs_skew_discharge_capacity_difference_cycles_2:100        0
abs_variance_discharge_capacity_difference_cycles_2:100    0
charge_time_cycles_1:5                                     0
discharge_capacity_cycle_100                               0
discharge_capacity_cycle_2                                 0
integrated_time_temperature_cycles_1:100                   0
intercept_discharge_capacity_cycle_number_2:100            0
intercept_discharge_capacity_cycle_number_91:100           0
max_discharge_capacity_difference                          0
max_temperature_cycles_1:100                               0
min_temperature_cycles_1:100                               0
nominal_capacity_by_median                                 0
slope_discharge_capacity

In [20]:
data_dir2 = "./TrajectoryFastCharge/"
# Sorted so the row order is reproducible; rows of this frame are later paired
# with the DeltaQ feature rows by position.
file_list = sorted(
    f for f in glob.glob(os.path.join(data_dir2, '*.json')) if "NMC" in f
)

# One row per trajectory file, keeping the first element of every entry in
# data['X']. DataFrame.append was removed in pandas 2.0, so the rows are
# gathered in a list and the frame is built in a single call.
rows = []
for f in file_list:
    with open(f) as fp:
        data = json.load(fp)
    rows.append({k: v[0] for k, v in data['X'].items()})
# Alphabetical columns and float dtype reproduce the layout shown in the
# recorded output of the append-based version (e.g. 206.0, not 206).
traj_df = pd.DataFrame(rows).sort_index(axis=1).astype(float)

In [21]:
# Display the NMC trajectory frame built in the previous cell
# (one row per file in file_list).
traj_df

Unnamed: 0,capacity_0.8,capacity_0.83,capacity_0.86,capacity_0.89,capacity_0.92,capacity_0.95,capacity_0.98
0,206.0,156.0,124.0,100.0,78.0,57.0,35.0
1,226.0,164.0,130.0,105.0,83.0,62.0,43.0
2,197.0,154.0,125.0,101.0,78.0,55.0,33.0
3,195.0,152.0,123.0,100.0,78.0,55.0,2.0
4,552.0,552.0,61.0,57.0,49.0,34.0,27.0
5,207.0,207.0,207.0,207.0,159.0,69.0,35.0
6,520.0,520.0,256.0,256.0,165.0,77.0,36.0
7,784.0,784.0,256.0,256.0,175.0,82.0,37.0
8,396.0,396.0,132.0,132.0,132.0,79.0,37.0
9,528.0,132.0,132.0,132.0,132.0,76.0,35.0


In [22]:
# Preview the first five trajectory rows.
traj_df.head(n=5)

Unnamed: 0,capacity_0.8,capacity_0.83,capacity_0.86,capacity_0.89,capacity_0.92,capacity_0.95,capacity_0.98
0,206.0,156.0,124.0,100.0,78.0,57.0,35.0
1,226.0,164.0,130.0,105.0,83.0,62.0,43.0
2,197.0,154.0,125.0,101.0,78.0,55.0,33.0
3,195.0,152.0,123.0,100.0,78.0,55.0,2.0
4,552.0,552.0,61.0,57.0,49.0,34.0,27.0


In [23]:
# Inner join on the row index: row i of the feature frame is paired with row i
# of the trajectory frame, so both must have been built in the same file order.
full_df = delta_df.merge(traj_df, how="inner", left_index=True, right_index=True)
full_df.head(n=5)

Unnamed: 0,abs_first_discharge_capacity_difference_cycles_2:100,abs_kurtosis_discharge_capacity_difference_cycles_2:100,abs_mean_discharge_capacity_difference_cycles_2:100,abs_min_discharge_capacity_difference_cycles_2:100,abs_skew_discharge_capacity_difference_cycles_2:100,abs_variance_discharge_capacity_difference_cycles_2:100,charge_time_cycles_1:5,discharge_capacity_cycle_100,discharge_capacity_cycle_2,integrated_time_temperature_cycles_1:100,...,nominal_capacity_by_median,slope_discharge_capacity_cycle_number_2:100,slope_discharge_capacity_cycle_number_91:100,capacity_0.8,capacity_0.83,capacity_0.86,capacity_0.89,capacity_0.92,capacity_0.95,capacity_0.98
0,-0.619562,0.933,-1.086863,0.197719,0.120984,-0.887046,2099.199951,2.296,2.697,311213.419478,...,2.578,-0.00524,-0.003521,206.0,156.0,124.0,100.0,78.0,57.0,35.0
1,-0.875078,0.76493,-1.212572,0.099954,-0.164491,-0.962576,2073.600098,2.287,2.579,299337.038784,...,2.5495,-0.004539,-0.00357,226.0,164.0,130.0,105.0,83.0,62.0,43.0
2,-0.688966,0.362048,-1.339363,0.033846,-0.055124,-0.844389,2073.600098,2.316,2.724,279772.048771,...,2.5985,-0.005365,-0.003406,197.0,154.0,125.0,101.0,78.0,55.0,33.0
3,-0.671967,0.62331,-1.583912,0.002805,0.129267,-1.020248,2073.600098,2.294,2.714,286196.086521,...,2.577,-0.005289,-0.003448,195.0,152.0,123.0,100.0,78.0,55.0,2.0
4,-0.882375,0.709305,-1.173814,0.352848,-0.553786,-0.492553,2099.199951,2.661,2.907,590770.753524,...,2.681,-0.00473,-0.001655,552.0,552.0,61.0,57.0,49.0,34.0,27.0


In [24]:
# Persist the combined NMC features and targets without the row index.
full_df.to_csv(path_or_buf="NMCBatteries.csv", index=False)

## Creating Original Data File

In [124]:
# Root folder that holds the DeltaQFastCharge / TrajectoryFastCharge feature
# JSON files. The default is the original author's local path; set the
# TOYOTA_FEATURES_DIR environment variable (e.g. to './Toyota/features') to run
# the notebook on another machine without editing this cell.
features_root = os.environ.get(
    'TOYOTA_FEATURES_DIR',
    'C:/Users/mattm/Documents/CalPoly/DATA 451/Toyota/features',
)
d_dir = features_root + '/DeltaQFastCharge' # Directory for DeltaQFastCharge
t_dir = features_root + '/TrajectoryFastCharge' # Directory for TrajectoryFastCharge

In [125]:
def getListOfFiles(dirName):
    """Return the paths of the ``.json`` entries directly inside ``dirName``.

    Parameters
    ----------
    dirName : str
        Directory to scan. Sub-directories are not descended into.

    Returns
    -------
    list of str
        ``os.path.join(dirName, entry)`` for every entry whose own name ends
        with ``.json``, ordered by entry name.

    Notes
    -----
    * The extension is checked on the entry name only. The previous substring
      test on the joined path also matched names such as ``x.json.bak`` and
      every entry of a directory whose path contained ``.json``.
    * ``os.listdir`` returns entries in arbitrary order; sorting makes the
      result reproducible, which matters because the notebook pairs the two
      file lists by position.
    * Errors raised by ``os.listdir`` (e.g. a missing directory) propagate.
    """
    return [
        os.path.join(dirName, entry)
        for entry in sorted(os.listdir(dirName))
        if entry.endswith(".json")
    ]

Getting Features

In [126]:
# Collect the DeltaQFastCharge feature files, print how many were found and
# show their paths.
d_list = getListOfFiles(dirName=d_dir)
print(len(d_list))
d_list

139


['C:/Users/mattm/Documents/CalPoly/DATA 451/Toyota/features/DeltaQFastCharge\\FastCharge_000000_CH19_features_DeltaQFastCharge.json',
 'C:/Users/mattm/Documents/CalPoly/DATA 451/Toyota/features/DeltaQFastCharge\\FastCharge_000001_CH16_features_DeltaQFastCharge.json',
 'C:/Users/mattm/Documents/CalPoly/DATA 451/Toyota/features/DeltaQFastCharge\\FastCharge_000001_CH30_features_DeltaQFastCharge.json',
 'C:/Users/mattm/Documents/CalPoly/DATA 451/Toyota/features/DeltaQFastCharge\\FastCharge_000001_CH38_features_DeltaQFastCharge.json',
 'C:/Users/mattm/Documents/CalPoly/DATA 451/Toyota/features/DeltaQFastCharge\\FastCharge_000002_CH10_features_DeltaQFastCharge.json',
 'C:/Users/mattm/Documents/CalPoly/DATA 451/Toyota/features/DeltaQFastCharge\\FastCharge_000002_CH18_features_DeltaQFastCharge.json',
 'C:/Users/mattm/Documents/CalPoly/DATA 451/Toyota/features/DeltaQFastCharge\\FastCharge_000002_CH2_features_DeltaQFastCharge.json',
 'C:/Users/mattm/Documents/CalPoly/DATA 451/Toyota/features/Del

In [127]:
# Channel label of every feature file: the text between "CH" and the following
# "_f" (as in "..._CH19_features_...").
# - Raw string: the previous pattern "CH(.*?)\_f" relied on "\_", an invalid
#   string escape that newer Python versions warn about.
# - Only the base name is searched, so a "CH" elsewhere in the directory path
#   cannot be picked up by mistake.
new_names = [
    re.findall(r"CH(.*?)_f", os.path.basename(file))[0]
    for file in d_list
]

In [128]:
# Flatten every DeltaQFastCharge JSON file into a one-row frame and stack them.
# The frames are gathered first and concatenated once: calling pd.concat inside
# the loop re-copies all previous rows on every iteration and starts from an
# empty frame, which newer pandas versions warn about.
frames = []
for file in d_list:
    with open(file) as data_file:
        frames.append(pd.json_normalize(json.load(data_file)))
# ignore_index=True gives the same 0..n-1 index as reset_index(drop=True).
d_df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
# new_names holds one channel label per file, in the same order as d_list.
d_df['bat_num'] = new_names
d_df.head()

Unnamed: 0,@module,@class,name,@version,X.discharge_capacity_cycle_2,X.max_discharge_capacity_difference,X.discharge_capacity_cycle_100,X.integrated_time_temperature_cycles_1:100,X.charge_time_cycles_1:5,X.abs_min_discharge_capacity_difference_cycles_2:100,...,X.internal_resistance_difference_cycles_2:100,X.nominal_capacity_by_median,metadata.barcode,metadata.protocol,metadata.channel_id,metadata.parameters.init_pred_cycle,metadata.parameters.mid_pred_cycle,metadata.parameters.final_pred_cycle,metadata.parameters.n_nominal_cycles,bat_num
0,beep.featurize,DeltaQFastCharge,/mnt/c/Users/sjrt1/Documents/School/Winter2021...,2021.1.11.15,[1.0454259],[0.8891461000000003],[1.0331815],[231072.4263671875],[614.4000244140625],[-1.1444158302992298],...,[-0.0010415539145469666],[1.047977],el150800440551,2017-06-30_tests\20170630-4_4C_55per_6C.sdu,18,10,91,100,40,19
1,beep.featurize,DeltaQFastCharge,/mnt/c/Users/sjrt1/Documents/School/Winter2021...,2021.1.11.15,[1.0620247999999999],[0.48970650000000004],[1.0439155],[149767.85131961218],[563.2000122070312],[-1.2773591296554208],...,[0.0003414759412407875],[1.06011365],el150800737229,2018-04-12_batch8\20180412-3_7C_31per_5_9C_new...,15,10,91,100,40,16
2,beep.featurize,DeltaQFastCharge,/mnt/c/Users/sjrt1/Documents/School/Winter2021...,2021.1.11.15,[1.0678786],[0.4961450999999999],[1.054091],[205315.4877278646],[614.4000244140625],[-1.3752393675218624],...,[0.00024036318063735962],[1.06736225],el150800737366,2018-04-12_batch8\20180412-3_7C_31per_5_9C_new...,29,10,91,100,40,30
3,beep.featurize,DeltaQFastCharge,/mnt/c/Users/sjrt1/Documents/School/Winter2021...,2021.1.11.15,[1.0512739],[0.49011190000000004],[1.0341983000000001],[159799.4890051929],[563.2000122070312],[-1.2430890844325302],...,[0.00042933784425258636],[1.05098615],el150800737234,2018-04-12_batch8\20180412-3_7C_31per_5_9C_new...,37,10,91,100,40,38
4,beep.featurize,DeltaQFastCharge,/mnt/c/Users/sjrt1/Documents/School/Winter2021...,2021.1.11.15,[1.0665728],[0.47571469999999993],[1.0695093],[146029.5250360061],[614.4000244140625],[-1.640443517452452],...,[-0.0001586293801665306],[1.0708095],el150800737329,2018-04-12_batch8\20180412-5C_67per_4C_newstru...,9,10,91,100,40,10


In [129]:
# Batch label for every cell, chosen by the date tag found in its protocol
# string. The tags are tried in this order and the first match wins.
batch_by_date = {'2017-05': 'b1', '2017-06': 'b2', '2018-04': 'b3'}
batches = []
for protocol in d_df['metadata.protocol']:
    for date_tag, label in batch_by_date.items():
        if date_tag in protocol:
            batches.append(label)
            break
    else:
        # Previously an unmatched protocol was skipped silently, leaving
        # `batches` shorter than d_df; fail here with the offending value.
        raise ValueError(f"No batch label defined for protocol {protocol!r}")

In [130]:
# Attach the batch labels and derive the cell name "<batch>c<channel>"
# (e.g. "b2c19").
d_df['batches'] = batches
d_df['bat_name'] = d_df['batches'].add('c').add(d_df['bat_num'])

In [131]:
# Show the generated cell names.
d_df.loc[:, 'bat_name']

0      b2c19
1      b3c16
2      b3c30
3      b3c38
4      b3c10
       ...  
134    b2c25
135    b2c46
136    b2c40
137    b2c34
138    b2c35
Name: bat_name, Length: 139, dtype: object

Response variable data

In [132]:
# Collect the TrajectoryFastCharge files, print how many were found and show
# their paths.
t_list = getListOfFiles(dirName=t_dir)
print(len(t_list))
t_list

139


['C:/Users/mattm/Documents/CalPoly/DATA 451/Toyota/features/TrajectoryFastCharge\\FastCharge_000000_CH19_features_TrajectoryFastCharge.json',
 'C:/Users/mattm/Documents/CalPoly/DATA 451/Toyota/features/TrajectoryFastCharge\\FastCharge_000001_CH16_features_TrajectoryFastCharge.json',
 'C:/Users/mattm/Documents/CalPoly/DATA 451/Toyota/features/TrajectoryFastCharge\\FastCharge_000001_CH30_features_TrajectoryFastCharge.json',
 'C:/Users/mattm/Documents/CalPoly/DATA 451/Toyota/features/TrajectoryFastCharge\\FastCharge_000001_CH38_features_TrajectoryFastCharge.json',
 'C:/Users/mattm/Documents/CalPoly/DATA 451/Toyota/features/TrajectoryFastCharge\\FastCharge_000002_CH10_features_TrajectoryFastCharge.json',
 'C:/Users/mattm/Documents/CalPoly/DATA 451/Toyota/features/TrajectoryFastCharge\\FastCharge_000002_CH18_features_TrajectoryFastCharge.json',
 'C:/Users/mattm/Documents/CalPoly/DATA 451/Toyota/features/TrajectoryFastCharge\\FastCharge_000002_CH2_features_TrajectoryFastCharge.json',
 'C:/Us

In [133]:
# Flatten every TrajectoryFastCharge JSON file into a one-row frame and stack
# them. The frames are gathered first and concatenated once instead of calling
# pd.concat inside the loop, which re-copies all previous rows each iteration.
frames = []
for file in t_list:
    with open(file) as data_file:
        frames.append(pd.json_normalize(json.load(data_file)))
t_df = pd.concat(frames) if frames else pd.DataFrame()
# NOTE(review): reset_index() without drop=True keeps the old per-file index as
# an 'index' column (0 in every row shown in the output). It looks
# unintentional, but it is kept so the layout of the saved CSV does not change.
t_df = t_df.reset_index()
t_df.head()

Unnamed: 0,index,@module,@class,name,@version,X.capacity_0.98,X.capacity_0.95,X.capacity_0.92,X.capacity_0.89,X.capacity_0.86,X.capacity_0.83,X.capacity_0.8,metadata.barcode,metadata.protocol,metadata.channel_id,metadata.parameters.thresh_max_cap,metadata.parameters.thresh_min_cap,metadata.parameters.interval_cap
0,0,beep.featurize,TrajectoryFastCharge,/mnt/c/Users/sjrt1/Documents/School/Winter2021...,2021.1.11.15,[127],[269],[376],[423],[449],[467],[483],el150800440551,2017-06-30_tests\20170630-4_4C_55per_6C.sdu,18,0.98,0.78,0.03
1,0,beep.featurize,TrajectoryFastCharge,/mnt/c/Users/sjrt1/Documents/School/Winter2021...,2021.1.11.15,[125],[276],[426],[528],[613],[668],[668],el150800737229,2018-04-12_batch8\20180412-3_7C_31per_5_9C_new...,15,0.98,0.78,0.03
2,0,beep.featurize,TrajectoryFastCharge,/mnt/c/Users/sjrt1/Documents/School/Winter2021...,2021.1.11.15,[774],[774],[774],[774],[774],[774],[774],el150800737366,2018-04-12_batch8\20180412-3_7C_31per_5_9C_new...,29,0.98,0.78,0.03
3,0,beep.featurize,TrajectoryFastCharge,/mnt/c/Users/sjrt1/Documents/School/Winter2021...,2021.1.11.15,[543],[543],[543],[543],[543],[543],[543],el150800737234,2018-04-12_batch8\20180412-3_7C_31per_5_9C_new...,37,0.98,0.78,0.03
4,0,beep.featurize,TrajectoryFastCharge,/mnt/c/Users/sjrt1/Documents/School/Winter2021...,2021.1.11.15,[1011],[1011],[1011],[1011],[1011],[1011],[1011],el150800737329,2018-04-12_batch8\20180412-5C_67per_4C_newstru...,9,0.98,0.78,0.03


In [134]:
# Pair feature rows and response rows by position (inner join on the row
# index); columns present in both frames get the default _x / _y suffixes.
df_full = pd.merge(d_df, t_df, left_index=True, right_index=True)

# The "X." columns hold their value wrapped in a list; keep the first element.
feature_columns = [name for name in df_full.columns if "X." in name]
for name in feature_columns:
    df_full[name] = df_full[name].apply(lambda cell: cell[0])

# Moves the current index into a leading column (shown as 'level_0' in the
# output because an 'index' column already exists).
df_full = df_full.reset_index()
df_full

Unnamed: 0,level_0,@module_x,@class_x,name_x,@version_x,X.discharge_capacity_cycle_2,X.max_discharge_capacity_difference,X.discharge_capacity_cycle_100,X.integrated_time_temperature_cycles_1:100,X.charge_time_cycles_1:5,...,X.capacity_0.89,X.capacity_0.86,X.capacity_0.83,X.capacity_0.8,metadata.barcode_y,metadata.protocol_y,metadata.channel_id_y,metadata.parameters.thresh_max_cap,metadata.parameters.thresh_min_cap,metadata.parameters.interval_cap
0,0,beep.featurize,DeltaQFastCharge,/mnt/c/Users/sjrt1/Documents/School/Winter2021...,2021.1.11.15,1.045426,0.889146,1.033181,231072.426367,614.400024,...,423,449,467,483,el150800440551,2017-06-30_tests\20170630-4_4C_55per_6C.sdu,18,0.98,0.78,0.03
1,1,beep.featurize,DeltaQFastCharge,/mnt/c/Users/sjrt1/Documents/School/Winter2021...,2021.1.11.15,1.062025,0.489707,1.043915,149767.851320,563.200012,...,528,613,668,668,el150800737229,2018-04-12_batch8\20180412-3_7C_31per_5_9C_new...,15,0.98,0.78,0.03
2,2,beep.featurize,DeltaQFastCharge,/mnt/c/Users/sjrt1/Documents/School/Winter2021...,2021.1.11.15,1.067879,0.496145,1.054091,205315.487728,614.400024,...,774,774,774,774,el150800737366,2018-04-12_batch8\20180412-3_7C_31per_5_9C_new...,29,0.98,0.78,0.03
3,3,beep.featurize,DeltaQFastCharge,/mnt/c/Users/sjrt1/Documents/School/Winter2021...,2021.1.11.15,1.051274,0.490112,1.034198,159799.489005,563.200012,...,543,543,543,543,el150800737234,2018-04-12_batch8\20180412-3_7C_31per_5_9C_new...,37,0.98,0.78,0.03
4,4,beep.featurize,DeltaQFastCharge,/mnt/c/Users/sjrt1/Documents/School/Winter2021...,2021.1.11.15,1.066573,0.475715,1.069509,146029.525036,614.400024,...,1011,1011,1011,1011,el150800737329,2018-04-12_batch8\20180412-5C_67per_4C_newstru...,9,0.98,0.78,0.03
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
134,134,beep.featurize,DeltaQFastCharge,/mnt/c/Users/sjrt1/Documents/School/Winter2021...,2021.1.11.15,1.068216,0.887755,1.076756,233369.177230,588.799988,...,250,483,503,522,el150800460610,2017-06-30_tests\20170630-4_9C_61per_4_5C.sdu,24,0.98,0.78,0.03
135,135,beep.featurize,DeltaQFastCharge,/mnt/c/Users/sjrt1/Documents/School/Winter2021...,2021.1.11.15,1.066163,0.886750,1.073273,231731.725590,640.000000,...,443,462,480,497,el150800460613,2017-06-30_tests\20170630-6C_4per_4_75C.sdu,45,0.98,0.78,0.03
136,136,beep.featurize,DeltaQFastCharge,/mnt/c/Users/sjrt1/Documents/School/Winter2021...,2021.1.11.15,1.074378,0.888332,1.086167,228614.760124,614.400024,...,420,435,449,464,el150800460627,2017-06-30_tests\20170630-5_6C_58per_3_5C.sdu,39,0.98,0.78,0.03
137,137,beep.featurize,DeltaQFastCharge,/mnt/c/Users/sjrt1/Documents/School/Winter2021...,2021.1.11.15,1.075889,0.892131,1.084460,234874.822253,588.799988,...,472,491,509,527,el150800460641,2017-06-30_tests\20170630-5_2C_58per_4C.sdu,33,0.98,0.78,0.03


In [135]:
# Persist the combined original-dataset features and responses without the row index.
df_full.to_csv(path_or_buf="OldDataFastCharge.csv", index=False)