In [1]:
# from IPython.core.interactiveshell import InteractiveShell
# InteractiveShell.ast_node_interactivity = "all"

In [2]:
import os
import pandas as pd
from pathlib import Path
from urllib.error import HTTPError

# Parse the Data

In [3]:
def extract_column(dataset, attr, *, regex=''):
    """
    Returns a list of the values of one attribute whose text
    starts with a match of some regular expression.

    Every '/' in a value is replaced by '-' before matching. Missing
    values are skipped and the remaining values are compared as strings.
    Each returned item is the text matched by ``^{regex}.*``.

    Positional Arguments:
    dataset -- a csv file (anything pandas.read_csv accepts)
    attr -- an attribute (column name) of the dataset

    Keyword-Only (Optional) Arguments:
    regex -- pattern the start of a value must match
             (defaults to '', which keeps every value)
    """
    # No .squeeze(): on a one-row column it collapses the Series to a bare
    # scalar, which has no .str accessor and breaks everything below.
    column = pd.read_csv(dataset)[attr]

    # Drop missing cells first; .str.findall would yield NaN for them and
    # NaN has no length. Build a new Series instead of mutating in place.
    cleaned = column.dropna().astype(str).str.replace('/', '-', regex=False)

    matches = cleaned.str.findall(fr'^{regex}.*')
    return [found[0] for found in matches if found]

# Save and Load the Data

In [4]:
# Built eagerly as a list: a generator can only be walked once, so a retry
# after a failed download would silently skip the URLs already consumed.
repos = [r'https://www.batteryarchive.org/data/' + cell_id + r'_timeseries.csv'
         for cell_id in extract_column('metadata.csv', 'cell_id', regex='SNL')]

def load_battery_data(path):
    """
    Returns the battery data as a DataFrame, downloading it and caching
    it as a pickle object (*.pkl) in the datasets directory on first use.

    Positional Arguments:
    path -- a pathlib.Path to the directory that contains the
            483_ML_BatteryLife repository

    Side effects:
    Changes the working directory to path / '483_ML_BatteryLife/' and
    leaves it there. Creates datasets/battery.pkl inside that directory
    when the cache does not exist yet.

    Returns None, after printing "Download Failed!", when a download
    raises an HTTPError.

    Data source -- @www.batteryarchive.org
    """

    try:
        # The relative paths below are resolved against this directory.
        os.chdir(path / r'483_ML_BatteryLife/')
        data_path = Path(r'datasets/battery.pkl')

        if data_path.is_file():
            # NOTE(review): unpickling can run arbitrary code; only load a
            # cache file that this notebook produced.
            return pd.read_pickle(data_path)

        data_path.parent.mkdir(parents=True, exist_ok=True)
        df = pd.concat([pd.read_csv(url) for url in repos])
        df.to_pickle(data_path)
        print("Download Complete!")
        # Hand back the frame already in memory rather than re-reading the
        # pickle that was just written.
        return df
    except HTTPError:
        print("Download Failed!")
        return None

In [5]:
# Directory that contains the 483_ML_BatteryLife checkout;
# change this to wherever the GitHub repository was cloned.
repo_parent = Path(r'/home/jason/')
battery = load_battery_data(repo_parent)

# Look at the Data

In [6]:
# Print the index range, per-column dtypes and memory usage of the frame.
battery.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 23888882 entries, 0 to 80104
Data columns (total 11 columns):
 #   Column                       Dtype  
---  ------                       -----  
 0   Date_Time                    object 
 1   Test_Time (s)                float64
 2   Cycle_Index                  float64
 3   Current (A)                  float64
 4   Voltage (V)                  float64
 5   Charge_Capacity (Ah)         float64
 6   Discharge_Capacity (Ah)      float64
 7   Charge_Energy (Wh)           float64
 8   Discharge_Energy (Wh)        float64
 9   Environment_Temperature (C)  float64
 10  Cell_Temperature (C)         float64
dtypes: float64(10), object(1)
memory usage: 2.1+ GB


In [7]:
# Preview the first rows of the frame (head() defaults to five).
battery.head()

Unnamed: 0,Date_Time,Test_Time (s),Cycle_Index,Current (A),Voltage (V),Charge_Capacity (Ah),Discharge_Capacity (Ah),Charge_Energy (Wh),Discharge_Energy (Wh),Environment_Temperature (C),Cell_Temperature (C)
0,2018-02-01 10:02:10.002962,10.002,1.0,0.0,2.94,0.0,0.0,0.0,0.0,15.163,15.375
1,2018-02-01 10:02:20.016433,20.016,1.0,0.0,2.941,0.0,0.0,0.0,0.0,15.148,15.437
2,2018-02-01 10:02:30.031429,30.031,1.0,0.0,2.941,0.0,0.0,0.0,0.0,15.087,15.375
3,2018-02-01 10:02:40.046426,40.046,1.0,0.0,2.941,0.0,0.0,0.0,0.0,15.24,15.375
4,2018-02-01 10:02:50.061462,50.061,1.0,0.0,2.941,0.0,0.0,0.0,0.0,15.087,15.375


In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) per numeric column.
battery.describe()

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt

plt.rc('font', size=14)
plt.rc('axes', labelsize=14, titlesize=10)
plt.rc('legend', fontsize=14)
plt.rc('xtick', labelsize=8)
plt.rc('ytick', labelsize=8)

battery.hist(bins=50, figsize=(10, 10))
plt.show()