In [None]:
import datetime
import numpy as np
import pandas as pd
from scipy.io import loadmat

## download data with bash

In [None]:
!wget -cq https://ti.arc.nasa.gov/c/5 -O nasa.zip
!unzip -qqo nasa.zip -d battery_data

## function to convert the data from MATLAB format to pandas DataFrames

In [None]:
# Based on https://www.kaggle.com/rajeevsharma993/battery-health-nasa-dataset
def load_data(battery):
  """Convert the discharge cycles of one battery ``.mat`` file to DataFrames.

  Reads ``battery_data/<battery>.mat`` and keeps only the cycles whose
  ``type`` is ``'discharge'``. Kept cycles are numbered consecutively
  starting at 1, in file order. The total number of cycles and the first
  sample row (when there is one) are printed.

  Args:
    battery: File name without the ``.mat`` extension; the same string is
      used as the name of the top-level variable inside the file.

  Returns:
    A two-element list ``[dataset, capacity_data]``:
      * ``dataset``: one row per measurement sample, with columns ``cycle``,
        ``ambient_temperature``, ``datetime``, ``capacity``,
        ``voltage_measured``, ``current_measured``, ``temperature_measured``,
        ``current_load``, ``voltage_load`` and ``time``.
      * ``capacity_data``: one row per discharge cycle, with the first four
        of those columns.
    Both frames are empty (columns only) when the file contains no
    discharge cycle.
  """
  dataset_columns = ['cycle', 'ambient_temperature', 'datetime',
                     'capacity', 'voltage_measured',
                     'current_measured', 'temperature_measured',
                     'current_load', 'voltage_load', 'time']
  capacity_columns = dataset_columns[:4]

  mat = loadmat('battery_data/' + battery + '.mat')
  # Resolve the nested struct lookup once instead of on every iteration.
  cycles = mat[battery][0, 0]['cycle'][0]
  print('Total data in dataset: ', len(cycles))
  counter = 0
  dataset = []
  capacity_data = []

  for row in cycles:
    if row['type'][0] != 'discharge':
      continue
    counter += 1  # 1-based index among discharge cycles only

    ambient_temperature = row['ambient_temperature'][0][0]
    stamp = row['time'][0]  # [year, month, day, hour, minute, second]
    # int() drops any fractional part of the seconds field.
    date_time = datetime.datetime(int(stamp[0]),
                                  int(stamp[1]),
                                  int(stamp[2]),
                                  int(stamp[3]),
                                  int(stamp[4])) + datetime.timedelta(seconds=int(stamp[5]))

    data = row['data'][0][0]
    capacity = data['Capacity'][0][0]
    # Pull each measurement vector out once; the sample loop only indexes.
    voltage_measured = data['Voltage_measured'][0]
    current_measured = data['Current_measured'][0]
    temperature_measured = data['Temperature_measured'][0]
    current_load = data['Current_load'][0]
    voltage_load = data['Voltage_load'][0]
    time = data['Time'][0]

    for j in range(len(voltage_measured)):
      dataset.append([counter, ambient_temperature, date_time, capacity,
                      voltage_measured[j], current_measured[j],
                      temperature_measured[j], current_load[j],
                      voltage_load[j], time[j]])
    capacity_data.append([counter, ambient_temperature, date_time, capacity])

  # Guard the preview: a file without discharge samples leaves `dataset` empty.
  if dataset:
    print(dataset[0])
  return [pd.DataFrame(data=dataset, columns=dataset_columns),
          pd.DataFrame(data=capacity_data, columns=capacity_columns)]

## iterate over files, convert to parquet

In [None]:
import os

# Convert every MATLAB file under battery_data/ into two parquet files:
# time_series/<bat_id>.parquet (one row per sample) and
# capacities/<bat_id>.parquet (one row per discharge cycle).
# The per-battery capacity frames are also kept in `capacities`.
capacities = []
os.makedirs("capacities", exist_ok=True)
os.makedirs("time_series", exist_ok=True)
# os.listdir returns entries in arbitrary order; sorting keeps the order of
# `capacities` reproducible between runs.
for battery in sorted(os.listdir('battery_data/')):
    if not battery.endswith('.mat'):  # exclude text files
        continue
    bat_id = os.path.splitext(battery)[0]  # file name without the '.mat' suffix
    dataset, capacity = load_data(bat_id)
    dataset['bat_id'] = bat_id
    capacity['bat_id'] = bat_id
    capacities.append(capacity)
    dataset.to_parquet("time_series/%s.parquet" % bat_id, index=False)
    capacity.to_parquet("capacities/%s.parquet" % bat_id, index=False)

## upload the capacity parquet files to blob storage

In [None]:
from azureml.core import Workspace

# Load the workspace from its local configuration and take its default datastore.
ws = Workspace.from_config()
datastore = ws.get_default_datastore()

# Copy the local ./capacities directory to the 'capacities' path on the
# datastore, replacing files that are already there.
datastore.upload(src_dir='./capacities', target_path='capacities', overwrite=True)

## plot Li-ion Battery Aging

In [None]:
import matplotlib.pyplot as plt

# Draw one capacity-vs-cycle curve per battery on a single set of axes,
# labelled with the battery id stored in each frame.
fig, ax = plt.subplots()
for cap_df in capacities:
    ax.plot(cap_df['cycle'], cap_df['capacity'], label=cap_df['bat_id'].iloc[0])

ax.set_title("Li-ion Battery Aging")
ax.set_xlabel("number of cycles")
ax.set_ylabel("Battery capacity (Ahr)")
ax.legend()