In [2]:
import os
from multiprocessing import Pool

import numpy as np
import pandas as pd
import pymatgen
from pymatgen.ext.matproj import MPRester

# Battery data from the legacy api

## the dataset



In [3]:
# Preview of the processed dataset; 'mp_legacy.csv' is the file written by the
# last cell of this notebook, so it must already exist when this cell runs.
display(pd.read_csv('mp_legacy.csv'))

Unnamed: 0,voltage,max_delta_volume,capacity_grav,capacity_vol,energy_grav,energy_vol,working_ion,formula_charge,id_charge,formula_discharge,id_discharge,fracA_charge,fracA_discharge,stability_charge,stability_discharge,battid
0,0.192475,0.025205,177.411715,358.276639,34.147250,68.959155,Li,C,mp-568806,LiC12,mp-1021323,0.000000,0.076923,0.006175,0.000000,mp-1001581_Li
1,-0.016980,0.024483,169.618464,367.048396,-2.880110,-6.232456,Li,LiC12,mp-1021323,LiC6,mp-1001581,0.076923,0.142857,0.000000,0.001213,mp-1001581_Li
2,-1.852321,12.612895,158.738744,81.064640,-294.035064,-150.157712,Li,InBi,mp-1001833,Li2InBi,mp-1096733,0.000000,0.500000,0.028716,1.301204,mp-1001833_Li
3,-1.798227,17.704725,144.959872,58.997096,-260.670751,-106.090168,Na,InBi,mp-1001833,Na2InBi,mp-1096344,0.000000,0.500000,0.028716,1.180526,mp-1001833_Na
4,-1.593907,13.165226,157.644042,80.219754,-251.269942,-127.862827,Li,TlSb,mp-1002219,Li2TlSb,mp-1097580,0.000000,0.500000,0.138281,1.288802,mp-1002219_Li
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5153,1.778259,0.023953,191.314388,990.657064,340.206495,1761.644646,Mg,MoO2,mvc-6377,Mg(MoO2)2,mvc-6176,0.000000,0.142857,0.307103,0.215717,mvc-6176_Mg
5154,0.516242,0.025113,166.838464,989.536244,86.128944,510.839705,Zn,MoO2,mvc-6377,Zn(MoO2)2,mvc-6278,0.000000,0.142857,0.307103,0.212932,mvc-6278_Zn
5155,2.221493,0.132895,181.118258,895.393491,402.353007,1989.110690,Ca,MoO2,mvc-6377,Ca(MoO2)2,mvc-6340,0.000000,0.142857,0.307103,0.185936,mvc-6340_Ca
5156,1.433162,0.132882,113.427221,935.243862,162.559587,1340.355998,Al,Re(WO4)2,mvc-637,AlRe(WO4)2,mvc-698,0.000000,0.083333,0.397062,0.382436,mvc-637_Al


Each row represents one voltage step (a charge/discharge pair) of a battery, with:
- its working ion,
- the discharge and charge formulas,
- properties of the step as a whole,
- properties of the discharged and charged materials, respectively.

More properties of the discharge and charge materials can be retrieved from the API using their material ids.

Documentation of the battery explorer: https://docs.materialsproject.org/apps/battery-explorer (currently unavailable)


# getting the data

In [None]:
# This notebook uses the legacy API client from pymatgen, which requires an API
# key from the legacy website.

In [4]:
# https://matsci.org/t/battery-explorer-using-pymatgen/34283/2
def get_battery_data(self, formula_or_batt_id):
    """Query the ``/battery/<id>`` endpoint for a formula or battery id.

    Written with a ``self`` parameter so it can be attached to ``MPRester``
    as a method; ``self`` only needs to provide ``_make_request``.

    Args:
        formula_or_batt_id: Formula or battery id; it is interpolated into
            the request path with ``%s``.

    Returns:
        Whatever ``self._make_request`` returns for
        ``/battery/<formula_or_batt_id>``.

    Examples:
        mpr.get_battery_data("mp-1001581_Li")
        mpr.get_battery_data("LiFePO4")
    """
    return self._make_request('/battery/%s' % formula_or_batt_id)
# Monkey-patch: attach the helper to MPRester so instances expose
# .get_battery_data(...)
MPRester.get_battery_data = get_battery_data

In [5]:
# Use a *legacy* API key. It is read from the PMG_MAPI_KEY environment variable
# when that is set, so the key never has to be saved inside the notebook; the
# placeholder string is only a fallback and must be replaced otherwise.
api_key = os.environ.get('PMG_MAPI_KEY', 'your key here')
mpr = MPRester(api_key)

In [6]:
# Request every battery id from the '/battery/all_ids' endpoint; the result is
# used below through len() and integer indexing.
battids = mpr._make_request('/battery/all_ids')

In [10]:
def f(i):
    """Return the first record the battery endpoint gives for ``battids[i]``."""
    # The response is indexed with [0]; an empty response would raise
    # IndexError here.
    return mpr.get_battery_data(battids[i])[0]


# Fan the requests out over 12 worker processes; Pool.map returns the results
# in the same order as battids.
# NOTE(review): the workers rely on the globals `mpr` and `battids` being
# visible to them (true with the "fork" start method) - confirm on platforms
# that spawn worker processes instead.
with Pool(12) as p:
    rst = p.map(f, range(len(battids)))

In [11]:
# One row per fetched record (one element of rst per battery id)
df = pd.DataFrame(rst)

In [13]:
# Keep the raw per-battery ("master cell") table on disk, without the index column
df.to_csv('battery_intercalation.csv', index = False)

In [14]:
# Peek at the first three raw records
df.head(3)

Unnamed: 0,battid,reduced_cell_formula,average_voltage,min_voltage,max_voltage,nsteps,min_instability,capacity_grav,capacity_vol,working_ion,min_frac,max_frac,reduced_cell_composition,framework,adj_pairs,spacegroup,energy_grav,energy_vol,numsites,type
0,mp-1001581_Li,C,0.087747,-0.01698,0.192475,2,0.0,339.236928,734.096792,Li,0.0,0.142857,{'C': 1.0},"{'reduced_cell_composition': {'C': 1.0}, 'unit...","[{'average_voltage': 0.1924746049999988, 'max_...","{'number': 191, 'hall_number': 485, 'internati...",29.767137,64.415039,1.0,intercalation
1,mp-1001833_Li,InBi,-1.852321,-1.852321,-1.852321,1,0.028716,158.738744,81.06464,Li,0.0,0.5,"{'In': 1.0, 'Bi': 1.0}","{'reduced_cell_composition': {'In': 1.0, 'Bi':...","[{'average_voltage': -1.8523207099999999, 'max...","{'number': 216, 'hall_number': 512, 'internati...",-294.035064,-150.157712,2.0,intercalation
2,mp-1001833_Na,InBi,-1.798227,-1.798227,-1.798227,1,0.028716,144.959872,58.997096,Na,0.0,0.5,"{'In': 1.0, 'Bi': 1.0}","{'reduced_cell_composition': {'In': 1.0, 'Bi':...","[{'average_voltage': -1.7982269649999998, 'max...","{'number': 216, 'hall_number': 512, 'internati...",-260.670751,-106.090168,2.0,intercalation


## data processing

The raw data contains "master cells", where each instance is a battery cell that operates with one or more chemical reactions / voltage steps (num of reactions indicated by nsteps). Each step of reaction is recorded in adj_pairs.


Here, we only keep the extracted subreactions / voltage steps.

Information about the master cell remains in battery_intercalation.csv. 

In [15]:
# Flatten adj_pairs into one dict per voltage step, tagged with the battid of
# the battery it belongs to. Row order and step order are preserved.
#
# Built with a comprehension instead of a side-effecting function passed to
# DataFrame.apply: older pandas releases could call the applied function twice
# on the first row, which would duplicate that battery's steps. Each step is
# copied, so the dicts stored in df['adj_pairs'] are left untouched.
subset = [
    {**step, 'battid': battid}
    for battid, steps in zip(df['battid'], df['adj_pairs'])
    for step in steps
]

In [16]:
# One row per extracted voltage step (each element of subset)
df_ = pd.DataFrame(subset)

In [17]:
# Remove columns that are not kept in the per-step table. errors='ignore'
# skips any listed column that is absent, so the cell can be re-run safely.
# Rebinding df_ (rather than inplace=True) avoids mutating the frame object
# created in the previous cell.
df_ = df_.drop(
    columns=['muO2_data', 'max_voltage', 'min_voltage', 'max_voltage_step', 'nsteps', 'framework'],
    errors='ignore',
)

In [18]:
# Sanity check: for every step, both the charged and the discharged material
# id appear in that step's stable_material_ids.
df_.apply(
    lambda row: row['id_charge'] in row['stable_material_ids']
    and row['id_discharge'] in row['stable_material_ids'],
    axis=1,
).value_counts()

True    5158
dtype: int64

In [19]:
# Drop the ids of unstable intermediates and the stable-id lists, which only
# repeat id_charge / id_discharge, together with the remaining stability
# bookkeeping columns. errors='ignore' skips any listed column that is absent.
# Rebinding df_ (rather than inplace=True) avoids mutating the frame in place.
df_ = df_.drop(
    columns=['stable_material_ids', 'unstable_material_ids',
             'material_ids', 'stability_data', 'max_instability',
             'min_instability'],
    errors='ignore',
)

In [20]:
# Each row is a single voltage step, so its 'average_voltage' is exposed simply
# as 'voltage'. Rebinding df_ (rather than inplace=True) keeps the cell free of
# in-place mutation.
df_ = df_.rename(columns={'average_voltage': 'voltage'})

In [21]:
# Show the final per-step table (the notebook renders a truncated view)
df_

Unnamed: 0,voltage,max_delta_volume,capacity_grav,capacity_vol,energy_grav,energy_vol,working_ion,formula_charge,id_charge,formula_discharge,id_discharge,fracA_charge,fracA_discharge,stability_charge,stability_discharge,battid
0,0.192475,0.025205,177.411715,358.276639,34.147250,68.959155,Li,C,mp-568806,LiC12,mp-1021323,0.000000,0.076923,0.006175,0.000000,mp-1001581_Li
1,-0.016980,0.024483,169.618464,367.048396,-2.880110,-6.232456,Li,LiC12,mp-1021323,LiC6,mp-1001581,0.076923,0.142857,0.000000,0.001213,mp-1001581_Li
2,-1.852321,12.612895,158.738744,81.064640,-294.035064,-150.157712,Li,InBi,mp-1001833,Li2InBi,mp-1096733,0.000000,0.500000,0.028716,1.301204,mp-1001833_Li
3,-1.798227,17.704725,144.959872,58.997096,-260.670751,-106.090168,Na,InBi,mp-1001833,Na2InBi,mp-1096344,0.000000,0.500000,0.028716,1.180526,mp-1001833_Na
4,-1.593907,13.165226,157.644042,80.219754,-251.269942,-127.862827,Li,TlSb,mp-1002219,Li2TlSb,mp-1097580,0.000000,0.500000,0.138281,1.288802,mp-1002219_Li
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5153,1.778259,0.023953,191.314388,990.657064,340.206495,1761.644646,Mg,MoO2,mvc-6377,Mg(MoO2)2,mvc-6176,0.000000,0.142857,0.307103,0.215717,mvc-6176_Mg
5154,0.516242,0.025113,166.838464,989.536244,86.128944,510.839705,Zn,MoO2,mvc-6377,Zn(MoO2)2,mvc-6278,0.000000,0.142857,0.307103,0.212932,mvc-6278_Zn
5155,2.221493,0.132895,181.118258,895.393491,402.353007,1989.110690,Ca,MoO2,mvc-6377,Ca(MoO2)2,mvc-6340,0.000000,0.142857,0.307103,0.185936,mvc-6340_Ca
5156,1.433162,0.132882,113.427221,935.243862,162.559587,1340.355998,Al,Re(WO4)2,mvc-637,AlRe(WO4)2,mvc-698,0.000000,0.083333,0.397062,0.382436,mvc-637_Al


In [22]:
# Persist the per-step dataset without the index column; this is the file
# that the preview cell at the top of the notebook reads.
df_.to_csv('mp_legacy.csv', index = False)