In [1]:
from mp_api import MPRester
import pandas as pd
from multiprocessing import Pool
import numpy as np

## Battery data from the new api

In [2]:
api_key = 'your key here'
mpr = MPRester(api_key)



In [3]:
docs = mpr.insertion_electrodes.search_electrode_docs()



Retrieving InsertionElectrodeDoc documents:   0%|          | 0/4275 [00:00<?, ?it/s]

In [4]:
docs_list = [i.__dict__ for i in docs]

In [5]:
df = pd.DataFrame(docs_list)

In [7]:
df.to_csv('battery_insertion.csv', index = False)

In [8]:
df.drop(columns = ['framework', 'elements', 'nelements', 'chemsys', 'last_updated', 'entries_composition_summary', 'material_ids'], inplace = True, errors = 'ignore')

In [9]:
# object to dict
df['adj_pairs'] = df.adj_pairs.apply(lambda ps: list(map(lambda x: x.__dict__, ps)))

## get voltage pairs / sub reactions

In [11]:
def f(x):
    for dp in x['adj_pairs']:
        dp['battery_id'] = x['battery_id']
        dp['working_ion'] = x['working_ion']
_ = df.apply(f, axis = 1)

In [12]:
voltage_steps = pd.DataFrame(np.concatenate(df.adj_pairs.to_numpy()).tolist())

In [13]:
voltage_steps

Unnamed: 0,max_delta_volume,average_voltage,capacity_grav,capacity_vol,energy_grav,energy_vol,fracA_charge,fracA_discharge,formula_charge,formula_discharge,stability_charge,stability_discharge,id_charge,id_discharge,battery_id,working_ion
0,0.057981,4.321408,157.522528,359.331062,680.719074,1552.816033,0.000000,0.100000,Co(GeO3)2,CaCo(GeO3)2,0.167779,0.000000,mp-1043245,mp-1043203,mp-1043203_Ca,Ca
1,0.132289,3.210683,56.585387,57.929055,181.677753,185.991848,0.000000,0.040000,MnAgO4,CaMn4(AgO4)4,0.170110,0.209815,mp-608357,mp-1233090,mp-608357_Ca,Ca
2,0.016781,4.577865,92.786977,185.109632,424.766282,847.406961,0.061224,0.115385,Ca3Cu6(AsO4)8,Ca3Cu3(AsO4)4,0.121923,0.023373,mp-1351940,mp-555937,mp-555937_Ca,Ca
3,0.106104,4.738932,141.061578,164.294341,668.481260,778.579752,0.000000,0.125000,VBiO5,CaVBiO5,0.289718,0.009257,mp-1372756,mp-1044259,mp-1044259_Ca,Ca
4,0.043449,4.059183,135.755791,393.143343,551.057565,1595.840676,0.000000,0.100000,Pr(CoO3)2,CaPr(CoO3)2,0.173336,0.046077,mp-1376950,mp-1044741,mp-1044741_Ca,Ca
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4966,0.071995,2.610888,271.754682,635.506495,709.521043,1659.236292,0.000000,0.166667,FeF4,ZnFeF4,0.194526,0.094786,mp-1396678,mp-1390771,mp-1390771_Zn,Zn
4967,0.059201,1.145081,149.168968,68.975042,170.810583,78.982024,0.000000,0.076923,CrPO4,ZnCr2(PO4)2,0.213853,0.055674,mp-1351781,mp-1040837,mp-1040837_Zn,Zn
4968,0.028359,-0.170672,64.442144,396.427389,-10.998465,-67.659028,0.000000,0.050000,Cu3(MoO3)4,ZnCu3(MoO3)4,0.012036,0.127441,mp-1043118,mp-1043065,mp-1043065_Zn,Zn
4969,0.040038,2.191880,140.306970,135.982389,307.536047,298.057084,0.000000,0.071429,CrNiP2O9,ZnCrNiP2O9,0.109255,0.080301,mp-1044071,mp-1043482,mp-1043482_Zn,Zn


In [17]:
voltage_steps.to_csv('mp_new.csv', index = False)

## combine the data

In [14]:
legacy_pairs = pd.read_csv('mp_legacy.csv')
legacy_pairs.rename(columns = {'battid' : 'battery_id', 'voltage' : 'average_voltage'}, inplace = True)
legacy_pairs['battery_type'] = 'intercalation'
voltage_steps['battery_type'] = 'insertion'

In [15]:
# concat
total_pairs = pd.concat([legacy_pairs, voltage_steps], ignore_index=True)

In [16]:
total_pairs

Unnamed: 0,average_voltage,max_delta_volume,capacity_grav,capacity_vol,energy_grav,energy_vol,working_ion,formula_charge,id_charge,formula_discharge,id_discharge,fracA_charge,fracA_discharge,stability_charge,stability_discharge,battery_id,battery_type
0,0.192475,0.025205,177.411715,358.276639,34.147250,68.959155,Li,C,mp-568806,LiC12,mp-1021323,0.000000,0.076923,0.006175,0.000000,mp-1001581_Li,intercalation
1,-0.016980,0.024483,169.618464,367.048396,-2.880110,-6.232456,Li,LiC12,mp-1021323,LiC6,mp-1001581,0.076923,0.142857,0.000000,0.001213,mp-1001581_Li,intercalation
2,-1.852321,12.612895,158.738744,81.064640,-294.035064,-150.157712,Li,InBi,mp-1001833,Li2InBi,mp-1096733,0.000000,0.500000,0.028716,1.301204,mp-1001833_Li,intercalation
3,-1.798227,17.704725,144.959872,58.997096,-260.670751,-106.090168,Na,InBi,mp-1001833,Na2InBi,mp-1096344,0.000000,0.500000,0.028716,1.180526,mp-1001833_Na,intercalation
4,-1.593907,13.165226,157.644042,80.219754,-251.269942,-127.862827,Li,TlSb,mp-1002219,Li2TlSb,mp-1097580,0.000000,0.500000,0.138281,1.288802,mp-1002219_Li,intercalation
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10124,2.610888,0.071995,271.754682,635.506495,709.521043,1659.236292,Zn,FeF4,mp-1396678,ZnFeF4,mp-1390771,0.000000,0.166667,0.194526,0.094786,mp-1390771_Zn,insertion
10125,1.145081,0.059201,149.168968,68.975042,170.810583,78.982024,Zn,CrPO4,mp-1351781,ZnCr2(PO4)2,mp-1040837,0.000000,0.076923,0.213853,0.055674,mp-1040837_Zn,insertion
10126,-0.170672,0.028359,64.442144,396.427389,-10.998465,-67.659028,Zn,Cu3(MoO3)4,mp-1043118,ZnCu3(MoO3)4,mp-1043065,0.000000,0.050000,0.012036,0.127441,mp-1043065_Zn,insertion
10127,2.191880,0.040038,140.306970,135.982389,307.536047,298.057084,Zn,CrNiP2O9,mp-1044071,ZnCrNiP2O9,mp-1043482,0.000000,0.071429,0.109255,0.080301,mp-1043482_Zn,insertion


In [18]:
total_pairs.to_csv('mp_total.csv', index = False )

### disregarding "battery_type" :

battery_type may not be reliable. Those that are labelled as insertion might actually be intercalation, according to the battery explorer app on the mp website. 

Below is the code to drop the column and remove duplicating batteries. 

In [28]:
# drop duplicates, keeps the newest calculations from the new api (ignore insertion/ intercalation, based on bettery and material ids)
no_duplicates_pairs = total_pairs.drop_duplicates(subset=['id_charge', 'id_discharge', 'battery_id'], keep = 'last')
no_duplicates_pairs.drop(columns = ['battery_type'], inplace=True)
no_duplicates_pairs

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


Unnamed: 0,average_voltage,max_delta_volume,capacity_grav,capacity_vol,energy_grav,energy_vol,working_ion,formula_charge,id_charge,formula_discharge,id_discharge,fracA_charge,fracA_discharge,stability_charge,stability_discharge,battery_id
0,0.192475,0.025205,177.411715,358.276639,34.147250,68.959155,Li,C,mp-568806,LiC12,mp-1021323,0.000000,0.076923,0.006175,0.000000,mp-1001581_Li
1,-0.016980,0.024483,169.618464,367.048396,-2.880110,-6.232456,Li,LiC12,mp-1021323,LiC6,mp-1001581,0.076923,0.142857,0.000000,0.001213,mp-1001581_Li
2,-1.852321,12.612895,158.738744,81.064640,-294.035064,-150.157712,Li,InBi,mp-1001833,Li2InBi,mp-1096733,0.000000,0.500000,0.028716,1.301204,mp-1001833_Li
3,-1.798227,17.704725,144.959872,58.997096,-260.670751,-106.090168,Na,InBi,mp-1001833,Na2InBi,mp-1096344,0.000000,0.500000,0.028716,1.180526,mp-1001833_Na
4,-1.593907,13.165226,157.644042,80.219754,-251.269942,-127.862827,Li,TlSb,mp-1002219,Li2TlSb,mp-1097580,0.000000,0.500000,0.138281,1.288802,mp-1002219_Li
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10124,2.610888,0.071995,271.754682,635.506495,709.521043,1659.236292,Zn,FeF4,mp-1396678,ZnFeF4,mp-1390771,0.000000,0.166667,0.194526,0.094786,mp-1390771_Zn
10125,1.145081,0.059201,149.168968,68.975042,170.810583,78.982024,Zn,CrPO4,mp-1351781,ZnCr2(PO4)2,mp-1040837,0.000000,0.076923,0.213853,0.055674,mp-1040837_Zn
10126,-0.170672,0.028359,64.442144,396.427389,-10.998465,-67.659028,Zn,Cu3(MoO3)4,mp-1043118,ZnCu3(MoO3)4,mp-1043065,0.000000,0.050000,0.012036,0.127441,mp-1043065_Zn
10127,2.191880,0.040038,140.306970,135.982389,307.536047,298.057084,Zn,CrNiP2O9,mp-1044071,ZnCrNiP2O9,mp-1043482,0.000000,0.071429,0.109255,0.080301,mp-1043482_Zn


In [142]:
# # check that dropped instances are actual duplicates
# uniques = no_duplicates_pairs.index
# dups = [i for i in total_pairs.index if i not in uniques]
# dup_ids = set(total_pairs.loc[dups].battery_id.unique())
# dup_rows = total_pairs[total_pairs.battery_id.apply(lambda x: x in dup_ids)]
# dup_rows.sort_values(by = ['battery_id']).to_csv('temp.csv')