In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
%matplotlib qt

# Original dataframe

In [2]:
# Load the full table, then keep only the rows that have a value in 'CA'.
# The shape is printed before and after the filter so the number of
# surviving rows is visible in the cell output.
df = pd.read_csv('datasets/df_file_output_v6_clean.csv')
print(df.shape)
df = df.dropna(subset=['CA'])
print(df.shape)
df.head()

(143666, 11)
(940, 11)


Unnamed: 0,id,Formule,BG,IM,a,b,c,alfa,beta,gamma,CA
13,mp-19,Te,0.1856,False,4.601352,4.601353,5.900062,89.955226,90.044754,119.990771,1073863.0
124,mp-149,Si,0.6105,False,3.849278,3.849279,3.849278,60.000012,60.000003,60.000011,113315.4
189,mp-239,BaS3,1.3913,False,4.216011,6.951955,6.951955,90.0,90.0,90.0,62546.52
190,mp-241,CdF2,2.8977,False,3.819109,3.819111,3.81911,60.000012,60.0,60.000017,1582.357
198,mp-252,BeTe,2.0173,False,4.004287,4.004288,4.004287,60.000014,60.00001,60.000006,10109.3


### Drop the primitive-lattice columns (`a`, `b`, `c`, `alfa`, `beta`, `gamma`)

In [3]:
# Keep only the identifier, formula, 'BG', 'IM' and 'CA' columns; every other
# column of df is discarded by this selection.
kept_columns = ['id', 'Formule', 'BG', 'IM', 'CA']
df = df[kept_columns]
df.head()

Unnamed: 0,id,Formule,BG,IM,CA
13,mp-19,Te,0.1856,False,1073863.0
124,mp-149,Si,0.6105,False,113315.4
189,mp-239,BaS3,1.3913,False,62546.52
190,mp-241,CdF2,2.8977,False,1582.357
198,mp-252,BeTe,2.0173,False,10109.3


# Conventional lattice dataframe

In [4]:
# Read the conventional-lattice table and discard the 'Unnamed: 0' column
# that comes in from the CSV (presumably a row index written on export).
lattice_data_df = (
    pd.read_csv('datasets/lattice_data.csv')
    .drop(columns='Unnamed: 0')
)
lattice_data_df

Unnamed: 0,id,a,b,c,alpha,beta,gamma
0,mp-19,4.601353,4.601353,5.900062,90.0,90.0,120.0
1,mp-149,5.443702,5.443702,5.443702,90.0,90.0,90.0
2,mp-239,6.951955,6.951955,4.216011,90.0,90.0,90.0
3,mp-241,5.401038,5.401038,5.401038,90.0,90.0,90.0
4,mp-252,5.662918,5.662918,5.662918,90.0,90.0,90.0
...,...,...,...,...,...,...,...
935,mp-999472,4.373467,4.373467,20.620553,90.0,90.0,120.0
936,mp-999474,4.098760,4.098760,20.778108,90.0,90.0,120.0
937,mp-999488,4.116628,4.116628,20.748485,90.0,90.0,120.0
938,mp-999489,4.170708,4.170708,20.737036,90.0,90.0,120.0


# Merging

In [5]:
# Attach the conventional-lattice columns to each row of df by matching on 'id'.
# how='inner' keeps only ids present in BOTH frames.
new_df = pd.merge(
    left=df,
    right=lattice_data_df,
    how='inner',
    on='id'
)

# An inner join discards unmatched rows without any message, so list the ids
# of df that have no counterpart in lattice_data_df before they go unnoticed.
unmatched_ids = df.loc[~df['id'].isin(lattice_data_df['id']), 'id'].tolist()
if unmatched_ids:
    print(
        f"{len(unmatched_ids)} row(s) of df have no entry in lattice_data_df "
        f"and were dropped by the inner merge: {unmatched_ids}"
    )

new_df

Unnamed: 0,id,Formule,BG,IM,CA,a,b,c,alpha,beta,gamma
0,mp-19,Te,0.1856,False,1.073863e+06,4.601353,4.601353,5.900062,90.0,90.0,120.0
1,mp-149,Si,0.6105,False,1.133154e+05,5.443702,5.443702,5.443702,90.0,90.0,90.0
2,mp-239,BaS3,1.3913,False,6.254652e+04,6.951955,6.951955,4.216011,90.0,90.0,90.0
3,mp-241,CdF2,2.8977,False,1.582357e+03,5.401038,5.401038,5.401038,90.0,90.0,90.0
4,mp-252,BeTe,2.0173,False,1.010930e+04,5.662918,5.662918,5.662918,90.0,90.0,90.0
...,...,...,...,...,...,...,...,...,...,...,...
935,mp-999472,NaLaSe2,2.2767,False,3.138570e+04,4.373467,4.373467,20.620553,90.0,90.0,120.0
936,mp-999474,NaHoSe2,1.8867,False,2.690055e+04,4.098760,4.098760,20.778108,90.0,90.0,120.0
937,mp-999488,NaDySe2,1.8635,False,3.031457e+04,4.116628,4.116628,20.748485,90.0,90.0,120.0
938,mp-999489,NaGdSe2,1.3585,False,1.219142e+05,4.170708,4.170708,20.737036,90.0,90.0,120.0


## Exporting to a CSV file

In [6]:
# Write the merged table to disk; index=False leaves the row index out of the file.
output_csv_path = 'datasets/MP_db_conv_unit_cell_CA.csv'
new_df.to_csv(output_csv_path, index=False)