In [2]:
import pandas as pd
import torch
import numpy as np
import os
import matplotlib.pyplot as plt
%reload_ext jupyternotify

<IPython.core.display.Javascript object>

In [3]:
def addBravaisLattice(dataframe, spaceGroupTag, spaceGroupFile="Book1.xlsx"):
    """Attach a 'Bravais Lattice' label to every row of ``dataframe``.

    A space-group lookup table is read from an Excel workbook and left-joined
    onto ``dataframe``. The label is the first character of the full
    space-group name, a dash, and the crystal system (e.g. ``'F-Cubic'``).
    A handful of labels are then rewritten to the spellings used elsewhere in
    this notebook.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Table holding a space-group column. It is not modified.
    spaceGroupTag : str
        Name of the column in ``dataframe`` that is matched against the
        lookup table's 'Space Group' column.
    spaceGroupFile : str, optional
        Path of the lookup workbook. It must have exactly three columns,
        assumed to be crystal system, space-group number and full name, in
        that order (TODO confirm against the workbook).

    Returns
    -------
    pandas.DataFrame
        The merged table: all columns of ``dataframe`` plus 'Crystal System',
        'Space Group', 'Full Name' and 'Bravais Lattice'. Rows whose space
        group is not in the lookup table get NaN in the added columns.
    """
    spaceGroupDf = pd.read_excel(spaceGroupFile)
    spaceGroupDf.columns = ['Crystal System', 'Space Group', 'Full Name']
    spaceGroupDf['Bravais Lattice'] = (
        spaceGroupDf['Full Name'].str[0] + '-' + spaceGroupDf['Crystal System'].astype(str)
    )
    merged = dataframe.merge(spaceGroupDf, left_on=spaceGroupTag, right_on='Space Group', how='left')

    # The previous version also built a copy with 'Bravais', 'Space Group' and
    # 'Full Name' dropped, but never used it; that statement raised KeyError
    # for any input without a 'Bravais' column, so it has been removed.

    # Label rewrites applied after the merge. No replacement value is itself a
    # key, so the order of the entries does not matter.
    relabel = {
        'P-tetragonal': 'P-Tetragonal',
        'I-tetragonal': 'I-Tetragonal',
        'P-Trigonal': 'R-Trigonal',
        'A-Orthorhombic': 'C-Orthorhombic',
        'P-hexagonal': 'P-Hexagonal',
    }
    for rawLabel, canonicalLabel in relabel.items():
        merged.loc[merged['Bravais Lattice'] == rawLabel, 'Bravais Lattice'] = canonicalLabel
    return merged

##### Load Dataframes and add Bravais Lattice if needed

In [None]:
# Locations of the three source CSV files. These are placeholders: set them
# before running the loading cells, which pass them straight to pd.read_csv.
pathToICSD = ''
pathToCSD = ''
pathToCOD = ''

In [None]:
# Read the ICSD table, then label each row with its Bravais lattice, looked up
# from the space-group number stored in the 'sym_group' column.
icsd_raw = pd.read_csv(pathToICSD)
ICSD = addBravaisLattice(icsd_raw, spaceGroupTag='sym_group')

In [5]:
# Read the CSD table. No Bravais lattice is added here; the later column
# selection expects the file to already have a 'Bravais Lattice' column.
CSD = pd.read_csv(pathToCSD)

In [6]:
# Read the COD table. No Bravais lattice is added here; the later column
# selection expects the file to already have a 'Bravais Lattice' column.
COD = pd.read_csv(pathToCOD)

  COD = pd.read_csv('COD.csv')


### Combining ICSD, COD, and CSD

1. COD and CSD: Copy the 'Chemical Sum' column to 'name' and remove all spaces so it matches ICSD
2. ICSD: Add a path column built from cif_names
3. ICSD, COD, and CSD: Add a 'Database' column that is either ICSD, COD, or CSD
4. Make sure the columns are the same in every database

List of Final Columns
'name', 'Local Path', 'Database', 'Publication Year', 'a', 'b','c','alpha','beta','gamma','Volume', 'Bravais Lattice', 'Space Group','Numeric Bravais Lattice'

'name' = chemical name with no spaces

'Local Path' = path to the file within the database's folder (inside the parent folder)

'Publication Year' = year it was published

'Bravais Lattice' = Bravais lattice as a string

'Space Group' = numerical space group

'Numeric Bravais Lattice' = 0-13 code for the Bravais lattice, see mapping below


In [None]:
# Task 1: build the 'name' column for COD and CSD by removing every space
# from their 'Chemical Sum' values.
for formula_frame in (COD, CSD):
    formula_frame['name'] = formula_frame['Chemical Sum'].str.replace(' ', '')

In [None]:
# Task 2: give every database a 'Local Path' column.
# ICSD paths are the fixed prefix plus the CIF number zero-padded to six digits.
icsd_cif_ids = ICSD['cif_names'].map(lambda cif: format(int(cif), '06d'))
ICSD['Local Path'] = "diffraction.icsd_" + icsd_cif_ids
# CSD and COD already carry a path column; copy it under the shared name.
CSD['Local Path'] = CSD['File Path']
COD['Local Path'] = COD['Local Zip Path']

In [None]:
# Task 3: tag every row with the database it came from.
for source_frame, source_label in ((ICSD, 'ICSD'), (COD, 'COD'), (CSD, 'CSD')):
    source_frame['Database'] = source_label

In [None]:
# Copy each database's own space-group column to the shared 'Space Group' name.
space_group_sources = (
    (COD, 'Space Group IT Number'),
    (ICSD, 'sym_group'),
    (CSD, 'Space Group IT Number'),
)
for group_frame, group_column in space_group_sources:
    group_frame['Space Group'] = group_frame[group_column]

In [None]:
# Cell volume: each database uses its own column name; copy to 'Volume'.
for volume_frame, volume_column in ((COD, 'volume'), (ICSD, 'V'), (CSD, 'Cell Volume')):
    volume_frame['Volume'] = volume_frame[volume_column]

# CSD uses long names for the cell parameters; copy them to the short names.
csd_cell_columns = {
    'a': 'Cell Length A',
    'b': 'Cell Length B',
    'c': 'Cell Length C',
    'alpha': 'Cell Angle Alpha',
    'beta': 'Cell Angle Beta',
    'gamma': 'Cell Angle Gamma',
}
for short_name, csd_name in csd_cell_columns.items():
    CSD[short_name] = CSD[csd_name]

# Publication year under the shared name. CSD is not touched here, so it must
# already have a 'Publication Year' column for the later column selection.
ICSD['Publication Year'] = ICSD['publication_year']
COD['Publication Year'] = COD['pubYear']

In [None]:
# Shared schema of the combined table. Every database is reduced to exactly
# these columns, in this order.
FINAL_COLUMNS = ['name', 'Local Path', 'Database', 'Publication Year',
                 'a', 'b', 'c', 'alpha', 'beta', 'gamma',
                 'Volume', 'Bravais Lattice', 'Space Group']

# .copy() makes each result an independent frame, so adding a column to one of
# them later (e.g. ICSD['Numeric Bravais Lattice']) does not trigger pandas'
# SettingWithCopyWarning.
COD = COD[FINAL_COLUMNS].copy()
ICSD = ICSD[FINAL_COLUMNS].copy()
CSD = CSD[FINAL_COLUMNS].copy()

In [None]:
# Stack CSD on top of ICSD, row-wise. COD is currently left out of the
# combined table; concatenate [COD, ICSD, CSD] instead to include it.
CrystalData = pd.concat([CSD, ICSD])
# Every column except the publication year.
cols_to_check = CrystalData.columns[CrystalData.columns != 'Publication Year'].tolist()

In [None]:
# Integer code -> Bravais lattice label; codes are the list positions 0-13.
mapping = dict(enumerate([
    'F-Cubic',
    'I-Cubic',
    'P-Cubic',
    'P-Hexagonal',
    'C-Monoclinic',
    'P-Monoclinic',
    'C-Orthorhombic',
    'F-Orthorhombic',
    'I-Orthorhombic',
    'P-Orthorhombic',
    'R-Trigonal',
    'I-Tetragonal',
    'P-Tetragonal',
    'P-Triclinic',
]))
# Reverse lookup: label -> integer code.
my_dict_rev = {label: code for code, label in mapping.items()}
# Labels that are not in the mapping become NaN in the numeric column.
ICSD['Numeric Bravais Lattice'] = ICSD['Bravais Lattice'].map(my_dict_rev)
ICSD.to_csv('ICSD_With_Bravais.csv')

In [None]:
# Save the combined table. With to_csv's defaults the row index is written as
# the first, unnamed column of the file.
CrystalData.to_csv('Crystal Database.csv')

In [None]:
# Reload the combined table. The file was written with the row index as its
# first column, so read that column back as the index; otherwise it shows up
# as a spurious 'Unnamed: 0' data column.
CrystalData = pd.read_csv('Crystal Database.csv', index_col=0)
ignore_column = 'Publication Year'

# Drop rows with NaN values, ignoring the specified column
required_columns = [col for col in CrystalData.columns if col != ignore_column]
df_cleaned = CrystalData.dropna(subset=required_columns)

In [None]:
# Rebind the main name to the NaN-filtered table (both names now refer to the
# same frame), then display it as the cell output.
CrystalData = df_cleaned
CrystalData