In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import tabulate as tabulate

In [None]:
# Project sub-directories, each resolved against the current working directory.
DATA_PATH, MODELS_PATH, WEIGHTS_PATH, ASSETS_PATH, RESULTS_PATH = (
    os.path.join(os.getcwd(), folder_name)
    for folder_name in ('Data', 'Models', 'Weights', 'Assets', 'Results')
)

In [None]:
# Load the raw dataset.
# NOTE(review): absolute, machine-specific path -- this cell only runs on the
# original author's machine; consider deriving the location from DATA_PATH.
df = pd.read_csv(
    r"C:\Users\joeya\Documents\Fokwa Group\ML_tutorial\intermetallics\intermetallics_dataset_Jarvis_Full2.csv"
)
print(df.shape)

# Discard the first four rows in place and report the shape again.
df.drop(index=df.index[:4], inplace=True)
print(df.shape)

# Move material_id and formula columns to first and second position
first_column, second_column = df.pop('material_id'), df.pop('formula')
df.insert(loc=0, column='material_id', value=first_column)
df.insert(loc=1, column='formula', value=second_column)
print(df.shape)

In [None]:
# Drop rows whose magnetization per formula unit lies outside [0.10, 80]:
# values below 0.10 and values above 80 are removed in place.
# Missing values compare False on both tests, so such rows are kept.
magnetization = df['total_magnetization_normalized_formula_units']
out_of_range = (magnetization < 0.10) | (magnetization > 80.000)
df.drop(index=df.index[out_of_range], inplace=True)

print(df.shape)

In [None]:
# Count occurrences of every element listed in the `elements` column and
# save the tally as a CSV table.
# NOTE(review): only the square brackets are stripped from each entry; if the
# stored strings contain quotes (e.g. "'Fe'"), the quotes stay in the labels
# -- confirm against the CSV.

counts = (
    df.elements
    .str.replace(r'\[|\]', '', regex=True)
    # Raw string: '\s' in a normal string literal is an invalid escape
    # sequence and triggers a warning on recent Python versions.
    .str.split(r',\s')
    .explode()
    .value_counts()
)
print(counts)
counts_dict = counts.to_dict()
countsdf = pd.DataFrame(counts)
countsdf.to_csv(
    'C:/Users/joeya/Documents/Fokwa Group/ML_tutorial/intermetallics/tables&lists/Set7/ElementalCount.csv')

In [None]:
# Plot updated crystal system distribution

# Tally the number of rows per crystal system, in order of first appearance.
crystal_system_count_dict = {}
for entry in df['crystal_system']:
    # Default of 0 plus 1 makes the first occurrence count as 1, so every
    # bar shows the true number of rows rather than one fewer.
    crystal_system_count_dict[entry] = crystal_system_count_dict.get(entry, 0) + 1


plt.rcParams.update({'font.size': 30})
plt.figure(figsize=(25, 25))
# One bar per crystal system; heights are passed as a list rather than a
# dict view so they are an ordinary sequence.
plot = plt.bar(range(len(crystal_system_count_dict)),
               list(crystal_system_count_dict.values()), align='center')
plt.xticks(range(len(crystal_system_count_dict)), [
           str(key) for key in crystal_system_count_dict])
plt.title('Distribution of crystal systems within dataset')

# Save a transparent and an opaque version of the same figure.
plt.savefig(r'C:\Users\joeya\Documents\Fokwa Group\ML_tutorial\intermetallics\figures\crystal_systems_transparent.png',
            bbox_inches='tight', dpi=600, transparent=True)
plt.savefig(r'C:\Users\joeya\Documents\Fokwa Group\ML_tutorial\intermetallics\figures\crystal_systems.png',
            bbox_inches='tight', dpi=600)

In [None]:
# create histogram of magnetization per formula unit
print(df['total_magnetization_normalized_formula_units'].describe())

plt.rcParams['font.size'] = 10
plt.rcParams['figure.dpi'] = 600

# Start a fresh figure: without it, any figure that is still open (e.g. when
# the notebook is run as a script) would receive the histogram on top of its
# existing contents, and the figure.dpi setting above only applies to
# figures created after it is set.
plt.figure()

# plot1 holds the value returned by plt.hist.
plot1 = plt.hist(df['total_magnetization_normalized_formula_units'],
                 color='blue', edgecolor='black', bins=132, log=True)

plt.xlabel('Magnetization ($\u03bc_B$/ f.u.)')
plt.ylabel('log(counts)')
plt.title('Frequency of magnetization values')

# Save a transparent and an opaque version of the same figure.
plt.savefig(r'C:\Users\joeya\Documents\Fokwa Group\ML_tutorial\intermetallics\figures\magnetization_histogram3_transparent.png',
            bbox_inches='tight', dpi=600, transparent=True)
plt.savefig(r'C:\Users\joeya\Documents\Fokwa Group\ML_tutorial\intermetallics\figures\magnetization_histogram3.png',
            bbox_inches='tight', dpi=600)

In [None]:
# Doesn't work on Spyder, so I did this on Jupyter
# List of Matminer featurizers used: AtomicFraction
# Write the cleaned dataset to CSV, leaving out the index column.
df.to_csv(
    path_or_buf=r"C:\Users\joeya\Documents\Fokwa Group\ML_tutorial\intermetallics\intermetallics_Fulldataset_cleaned2.csv",
    index=False,
)