In [42]:
import numpy as np
import json
import bz2
import pandas as pd 
import os
import seaborn as sns
from tqdm import tqdm
import matplotlib.pyplot as plt
import psutil
# import csv
import pickle
# import pandas as pd
from pymatgen.core import Structure
import math
pd.options.display.max_colwidth = 100
sns.set_theme(style = 'ticks')

In [None]:
# def save_arr_to_csv(arr, path):
#     with open(path, mode = 'w', newline = '') as csvfile:
#         writer = csv.writer(csvfile)
#         writer.writerows(arr)

# def read_from_csv(path):
#     with open(path, mode = 'r') as csvfile:
#         reader = csv.reader(csvfile)
#         arr = [row for row in reader]
#     return arr

#### Custom Methods

In [43]:
def separate_forces(var_forces):
    arr = []
    for index, force_arr in enumerate(var_forces):
        if len(force_arr) > 1:
            # print(force_arr)
            # print(len(force_arr))
            for i in range(len(force_arr)):
                # print(force_arr[i])
                arr.append(force_arr[i])
        else:
            arr.append(force_arr)
    return arr

def get_coordinate(array, axis = 'x'):

    switcher = {
        "x": int(0),
        "y": int(1),
        "z": int(2),
    }

    switch = switcher.get(axis, "nothing")
    coordinate_arr = [arr[switch] for arr in array] 
    print(coordinate_arr)
    return coordinate_arr


def magnitude_vec(vector):
    print("Vector: {}".format(vector)) 
    # print(sum(pow(element, 2) for element in vector))
    # print([element for element in vector])
    print(math.sqrt(sum(np.power(element, 2) for element in vector)))
    return math.sqrt(sum(np.power(element, 2) for element in vector))

    # return math.sqrt(sum(np.power(element, 2) for element in vector))

def abs_forces_per_run(var_forces):
    arr = []
    for index, force_arr in enumerate(var_forces):
        if len(force_arr) > 1:
            print(force_arr)
            sum_arr = 0
            for i in range(len(force_arr)):
                print(force_arr[i])
                sum_arr += magnitude_vec(force_arr[i])
            arr.append(sum_arr)
        else:
            arr.append(magnitude_vec(force_arr[0]))
    return arr


def import_df(path):
    df = pd.read_pickle("{}".format(path))
    return df


def add_cum_count(df):
    cum_series = df.groupby('run').cumcount()
    cum_series = cum_series.astype(str)
    df['run'] = df['run'] + "_" + cum_series


In [44]:
def get_df(directory, overwrite = False):
    if overwrite == True:
        print("Number of files in directory: {}".format(len(os.listdir(directory))))
        files = os.listdir(directory)
        print(files)

        df_data = pd.DataFrame(columns=['run', 'energy', 'forces', 'stress_xx', 'stress_xy', 'stress_xz', 'stress_yx', 'stress_yy', 'stress_yz', 'stress_zx', 'stress_zy', 'stress_zz'])

        for file_name in files:
            print("Current file selected: {}".format(file_name))
            print('RAM memory % used:', psutil.virtual_memory()[2])
            # Getting usage of virtual_memory in GB ( 4th field)
            print('RAM Used (GB):', psutil.virtual_memory()[3]/1000000000)
            with bz2.BZ2File("{}/{}".format(directory,file_name)) as file:
                for line in file:
                    line = line.decode().strip()
                    if line in {"[", "]"}:
                        continue
                    if line.endswith(" "):
                        line = line[:-1]
                    entity =json.loads(line)
                    for run in entity.keys():
                        for item in entity[run]:
                            # print(item)
                            struc = Structure.from_dict(item['structure'])
                            forces = item['forces']
                    
                            stress = item['stress']
                            energy = item['energy']
                            
                            # if energy > 0:
                            #     print("There is a positive energy entry: {}".format(energy))
                            
                            data_run = pd.DataFrame({'run': run,
                                                        "structure": [struc],
                                                        "energy": energy, 
                                                        "forces": [forces], 
                                                        'stress_xx': [stress[0][0]], 'stress_xy': [stress[0][1]], 'stress_xz':[stress[0][2]], 
                                                        'stress_yx': [stress[1][0]], 'stress_yy': [stress[1][1]], 'stress_yz': [stress[1][2]], 
                                                        'stress_zx': [stress[2][0]], 'stress_zy': [stress[2][1]], 'stress_zz': [stress[2][2]]})

                            data_run = pd.DataFrame(data_run)
                            df_data = pd.concat([df_data, data_run])

        df_data.reset_index(inplace = True, drop = True)

        add_cum_count(df_data)
        
        df_data.memory_usage()
        df_data.to_pickle("df_data.pkl")

    elif overwrite == False:
       df_data = import_df('df_data.pkl')

    return df_data

def create_arr(directory):

    print("Number of files in directory: {}".format(len(os.listdir(directory))))
    files = os.listdir(directory)
    print(files)

    dict_id = {}
    # df_id = pd.DataFrame(columns = ["id", "run"])

    stress_arr = []
    energy_arr = []
    forces_arr = []
    index = 0

    for file_name in tqdm(files):
        print("Current file selected: {}".format(file_name))
        
        with bz2.BZ2File("{}/{}".format(directory,file_name)) as file:
            for line in tqdm(file):
                line = line.decode().strip()
                if line in {"[", "]"}:
                    continue
                if line.endswith(" "):
                    line = line[:-1]
                entity =json.loads(line)
                for run in entity.keys():
                    for item in entity[run]:
                        # print(len(entity[run]))
                        struc = Structure.from_dict(item['structure'])
                        forces = item['forces']
                        # force_1 = item['forces'][0]
                        stress = item['stress']
                        energy = item['energy']
                        
                        dict_id[index] = run

                        stress_arr.append(stress)
                        forces_arr.append(forces)
                        energy_arr.append(energy)
                        
                        with open('log.txt', 'a') as f:
                            f.write("{}-{}: Energy: {}, Forces: {}, Stress: {} + \n".format(run, index, energy, forces, stress))
                        index += 1

    return energy_arr, stress_arr, forces_arr, dict_id

In [45]:
energy_arr, stress_arr, forces_arr, dict_id = create_arr("geo_opt")

Number of files in directory: 92
['2_spg29.json.bz2', '2_spg28.json.bz2', 'ml_2_A2B7.json.bz2', 'ml_2_A9B10.json.bz2', 'ml_2_A9B11.json.bz2', 'ml_3_AB2C6.json.bz2', '2_spg17.json.bz2', 'ml_2_A3B4.json.bz2', 'ml_2_A3B5.json.bz2', '1_elements.json.bz2', 'ml_2_A4B9.json.bz2', 'ml_4_ABC2D6.json.bz2', 'ext_c2db_0.25_3.json.bz2', 'ext_c2db_0.25_2.json.bz2', '2_quasi.json.bz2', '2_spg01.json.bz2', 'ml_2_A5B11.json.bz2', '2_spg34.json.bz2', '2_spg35.json.bz2', '2_spg43.json.bz2', '2_spg70.json.bz2', 'ml_3_ABC6.json.bz2', 'ml_2_AB6.json.bz2', 'ml_2_AB7.json.bz2', '2_extra01.json.bz2', '2_spg33.json.bz2', '2_spg32.json.bz2', 'ml_3_A2B2C3.json.bz2', 'ml_2_A3B11.json.bz2', 'ml_2_A3B10.json.bz2', 'ml_2_A3B8.json.bz2', 'ml_3_AB3C8.json.bz2', '3_spg33.json.bz2', 'ml_2_A4B5.json.bz2', '2_spg10.json.bz2', '2_hydrides.json.bz2', 'ml_3_ABC.json.bz2', 'ml_2_A5B7.json.bz2', 'ml_2_A5B6.json.bz2', 'ext_c2db_0.25_4.json.bz2', 'ext_c2db_0.25_5.json.bz2', 'ml_3_ABC2.json.bz2', 'ml_3_ABC3.json.bz2', '2_spg36.jso

  0%|          | 0/92 [00:00<?, ?it/s]

Current file selected: 2_spg29.json.bz2


1it [00:03,  3.65s/it]
  1%|          | 1/92 [00:03<05:32,  3.65s/it]

Current file selected: 2_spg28.json.bz2


1it [00:17, 17.53s/it]
  2%|▏         | 2/92 [00:21<17:43, 11.82s/it]

Current file selected: ml_2_A2B7.json.bz2


1it [00:02,  2.08s/it]
  3%|▎         | 3/92 [00:23<10:56,  7.38s/it]

Current file selected: ml_2_A9B10.json.bz2


1it [00:01,  1.38s/it]
  4%|▍         | 4/92 [00:24<07:20,  5.01s/it]

Current file selected: ml_2_A9B11.json.bz2


1it [00:07,  7.77s/it]
  5%|▌         | 5/92 [00:32<08:42,  6.00s/it]

Current file selected: ml_3_AB2C6.json.bz2


1it [00:21, 21.68s/it]
  7%|▋         | 6/92 [00:54<16:14, 11.34s/it]

Current file selected: 2_spg17.json.bz2


1it [00:04,  4.05s/it]
  8%|▊         | 7/92 [00:58<12:41,  8.95s/it]

Current file selected: ml_2_A3B4.json.bz2


1it [00:10, 10.71s/it]
  9%|▊         | 8/92 [01:08<13:19,  9.52s/it]

Current file selected: ml_2_A3B5.json.bz2


1it [00:08,  8.06s/it]
 10%|▉         | 9/92 [01:16<12:32,  9.06s/it]

Current file selected: 1_elements.json.bz2


1it [00:02,  2.54s/it]
 11%|█         | 10/92 [01:19<09:38,  7.05s/it]

Current file selected: ml_2_A4B9.json.bz2


1it [00:11, 11.10s/it]
 12%|█▏        | 11/92 [01:30<11:11,  8.29s/it]

Current file selected: ml_4_ABC2D6.json.bz2


1it [00:06,  6.78s/it]
 13%|█▎        | 12/92 [01:37<10:26,  7.83s/it]

Current file selected: ext_c2db_0.25_3.json.bz2


1it [00:00,  8.19it/s]
 14%|█▍        | 13/92 [01:37<07:14,  5.50s/it]

Current file selected: ext_c2db_0.25_2.json.bz2


1it [00:00,  1.18it/s]
 15%|█▌        | 14/92 [01:38<05:19,  4.09s/it]

Current file selected: 2_quasi.json.bz2


1it [00:02,  2.29s/it]
 16%|█▋        | 15/92 [01:40<04:33,  3.55s/it]

Current file selected: 2_spg01.json.bz2


1it [00:00,  5.27it/s]
 17%|█▋        | 16/92 [01:40<03:12,  2.54s/it]

Current file selected: ml_2_A5B11.json.bz2


1it [00:05,  5.83s/it]
 18%|█▊        | 17/92 [01:46<04:24,  3.53s/it]

Current file selected: 2_spg34.json.bz2


1it [00:02,  2.90s/it]
 20%|█▉        | 18/92 [01:49<04:07,  3.34s/it]

Current file selected: 2_spg35.json.bz2


1it [00:01,  1.05s/it]
 21%|██        | 19/92 [01:50<03:13,  2.65s/it]

Current file selected: 2_spg43.json.bz2


1it [02:40, 160.13s/it]
 22%|██▏       | 20/92 [04:30<59:57, 49.96s/it]

Current file selected: 2_spg70.json.bz2


1it [04:35, 275.79s/it]
 23%|██▎       | 21/92 [09:06<2:19:20, 117.75s/it]

Current file selected: ml_3_ABC6.json.bz2


1it [00:22, 22.43s/it]
 24%|██▍       | 22/92 [09:29<1:44:00, 89.16s/it] 

Current file selected: ml_2_AB6.json.bz2


1it [00:03,  3.60s/it]
 25%|██▌       | 23/92 [09:32<1:13:00, 63.48s/it]

Current file selected: ml_2_AB7.json.bz2


1it [00:00,  1.34it/s]
 26%|██▌       | 24/92 [09:33<50:36, 44.66s/it]  

Current file selected: 2_extra01.json.bz2


1it [00:01,  1.81s/it]
 27%|██▋       | 25/92 [09:35<35:30, 31.80s/it]

Current file selected: 2_spg33.json.bz2


1it [00:00,  1.72it/s]
 28%|██▊       | 26/92 [09:35<24:40, 22.44s/it]

Current file selected: 2_spg32.json.bz2


1it [00:03,  3.68s/it]
 29%|██▉       | 27/92 [09:39<18:12, 16.81s/it]

Current file selected: ml_3_A2B2C3.json.bz2


1it [00:00, 12.70it/s]


Current file selected: ml_2_A3B11.json.bz2


1it [00:04,  4.49s/it]
 32%|███▏      | 29/92 [09:44<10:36, 10.11s/it]

Current file selected: ml_2_A3B10.json.bz2


1it [00:04,  4.95s/it]
 33%|███▎      | 30/92 [09:49<09:07,  8.83s/it]

Current file selected: ml_2_A3B8.json.bz2


1it [00:03,  3.04s/it]
 34%|███▎      | 31/92 [09:52<07:26,  7.32s/it]

Current file selected: ml_3_AB3C8.json.bz2


1it [00:17, 17.31s/it]
 35%|███▍      | 32/92 [10:09<10:02, 10.04s/it]

Current file selected: 3_spg33.json.bz2


1it [01:26, 86.25s/it]
 36%|███▌      | 33/92 [11:35<30:50, 31.37s/it]

Current file selected: ml_2_A4B5.json.bz2


1it [00:17, 17.55s/it]
 37%|███▋      | 34/92 [11:53<26:30, 27.43s/it]

Current file selected: 2_spg10.json.bz2


1it [00:01,  1.67s/it]
 38%|███▊      | 35/92 [11:54<18:57, 19.96s/it]

Current file selected: 2_hydrides.json.bz2


1it [02:31, 151.05s/it]
 39%|███▉      | 36/92 [14:26<54:27, 58.35s/it]

Current file selected: ml_3_ABC.json.bz2


1it [00:10, 10.27s/it]
 40%|████      | 37/92 [14:36<40:29, 44.17s/it]

Current file selected: ml_2_A5B7.json.bz2


1it [00:07,  7.04s/it]
 41%|████▏     | 38/92 [14:43<29:50, 33.17s/it]

Current file selected: ml_2_A5B6.json.bz2


1it [00:18, 18.38s/it]
 42%|████▏     | 39/92 [15:01<25:24, 28.77s/it]

Current file selected: ext_c2db_0.25_4.json.bz2


1it [00:01,  1.47s/it]
 43%|████▎     | 40/92 [15:03<17:52, 20.63s/it]

Current file selected: ext_c2db_0.25_5.json.bz2


1it [01:07, 67.54s/it]
 45%|████▍     | 41/92 [16:10<29:26, 34.65s/it]

Current file selected: ml_3_ABC2.json.bz2


1it [00:01,  1.59s/it]
 46%|████▌     | 42/92 [16:12<20:37, 24.76s/it]

Current file selected: ml_3_ABC3.json.bz2


1it [00:02,  2.00s/it]
 47%|████▋     | 43/92 [16:14<14:39, 17.95s/it]

Current file selected: 2_spg36.json.bz2


1it [01:58, 118.03s/it]
 48%|████▊     | 44/92 [18:12<38:20, 47.93s/it]

Current file selected: ml_2_AB2.json.bz2


1it [00:22, 22.35s/it]
 49%|████▉     | 45/92 [18:34<31:32, 40.27s/it]

Current file selected: ml_2_AB3.json.bz2


1it [01:40, 100.62s/it]
 50%|█████     | 46/92 [20:15<44:44, 58.36s/it]

Current file selected: 2_spg09.json.bz2


1it [00:01,  1.68s/it]
 51%|█████     | 47/92 [20:17<31:01, 41.37s/it]

Current file selected: 2_spg08.json.bz2


1it [00:06,  6.43s/it]
 52%|█████▏    | 48/92 [20:23<22:39, 30.89s/it]

Current file selected: ml_2_A4B11.json.bz2


1it [00:06,  6.47s/it]
 53%|█████▎    | 49/92 [20:29<16:53, 23.57s/it]

Current file selected: ml_2_A6B7.json.bz2


1it [01:23, 83.34s/it]
 54%|█████▍    | 50/92 [21:53<29:03, 41.50s/it]

Current file selected: 2_carbon.json.bz2


1it [00:00,  1.00it/s]
 55%|█████▌    | 51/92 [21:54<20:03, 29.35s/it]

Current file selected: ml_2_AB.json.bz2


1it [00:12, 12.08s/it]
 57%|█████▋    | 52/92 [22:06<16:06, 24.17s/it]

Current file selected: ml_2_A5B12.json.bz2


1it [00:06,  6.36s/it]
 58%|█████▊    | 53/92 [22:12<12:14, 18.83s/it]

Current file selected: ml_2_A3B7.json.bz2


1it [00:04,  4.36s/it]
 59%|█████▊    | 54/92 [22:17<09:10, 14.49s/it]

Current file selected: 2_spg21.json.bz2


1it [01:54, 114.24s/it]
 60%|█████▉    | 55/92 [24:11<27:23, 44.42s/it]

Current file selected: ml_2_A8B9.json.bz2


1it [00:08,  8.08s/it]
 61%|██████    | 56/92 [24:19<20:06, 33.52s/it]

Current file selected: ml_4_AB2C2D2.json.bz2


1it [00:00,  2.13it/s]
 62%|██████▏   | 57/92 [24:19<13:46, 23.60s/it]

Current file selected: ext_c2db_0.25_1.json.bz2


1it [00:05,  5.79s/it]
 63%|██████▎   | 58/92 [24:25<10:20, 18.26s/it]

Current file selected: ml_3_AB3C7.json.bz2


1it [00:01,  1.01s/it]
 64%|██████▍   | 59/92 [24:26<07:11, 13.09s/it]

Current file selected: ml_2_A8B11.json.bz2


1it [00:05,  5.05s/it]
 65%|██████▌   | 60/92 [24:31<05:41, 10.68s/it]

Current file selected: ml_2_A2B5.json.bz2


1it [00:04,  4.73s/it]
 66%|██████▋   | 61/92 [24:36<04:35,  8.89s/it]

Current file selected: 3_spg09.json.bz2


1it [00:07,  7.96s/it]
 67%|██████▋   | 62/92 [24:44<04:18,  8.62s/it]

Current file selected: ml_3_AB2C4.json.bz2


1it [00:00,  1.84it/s]
 68%|██████▊   | 63/92 [24:45<02:59,  6.20s/it]

Current file selected: ml_3_AB2C5.json.bz2


1it [00:00,  8.28it/s]
 70%|██████▉   | 64/92 [24:45<02:02,  4.37s/it]

Current file selected: 2_spg15.json.bz2


1it [02:40, 160.17s/it]
 71%|███████   | 65/92 [27:25<23:00, 51.12s/it]

Current file selected: ml_2_A5B9.json.bz2


1it [02:33, 153.20s/it]
 72%|███████▏  | 66/92 [29:58<35:25, 81.74s/it]

Current file selected: ml_2_A5B8.json.bz2


1it [00:08,  8.23s/it]
 73%|███████▎  | 67/92 [30:06<24:52, 59.69s/it]

Current file selected: 3_spg05.json.bz2


1it [00:12, 12.76s/it]
 74%|███████▍  | 68/92 [30:19<18:14, 45.62s/it]

Current file selected: ext_c2db_0.25_6.json.bz2


1it [00:00,  2.42it/s]
 75%|███████▌  | 69/92 [30:20<12:17, 32.06s/it]

Current file selected: ml_3_AB2C8.json.bz2


1it [00:03,  3.04s/it]
 76%|███████▌  | 70/92 [30:23<08:33, 23.35s/it]

Current file selected: ml_2_A2B3.json.bz2


1it [00:18, 18.76s/it]
 77%|███████▋  | 71/92 [30:41<07:41, 21.98s/it]

Current file selected: ml_3_AB2C3.json.bz2


1it [00:00,  1.30it/s]
 78%|███████▊  | 72/92 [30:42<05:12, 15.62s/it]

Current file selected: ml_3_AB2C2.json.bz2


1it [00:03,  3.40s/it]
 79%|███████▉  | 73/92 [30:45<03:47, 11.95s/it]

Current file selected: ml_2_A4B7.json.bz2


1it [00:01,  1.64s/it]
 80%|████████  | 74/92 [30:47<02:39,  8.86s/it]

Current file selected: 2_spg13.json.bz2


1it [02:03, 123.81s/it]
 82%|████████▏ | 75/92 [32:51<12:16, 43.35s/it]

Current file selected: 2_spg12.json.bz2


1it [00:00,  1.19it/s]
 83%|████████▎ | 76/92 [32:52<08:09, 30.60s/it]

Current file selected: ml_3_ABC5.json.bz2


1it [00:26, 26.82s/it]
 84%|████████▎ | 77/92 [33:19<07:22, 29.47s/it]

Current file selected: ml_3_ABC4.json.bz2


1it [03:02, 182.34s/it]
 85%|████████▍ | 78/92 [36:21<17:34, 75.33s/it]

Current file selected: 3_spg13.json.bz2


1it [15:44, 944.80s/it]
 86%|████████▌ | 79/92 [52:06<1:12:50, 336.18s/it]

Current file selected: 3_spg12.json.bz2


1it [00:56, 56.81s/it]
 87%|████████▋ | 80/92 [53:03<50:28, 252.37s/it]  

Current file selected: ml_2_AB5.json.bz2


1it [00:08,  8.10s/it]
 88%|████████▊ | 81/92 [53:11<32:50, 179.09s/it]

Current file selected: ml_2_AB4.json.bz2


1it [00:17, 17.88s/it]
 89%|████████▉ | 82/92 [53:29<21:47, 130.74s/it]

Current file selected: 2_spg30.json.bz2


1it [00:09,  9.09s/it]
 90%|█████████ | 83/92 [53:38<14:08, 94.25s/it] 

Current file selected: 2_spg31.json.bz2


1it [08:45, 525.15s/it]
 91%|█████████▏| 84/92 [1:02:23<29:48, 223.52s/it]

Current file selected: ml_2_A7B10.json.bz2


1it [00:11, 11.85s/it]
 92%|█████████▏| 85/92 [1:02:35<18:40, 160.03s/it]

Current file selected: ml_2_A7B11.json.bz2


1it [00:01,  1.81s/it]
 93%|█████████▎| 86/92 [1:02:37<11:15, 112.56s/it]

Current file selected: ml_4_AB2C6D6.json.bz2


1it [00:19, 19.92s/it]
 95%|█████████▍| 87/92 [1:02:57<07:03, 84.77s/it] 

Current file selected: 2_spg73.json.bz2


1it [12:11, 731.29s/it]
 96%|█████████▌| 88/92 [1:15:08<18:34, 278.73s/it]

Current file selected: ml_2_A7B8.json.bz2


1it [00:08,  8.08s/it]
 97%|█████████▋| 89/92 [1:15:16<09:52, 197.54s/it]

Current file selected: ml_2_A7B9.json.bz2


1it [00:04,  4.99s/it]
 98%|█████████▊| 90/92 [1:15:21<04:39, 139.78s/it]

Current file selected: 2_spg04.json.bz2


1it [00:05,  5.31s/it]
 99%|█████████▉| 91/92 [1:15:26<01:39, 99.44s/it] 

Current file selected: 2_spg05.json.bz2


1it [00:00,  1.58it/s]
100%|██████████| 92/92 [1:15:27<00:00, 49.21s/it]


In [46]:
def save_arr_json(arr, path):
    with open(path, mode = 'w') as line:
        json.dump(arr, line)

def read_arr_json(path):
    with open(path, mode = 'r') as line:
        arr = json.load(line)
    return arr

def save_id_dict(dict, path):
    # BEWARE JSON turns keys into strings and not int
    with open(path, 'w') as line:
        json.dump(dict, line)

def load_id_dict(path):
    with open(path, 'r') as line:
        arr = json.load(line)
    return arr

In [47]:
save_arr_json(forces_arr, "forces_arr.txt")
save_arr_json(stress_arr, "stress_arr.txt")
save_arr_json(energy_arr, "energy_arr.txt")
save_id_dict(dict_id, "id_run.txt")

In [48]:
forces_arr = read_arr_json("forces_arr.txt")
energy_arr = read_arr_json("energy_arr.txt")
stress_arr = read_arr_json("stress_arr.txt")
id_run = load_id_dict("id_run.txt")

In [None]:
def histogram_1d_energy(arr, bin = 500, xrange = [-150,100])
    fig, ax = plt.subplots()
    n, bins, patches = ax.hist(arr, bins = bin)
    ax.set_xlabel("{}".format("Energy")
    ax.set_xlim(xrange)
    ax.set_ylabel("Counts")

In [41]:
print(id_run["14000"])

runs_2/spg29/Ni/NiBr2/xxx_02p-00_29-2-Ni2a.Br4b


In [None]:
print(np.shape(forces_arr))
print(np.shape(stress_arr))
print(stress_arr[1])
print(forces_arr[0])
print(np.shape(energy_arr))

### Import pandas dataframe
Provide the ``overwrite == True`` case to rerun the creation of the dataframe, else if ``overwrite == False`` imports it from the saved .pkl file

In [None]:
df = get_df("geo_opt", overwrite = True)
display(df)

### Plotting Histograms

#### Energy Histogram

In [None]:
histogram_1d(df, quantity = 'energy', bin = 200)

In [None]:
display(df['stress_xx'])

display(df[df['stress_xx'] > 200])
histogram_1d(df, quantity = 'stress_zz', bin = 200, xrange= [-20, 20])
histogram_1d(df, quantity = 'stress_zy', bin = 200, xrange= [-20, 20])
histogram_1d(df, quantity = 'stress_zy', bin = 200, xrange= [-20, 20])

In [None]:
force_arr = separate_forces(df['forces'].to_numpy())    
# print(force_arr)
# print(force_arr[20000])
force = get_coordinate(array= force_arr, axis = "x")
plt.hist(force, bins= 20)
# plt.hist(force, histtype='step')

#  n, bins, patches = ax.hist(force, bins = bin)
#     ax.set_xlabel("{} {}".format("Force", axis))
#     ax.set_xlim(xrange)
#     ax.set_ylabel("Counts")

In [None]:
histogram_1d_force(df, axis = 'x', bin = 10, xrange = [0, 0.4])

In [None]:
energy_force_combined(df)

In [None]:
var = df["forces"].to_numpy()

fig, ax = plt.subplots()
force_arr = separate_forces(var)    

force = abs_forces_per_run(var)
print(type(force))
                            
n, bins, patches = plt.hist(force)
plt.show()

In [None]:
abs_forces_per_run(var_forces= df["forces"].to_numpy())

In [None]:
energy_arr, stress_arr, forces_arr = create_arr("geo_opt_test")

In [None]:
# print(energy_arr)
# print(forces_arr)

print(len(forces_arr))
print(len(energy_arr))

In [None]:
display(df)

In [None]:
positive_entries = df[df['energy'] > 1000]
print(positive_entries)

In [None]:
df = read_df(path = "df_data.pkl")

In [None]:
display(df)
print(df.energy.max())
print(df.energy.min())

In [None]:
histogram_1d_force(df, axis = 'x', bin = 500, xrange = [-150, 150])

In [None]:
histogram_1d(df = df, quantity = "force_x", bin = 500, xrange = [-150,100])

In [None]:
force_arr = separate_forces(var2)
  # print(force_arr)

  print(force_arr[1])

  x_forces = get_coordinate(array= force_arr, axis = "x")

In [None]:
histogram_2d_energy_stress(df)

In [None]:
histogram_1d(df = df, quantity = "energy", bin=2000)

In [None]:
display(df['force_1'])
np.where(pd.isna(df['force_1']))

In [None]:
path = "geo_opt/2_spg01.json.bz2"
path = "geo_opt/ml_3_AB2C4.json.bz2"
quantity = []
path_of_directory = 'geo_opt'
desired_quantity = 'energy'

# df_data = pd.DataFrame(columns = ['structure', 'force_1', 'force_2', 'stress_xx', 'stress_xy', 'stress_xz', "stress_yx", "stress_yy", 'stress_yz', 'stress_zx', 'stress_zy', 'stress_zz'])

df_data = pd.DataFrame(columns=['run', 'energy', 'force_1', 'force_2', 'stress_xx', 'stress_xy', 'stress_xz', 'stress_yx', 'stress_yy', 'stress_yz', 'stress_zx', 'stress_zy', 'stress_zz'])
# file_data = json.load(bz2.open(path, "rb"))
# print(file_data)


def retrieve_forces(force_array):
    force_1, force_2 = force_array[0], force_array[1]
    return force_1, force_2



with bz2.BZ2File(path) as file:
    for line in file:
        line = line.decode().strip()
        if line in {"[", "]"}:
            continue
        if line.endswith(" "):
            line = line[:-1]
        entity =json.loads(line)
        print(entity)
        for runs in entity.keys():
            for item in entity[runs]:
                struc = Structure.from_dict(item['structure'])
                force_1, force_2 = retrieve_forces(item['forces'])
                stress = item['stress']
    
                energy = item['energy']

                data_run = pd.DataFrame({'run': runs,
                                         "structure": [struc],
                                         "energy": energy, 
                                         "force_1": [force_1], "force_2": [force_2], 
                                         'stress_xx': [stress[0][0]], 'stress_xy': [stress[0][1]], 'stress_xz':[stress[0][2]], 
                                         'stress_yx': [stress[1][0]], 'stress_yy': [stress[1][1]], 'stress_yz': [stress[1][2]], 
                                         'stress_zx': [stress[2][0]], 'stress_zy': [stress[2][1]], 'stress_zz': [stress[2][2]]})

                data_run = pd.DataFrame(data_run)
                df_data = pd.concat([df_data, data_run])
 

df_data.memory_usage()

df_data.reset_index(inplace=True, drop =True)
cum_series = df_data.groupby('run').cumcount()
cum_series = cum_series.astype(str)
df_data['run'] = df_data['run'] + "_" + cum_series

display(df_data)
        

def plotting_function(df, desired_quantity, savename):
        counts, bins = np.histogram(df[desired_quantity])
        plt.hist(bins[:-1], bins, weights = counts)
        
        plt.savefig("Figures/{}.pdf".format(savename))


# print(dtype(df_data['energy']))

print(df_data.dtypes)

energy = df_data['energy'].to_numpy()
# print(energy)
stress_xx = df_data['stress_xx'].to_numpy()

# histo = np.histogram2d(energy, stress_xx, bins = (20, 20))
# print(type(histo))






    # fig, ax = plt.subplots()
    # hist = ax.hist2d(var1, var2, bins = (bin, bin), cmap = plt.cm.jet)
    # fig.colorbar(hist)

    # fig.tight_layout()
    # fig.savefig("Figures/{}.pdf".format(savename_fig))
    # plt.show()  








# plt.imshow(histo)

# plotting_function(df_data, "energy", "energy_dist")



#         # Get all the keys (runs)
#         # print(entity.keys())

#         for runs in entity.keys():
#             # The argument in entity specifies the run
#             for i in entity[runs]:                    
# #                 print(i.keys())
#                 value = i[desired_quantity]
#                 forces = i['forces']
#                 stress = i['stress']
#                 structure = i['structure']
#                 energy = i['energy']
#                 # print(forces)
#                 quantity.append(value)
                
# print(quantity)

In [None]:
def return_quantity(directory, desired_quantity):
    # quantity can be structure, energy, forces, stress
#     path = "geo_opt/2_spg01.json.bz2"
    quantity = []
#     ext = (".bz2")
    print("Number of files in directory: {}".format(len(os.listdir(directory))))
    file_names = os.listdir(directory)
    # print(file_names)
#     for files in os.listdir(directory):
    for files in file_names:
        print("Current file selected: {}".format(files))
        with bz2.BZ2File("{}/{}".format(directory,files)) as file:
            for line in file:
                line = line.decode().strip()
                if line in {"[", "]"}:
                    continue
                if line.endswith(" "):
                    line = line[:-1]
                entity =json.loads(line)

                # Get all the keys (runs)
                # print(entity.keys())

                for runs in entity.keys():
                    # The argument in entity specifies the run
                    for i in entity[runs]:                    
#                         print(i.keys())
                        value = i[desired_quantity]
                        quantity.append(value)
#                         forces = i['forces']
#                         stress = i['stress']
#                         structure = i['structure']
#                         energy = i['energy']
#                         print(forces)
                
    with open("{}.txt".format(desired_quantity), 'w') as f:
        for s in quantity:
            f.write(str(s) + '\n')

    return quantity


energy = return_quantity('geo_opt','energy')

In [None]:
with open("energy.txt", 'r') as f:
    energy = [line.rstrip('\n') for line in f]
    print(energy)

# print(energy)

In [None]:
print(energy)

In [None]:
counts, bins = np.histogram(energy)
plt.hist(bins[:-1], bins, weights = counts)