In [None]:
from __future__ import annotations

import warnings

import torch
from pymatgen.core import Lattice, Structure

import matgl

# To suppress warnings for clearer output
warnings.simplefilter("ignore")

import time
import csv
from tqdm import tqdm
import pandas as pd

In [None]:
import os
phase = "Phase 4 2.0"    # Change here: input folder name, also used as the prefix of every output folder/file name below
 

In [None]:
# Directory that holds the input structure files for the selected phase.
folder_path = f"{phase}"  # CHANGE DIRECTORY

# Collect the names of regular files only (sub-directories are skipped).
# os.listdir returns entries in arbitrary order, so the names are sorted to
# make the processing order (and anything derived from it) reproducible.
file_list = sorted(
    f for f in os.listdir(folder_path) if os.path.isfile(os.path.join(folder_path, f))
)
len(file_list)


In [None]:
# Preview the first file name with its last four characters removed
# (presumably the extension, i.e. a dot plus three letters -- confirm for your files).
file_list[0][:-4]

In [None]:
# Time the whole run: process_time() measures CPU time, time() measures wall-clock time.
start = time.process_time()
start_2 = time.time()

folder_name = f"{phase} cif"          # CHANGE NAME OF THE FOLDER
# Ensure the results directory exists
os.makedirs(folder_name, exist_ok=True)

# NOTE(review): `io` is not imported in any visible cell. The read/write calls
# below look like ASE's `ase.io` (`from ase import io`) -- confirm and add the
# import to the notebook's import cell so this cell runs on a fresh kernel.

model = matgl.load_model("MEGNet-MP-2019.4.1-BandGap-mfi")

# One (file name, [PBE, GLLB-SC, HSE, SCAN]) tuple per processed file.
bandgap_output = []

for j in tqdm(file_list, total=len(file_list), desc="Processing files"):
    bandgap_list = []

    # Strip the actual extension instead of blindly dropping four characters,
    # so names such as "x.vasp" or extension-less files are not mangled.
    stem = os.path.splitext(j)[0]
    cif_path = os.path.join(folder_name, f"{stem}.cif")

    # Convert the input file to CIF, then load that CIF as a pymatgen Structure.
    atoms = io.read(os.path.join(phase, j))
    io.write(cif_path, atoms)
    struct_2 = Structure.from_file(cif_path)

    # The integer state attribute selects which band-gap flavour the model predicts.
    for i, method in ((0, "PBE"), (1, "GLLB-SC"), (2, "HSE"), (3, "SCAN")):
        graph_attrs = torch.tensor([i])
        bandgap = model.predict_structure(structure=struct_2, state_attr=graph_attrs)
        # Store a plain Python float rather than the returned tensor so the
        # DataFrame / CSV exports contain numbers instead of tensor reprs.
        bandgap_list.append(float(bandgap))

    bandgap_output.append((j, bandgap_list))

end = time.process_time()
end_2 = time.time()

print(f"CPU Time: {end - start} seconds")
print(f"Elapsed Time: {end_2 - start_2} seconds")
    

In [None]:
def flatten_once(nested_list):
    """Return a new list with exactly one level of list nesting removed.

    Elements of ``nested_list`` that are ``list`` instances are spliced into
    the result in place; every other element (including tuples and any lists
    nested deeper inside a spliced list) is copied over unchanged.
    """
    result = []
    for element in nested_list:
        # Wrap non-list elements so both cases reduce to a list concatenation.
        result += element if isinstance(element, list) else [element]
    return result

# Turn each (file name, [band gaps]) record into one flat row: [file name, gap, gap, ...].
flat_list = [flatten_once(record) for record in bandgap_output]




In [None]:
# Load the existing results table for this phase from CSV
# (the file name is built from `phase`; adjust the suffix if your file is named differently).
df_csv = pd.read_csv(f'{phase} full result v2.csv')  

In [None]:
# Tabulate the flattened predictions: one row per file, one column per band-gap method.
bandgap_columns = ["File name", "PBE", "GLLB-SC", "HSE", "SCAN"]
df_list = pd.DataFrame(flat_list, columns=bandgap_columns)

In [None]:
# Left-join the predicted band gaps onto the existing results table using the
# shared "File name" column: every row of df_csv is kept, and rows without a
# matching prediction get missing values in the band-gap columns.
merged_df = df_csv.merge(df_list, how="left", on="File name")


In [None]:
# Save the merged table (existing results plus predicted band gaps) without the DataFrame index column.
merged_df.to_csv(f'{phase} full list bandgap.csv', index = False)

In [None]:
file_path = f'{phase} flat list bg.csv'  # CHANGE FILE NAME

# Dump the flattened predictions to CSV: header row first, then one row per file.
header = ["File name", "PBE", "GLLB-SC", "HSE", "SCAN"]
with open(file_path, mode='w', newline='') as csv_file:
    csv_writer = csv.writer(csv_file)
    csv_writer.writerow(header)
    csv_writer.writerows(flat_list)

print(f"Data saved to {file_path}")

