In [None]:
# Supercell Generator: converts sublattices into supercell files of NxN dimension. Aligns positions so lattice vectors a,b are in the xy-plane.
# Also shifts atom positions of materials that straddle the unit cell border in the ab plane.
# Note: two of these will be needed (of different supercell sizes) in order to make use of the variable supercell size augmentations
# Naming: each file is saved in the form supercell_2dm-{ID}_{Element}.xyz


import os
import numpy as np
import re
from ase.io import read, write
from ase.build import make_supercell
from ase import Atoms
from scipy.spatial.transform import Rotation as R

# The folder containing the sublattice XYZ files generated in 1)
INPUT_FOLDER = "Materials/2DMatpedia Sublattices xyz"
# The size of the supercell in N unit cells for an NxN arrangement
SUPERCELL_SIZE = 4
# The output folder for the supercells. The NxN label is derived from
# SUPERCELL_SIZE so the folder name always matches the cells written into it.
OUTPUT_FOLDER = f'Supercells/2DMatpedia Sublattices {SUPERCELL_SIZE}x{SUPERCELL_SIZE}'

# 1. Function to rotate, align, and shift a single ASE Atoms object
def _rotation_onto_z(unit_normal):
    """Return the 3x3 matrix that rotates the unit vector ``unit_normal`` onto +z."""
    z_axis = np.array([0, 0, 1])
    angle = np.arccos(np.clip(np.dot(unit_normal, z_axis), -1.0, 1.0))

    # The rotation axis is perpendicular to both the normal and z.
    axis = np.cross(unit_normal, z_axis)
    axis_norm = np.linalg.norm(axis)
    if axis_norm < 1e-12:
        # Normal is (anti)parallel to z, so the cross product vanishes;
        # any in-plane axis serves for the remaining 0 or pi rotation.
        axis = np.array([1, 0, 0])
    else:
        axis = axis / axis_norm
    return R.from_rotvec(angle * axis).as_matrix()


def _rotation_about_z(angle):
    """Return the 3x3 matrix for a rotation by ``angle`` (radians) about the z-axis."""
    cos_t, sin_t = np.cos(angle), np.sin(angle)
    return np.array([
        [cos_t, -sin_t, 0],
        [sin_t,  cos_t, 0],
        [0,      0,     1],
    ])


def _center_layer_along_c(frac_z):
    """Return fractional c-coordinates shifted so their circular mean sits at 0.5."""
    # Average on the unit circle so atoms on either side of the 0/1 border
    # are treated as neighbours rather than as opposite ends of the cell.
    angles = 2 * np.pi * frac_z
    mean_angle = np.angle(np.mean(np.exp(1j * angles)))
    z_mean = mean_angle / (2 * np.pi)
    if z_mean < 0:
        z_mean += 1
    return np.mod(frac_z + (0.5 - z_mean), 1.0)


def transform_cell(atoms):
    """
    Rotate, align and (if needed) re-centre a single 2D structure.

    Steps:
      a) rotate so that lattice vectors a and b lie in the xy-plane,
      b) rotate about z so that a points along the x-axis,
      c) if the fractional c-coordinates reach below 0.25 and above 0.75,
         shift them so that their circular mean sits at 0.5.

    Parameters:
        atoms: ASE Atoms object providing the cell, positions and chemical symbols.

    Returns:
        A new ASE Atoms object built from the transformed cell and positions,
        with pbc=[True, True, False]. Only symbols, positions and cell are
        carried over from ``atoms``.
    """
    # Independent float copies, so nothing below can touch the input object
    cell = np.array(atoms.get_cell(), dtype=float, copy=True)
    positions = np.array(atoms.get_positions(), dtype=float, copy=True)
    a, b = cell[0], cell[1]

    # a) Bring the (a, b) plane into the xy-plane
    if np.isclose(a[2], 0.0, atol=1e-6) and np.isclose(b[2], 0.0, atol=1e-6):
        print("No rotation needed; a and b are already in the xy-plane.")
    else:
        normal_vector = np.cross(a, b)
        norm = np.linalg.norm(normal_vector)
        if norm < 1e-12:
            # a x b = 0 means there is no well-defined plane; leave as is
            print("Warning: degenerate lattice vectors, skipping plane rotation.")
        else:
            to_xy_plane = _rotation_onto_z(normal_vector / norm)
            cell = (to_xy_plane @ cell.T).T
            positions = (to_xy_plane @ positions.T).T
            print("Rotated to align layer normal with z-axis.")

    # b) Rotate around the z-axis so that a points along x
    a_xy = cell[0][:2]
    if np.linalg.norm(a_xy) > 1e-6:
        theta = np.arctan2(a_xy[1], a_xy[0])
        undo_theta = _rotation_about_z(-theta)
        cell = (undo_theta @ cell.T).T
        positions = (undo_theta @ positions.T).T
        print("Additional rotation around z-axis to align 'a' with x-axis.")
    else:
        print("Vector 'a' is already aligned with x-axis (or nearly so).")

    # Clean small floating noise left over from the rotations
    cell[np.abs(cell) < 1e-8] = 0.0
    positions[np.abs(positions) < 1e-8] = 0.0

    unit_cell = Atoms(
        symbols=atoms.get_chemical_symbols(),
        positions=positions,
        cell=cell,
        pbc=[True, True, False]
    )

    # c) Shift the layer along c if it straddles the cell border.
    # NOTE(review): get_scaled_positions() is called with its defaults, which
    # per the ASE docs wrap the periodic (a, b) directions; writing the array
    # back therefore also stores wrapped in-plane coordinates - confirm intended.
    scaled_positions = unit_cell.get_scaled_positions()
    frac_z = scaled_positions[:, 2]
    threshold_low, threshold_high = 0.25, 0.75
    if np.min(frac_z) < threshold_low and np.max(frac_z) > threshold_high:
        print("Layer spans entire c-range; shifting to center in z.")
        scaled_positions[:, 2] = _center_layer_along_c(frac_z)
        unit_cell.set_scaled_positions(scaled_positions)

    return unit_cell

# 2. Code that parses the .xyz files and generates and saves the supercell files 
# Make sure the destination exists before any supercell is written
os.makedirs(OUTPUT_FOLDER, exist_ok=True)

# Diagonal integer matrix: repeat the cell SUPERCELL_SIZE times along a and b,
# and once along c.
transformation_matrix = np.diag([SUPERCELL_SIZE, SUPERCELL_SIZE, 1])

# List the .xyz files in the input folder with natural sorting
def natural_sort_key(filename):
    """
    Return a sort key that compares the numbers embedded in ``filename`` numerically.

    The name is split into alternating text and digit runs. Digit runs become
    ``int`` and text runs are lower-cased, so "f2.xyz" sorts before "f10.xyz".
    """
    # str.isdecimal() is true exactly for the characters matched by \d, all of
    # which int() accepts. str.isdigit() is also true for characters such as
    # superscript digits, which int() rejects with a ValueError.
    return [
        int(chunk) if chunk.isdecimal() else chunk.lower()
        for chunk in re.split(r'(\d+)', filename)
    ]

# Gather the .xyz inputs in natural order (numeric parts compared as numbers)
xyz_files = sorted(
    (name for name in os.listdir(INPUT_FOLDER) if name.endswith('.xyz')),
    key=natural_sort_key,
)

for xyz_file in xyz_files:
    filepath = os.path.join(INPUT_FOLDER, xyz_file)
    try:
        atoms = read(filepath, format='extxyz')
    except Exception as e:
        # Unreadable inputs are reported and skipped; the run carries on
        print(f"Could not read {xyz_file}: {e}")
    else:
        # The material ID is the file name without its extension
        # (e.g. "2dm-9_Au.xyz" -> "2dm-9_Au")
        material_id = os.path.splitext(xyz_file)[0]

        # Rotate / align / shift the unit cell, then tile it NxN in the plane
        unit_cell = transform_cell(atoms)
        supercell = make_supercell(unit_cell, transformation_matrix)

        # Save as supercell_<material_id>.xyz in the output folder
        out_fname = f"supercell_{material_id}.xyz"
        out_path = os.path.join(OUTPUT_FOLDER, out_fname)
        write(out_path, supercell, format='extxyz')
        print(f"Supercell created for {material_id} => {out_fname}")

print(f"\nAll supercells written to '{OUTPUT_FOLDER}'.")

In [None]:
# Alternative Supercell Generator: A smaller dataset consisting of 2132 flatband sublattices contained within the larger dataset
# used above. Incorporates the exact same logic, except that it extracts the initial structures from
# the 'reduced_sublattice_structure.txt' file that can be found in the GitHub repository

import os
import numpy as np
import ast
import re
from ase.io import write
from ase.build import make_supercell
from ase import Atoms
from scipy.spatial.transform import Rotation as R


# The size of the supercell in N unit cells for an NxN arrangement
SUPERCELL_SIZE = 3
# Text file holding the flatband sublattice structures
INPUT_FILE = 'Materials/reduced_sublattice_structure.txt'
# The NxN label is derived from SUPERCELL_SIZE so the folder name cannot drift
# out of sync with the supercells written into it.
OUTPUT_FOLDER = f'Supercells/Flatband Sublattices {SUPERCELL_SIZE}x{SUPERCELL_SIZE}'


# 1. Function to rotate, align, and shift a single ASE Atoms object
def _rotation_onto_z(unit_normal):
    """Return the 3x3 matrix that rotates the unit vector ``unit_normal`` onto +z."""
    z_axis = np.array([0, 0, 1])
    angle = np.arccos(np.clip(np.dot(unit_normal, z_axis), -1.0, 1.0))

    # The rotation axis is perpendicular to both the normal and z.
    axis = np.cross(unit_normal, z_axis)
    axis_norm = np.linalg.norm(axis)
    if axis_norm < 1e-12:
        # Normal is (anti)parallel to z, so the cross product vanishes;
        # any in-plane axis serves for the remaining 0 or pi rotation.
        axis = np.array([1.0, 0.0, 0.0])
    else:
        axis = axis / axis_norm
    return R.from_rotvec(angle * axis).as_matrix()


def _rotation_about_z(angle):
    """Return the 3x3 matrix for a rotation by ``angle`` (radians) about the z-axis."""
    cos_t, sin_t = np.cos(angle), np.sin(angle)
    return np.array([
        [cos_t, -sin_t, 0],
        [sin_t,  cos_t, 0],
        [0,      0,     1],
    ])


def _center_layer_along_c(frac_z):
    """Return fractional c-coordinates shifted so their circular mean sits at 0.5."""
    # Average on the unit circle so atoms on either side of the 0/1 border
    # are treated as neighbours rather than as opposite ends of the cell.
    angles = 2 * np.pi * frac_z
    mean_angle = np.angle(np.mean(np.exp(1j * angles)))
    z_mean = mean_angle / (2 * np.pi)
    if z_mean < 0:
        z_mean += 1
    return np.mod(frac_z + (0.5 - z_mean), 1.0)


def transform_cell(atoms):
    """
    Rotate the lattice so that (a,b) lies in the xy-plane,
    align vector a along the x-axis, then shift the layer along c if needed.

    The c-shift is applied only when the fractional c-coordinates reach both
    below 0.25 and above 0.75; they are then moved so that their circular
    mean sits at 0.5.

    Parameters:
        atoms: ASE Atoms object providing the cell, positions and chemical symbols.

    Returns:
        A new ASE Atoms instance built from the transformed cell and positions,
        with pbc=[True, True, False]. Only symbols, positions and cell are
        carried over from ``atoms``.
    """
    # Independent float copies, so nothing below can touch the input object
    cell = np.array(atoms.get_cell(), dtype=float, copy=True)
    positions = np.array(atoms.get_positions(), dtype=float, copy=True)
    a, b = cell[0], cell[1]

    # a) Rotate plane into xy-plane (skipped when a and b already have ~0 z)
    if np.isclose(a[2], 0.0, atol=1e-6) and np.isclose(b[2], 0.0, atol=1e-6):
        print("No rotation needed; a and b are already in the xy-plane.")
    else:
        normal_vector = np.cross(a, b)
        norm = np.linalg.norm(normal_vector)
        if norm < 1e-12:
            # a x b = 0 means there is no well-defined plane; leave as is
            print("Warning: degenerate lattice vectors, skipping plane rotation.")
        else:
            to_xy_plane = _rotation_onto_z(normal_vector / norm)
            cell = (to_xy_plane @ cell.T).T
            positions = (to_xy_plane @ positions.T).T
            print("Rotated to align layer normal with z-axis.")

    # b) Rotate around z-axis to align a along x
    a_xy = cell[0][:2]
    if np.linalg.norm(a_xy) > 1e-6:
        theta = np.arctan2(a_xy[1], a_xy[0])
        undo_theta = _rotation_about_z(-theta)
        cell = (undo_theta @ cell.T).T
        positions = (undo_theta @ positions.T).T
        print("Additional rotation around z-axis to align 'a' with x-axis.")
    else:
        print("Vector 'a' is already aligned with x-axis (or nearly so).")

    # Clean small numerical noise left over from the rotations
    cell[np.abs(cell) < 1e-8] = 0.0
    positions[np.abs(positions) < 1e-8] = 0.0

    transformed_cell = Atoms(
        symbols=atoms.get_chemical_symbols(),
        positions=positions,
        cell=cell,
        pbc=[True, True, False]
    )

    # c) Shift layers along c-axis if the layer straddles the cell border.
    # NOTE(review): get_scaled_positions() is called with its defaults, which
    # per the ASE docs wrap the periodic (a, b) directions; writing the array
    # back therefore also stores wrapped in-plane coordinates - confirm intended.
    scaled_positions = transformed_cell.get_scaled_positions()
    frac_z = scaled_positions[:, 2]
    threshold_low = 0.25
    threshold_high = 0.75
    if np.min(frac_z) < threshold_low and np.max(frac_z) > threshold_high:
        print("Layer spans entire c-range; shifting to center in z.")
        scaled_positions[:, 2] = _center_layer_along_c(frac_z)
        transformed_cell.set_scaled_positions(scaled_positions)

    return transformed_cell

# 2. Code that parses the structures from the flatband dataset file ('reduced_sublattice_structure.txt')
# Tokens of an atom row after the label: a [...] group, a {...} group, or a
# bare run of non-whitespace characters. Compiled once, since it is applied
# to every atom row.
_ATOM_FIELD_RE = re.compile(r'\[.*?\]|\{.*?\}|[^\s]+')


def parse_atom_line(line):
    """
    Helper function to parse an atom line of the form:
        index? label [x,y,z] [a,b,c] species properties

    A leading token that starts with a digit is treated as the row index and
    dropped. The next token is the label, and the first four fields after it
    are returned verbatim (as strings).

    Returns:
        dict with keys 'label', 'xyz', 'abc', 'species', 'properties'.

    Raises:
        ValueError: if the line has no label, nothing after the label, or
            fewer than four fields after the label.
    """
    line = line.strip()

    # Remove leading index if present
    if line and line[0].isdigit():
        index_and_rest = line.split(None, 1)
        if len(index_and_rest) < 2:
            # The row holds only an index: report it as a parse failure rather
            # than failing with an IndexError the caller does not handle.
            raise ValueError("Could not parse atom line: {}".format(line))
        line = index_and_rest[1]

    label_and_rest = line.split(None, 1)
    if len(label_and_rest) < 2:
        raise ValueError("Could not parse atom line: {}".format(line))
    label, rest = label_and_rest

    # The expected order is (xyz) (abc) species properties, often in brackets
    fields = _ATOM_FIELD_RE.findall(rest)
    if len(fields) < 4:
        raise ValueError("Could not parse atom line: {}".format(line))

    xyz_str, abc_str, species_str, properties_str = fields[:4]
    return {
        'label': label,
        'xyz': xyz_str,
        'abc': abc_str,
        'species': species_str,
        'properties': properties_str
    }

# 3. Continuation of 2.: parses the whole file and returns the atoms and their positions for each material
def parse_structures(filename):
    """
    Reads the custom text file format and returns a list of materials,
    each as a dict with keys: id, formula, some_code, lattice_vectors, atoms

    Each material block is expected to look like:
        2dm-<id>
        <formula> [<code>]
        lattice vector
        <Python literal with the lattice vectors>
        (optional blank lines)
        <header line, skipped>
        <atom rows, up to the next blank line or '2dm-' line>

    Atom rows that parse_atom_line rejects with ValueError are reported with a
    warning and left out of the material's 'atoms' list. A block that ends
    right after its lattice vectors (no header line) is not returned.

    Raises:
        ValueError: if the file ends in the middle of a block header, if the
            formula line is blank, or if the 'lattice vector' marker is missing.
    """
    with open(filename, 'r') as f:
        lines = [raw.strip() for raw in f]
    n_lines = len(lines)

    def line_at(index, expected):
        # Fetch a mandatory line; a truncated file gets a descriptive error
        # instead of a bare IndexError.
        if index >= n_lines:
            raise ValueError(
                f"Unexpected end of file: expected {expected} at line {index + 1}"
            )
        return lines[index]

    materials = []
    i = 0
    while i < n_lines:
        line = lines[i]
        # Each material block starts with a line that begins with '2dm-...'
        if not line.startswith('2dm-'):
            i += 1
            continue

        material = {'id': line}
        i += 1

        # Next line has formula and (optionally) a code
        formula_and_code = line_at(i, "formula line").split()
        if not formula_and_code:
            raise ValueError(f"Expected formula at line {i+1}")
        material['formula'] = formula_and_code[0]
        material['some_code'] = formula_and_code[1] if len(formula_and_code) > 1 else ''
        i += 1

        # Read lattice vectors: a marker line followed by a Python literal
        if line_at(i, "'lattice vector'") != 'lattice vector':
            raise ValueError(f"Expected 'lattice vector' at line {i+1}")
        i += 1
        material['lattice_vectors'] = ast.literal_eval(line_at(i, "lattice vectors"))
        i += 1

        # Skip empty lines
        while i < n_lines and not lines[i]:
            i += 1

        # Skip the column-header line; its contents are not needed
        if i >= n_lines:
            break
        i += 1

        # Read atom data lines until next '2dm-' or empty
        atoms = []
        while i < n_lines:
            line = lines[i]
            if not line or line.startswith('2dm-'):
                break
            try:
                atoms.append(parse_atom_line(line))
            except ValueError as e:
                print(f"Warning: {e}")
            i += 1

        material['atoms'] = atoms
        materials.append(material)

    return materials

# 4. Generates the supercells and saves them into the output folder

# Supercell transformation: a diagonal integer matrix that repeats the cell
# SUPERCELL_SIZE times along a and b, and once along c.
transformation_matrix = np.diag([SUPERCELL_SIZE, SUPERCELL_SIZE, 1])

# Make sure the destination exists before any supercell is written
os.makedirs(OUTPUT_FOLDER, exist_ok=True)

# Parse materials from your text file
materials = parse_structures(INPUT_FILE)

# Generate supercells
for material in materials:
    atoms_data = material['atoms']
    if not atoms_data:
        # The parser drops atom rows it cannot read, so a material can end up
        # with no atoms. There is nothing to replicate then, and symbols[0]
        # below would raise IndexError and abort the whole run.
        print(f"Skipping {material['id']}: no atoms were parsed.")
        continue

    # Extract cell and atom positions; each 'xyz' entry is a string such as
    # "[x,y,z]" that is converted to numbers here
    cell = np.array(material['lattice_vectors'], dtype=float)
    symbols = [atom['label'] for atom in atoms_data]
    positions = np.array(
        [ast.literal_eval(atom['xyz']) for atom in atoms_data],
        dtype=float,
    )

    # Build the initial ASE Atoms
    raw_atoms = Atoms(
        symbols=symbols,
        positions=positions,
        cell=cell,
        pbc=[True, True, False]
    )

    # Transform the unit cell (align in xy-plane, shift if needed)
    unit_cell = transform_cell(raw_atoms)

    # Make the supercell
    supercell = make_supercell(unit_cell, transformation_matrix)

    # The file name carries the label of the first atom of the material
    element_str = symbols[0]

    out_filename = os.path.join(
        OUTPUT_FOLDER,
        f"supercell_{material['id']}_{element_str}.xyz"
    )

    write(out_filename, supercell, format='extxyz')

    print(f"Supercell created for {material['id']} => {out_filename}")

print(f"\nAll supercells written to '{OUTPUT_FOLDER}'.")

In [None]:
#Alternative Supercell Generator: A larger dataset using the C2DB database for 2D materials

import os
import numpy as np
import re
from ase.io import read, write
from ase.build import make_supercell
from ase import Atoms



# The folder containing the sublattice XYZ files generated in 1)
INPUT_FOLDER = "Materials/c2db Sublattices xyz"
# The size of the supercell in N unit cells for an NxN arrangement
SUPERCELL_SIZE = 3
# The output folder for the supercells. The NxN label is derived from
# SUPERCELL_SIZE so the folder name cannot drift out of sync with the
# supercells written into it.
OUTPUT_FOLDER = f'Supercells/c2db Sublattices {SUPERCELL_SIZE}x{SUPERCELL_SIZE}'

#  Code that parses the .xyz files and generates and saves the supercell files 
# Make sure the destination exists before any supercell is written
os.makedirs(OUTPUT_FOLDER, exist_ok=True)

# Diagonal integer matrix: repeat the cell SUPERCELL_SIZE times along a and b,
# and once along c.
transformation_matrix = np.diag([SUPERCELL_SIZE, SUPERCELL_SIZE, 1])

# List the .xyz files in the input folder with natural sorting
def natural_sort_key(filename):
    """
    Return a sort key that compares the numbers embedded in ``filename`` numerically.

    The name is split into alternating text and digit runs. Digit runs become
    ``int`` and text runs are lower-cased, so "f2.xyz" sorts before "f10.xyz".
    """
    # str.isdecimal() is true exactly for the characters matched by \d, all of
    # which int() accepts. str.isdigit() is also true for characters such as
    # superscript digits, which int() rejects with a ValueError.
    return [
        int(chunk) if chunk.isdecimal() else chunk.lower()
        for chunk in re.split(r'(\d+)', filename)
    ]

# Gather the .xyz inputs in natural order (numeric parts compared as numbers)
xyz_files = sorted(
    (name for name in os.listdir(INPUT_FOLDER) if name.endswith('.xyz')),
    key=natural_sort_key,
)

for xyz_file in xyz_files:
    filepath = os.path.join(INPUT_FOLDER, xyz_file)
    try:
        atoms = read(filepath, format='extxyz')
    except Exception as e:
        # Unreadable inputs are reported and skipped; the run carries on
        print(f"Could not read {xyz_file}: {e}")
    else:
        # The material ID is the file name without its extension
        material_id = os.path.splitext(xyz_file)[0]

        # Tile the structure NxN in the plane. The structure is replicated
        # exactly as read; no rotation/alignment step is applied in this cell.
        supercell = make_supercell(atoms, transformation_matrix)

        # Save as supercell_<material_id>.xyz in the output folder
        out_fname = f"supercell_{material_id}.xyz"
        out_path = os.path.join(OUTPUT_FOLDER, out_fname)
        write(out_path, supercell, format='extxyz')
        print(f"Supercell created for {material_id} => {out_fname}")

print(f"\nAll supercells written to '{OUTPUT_FOLDER}'.")