Generative model for next-layers prediction of entire PSC cell stack sequences

Preparing dataframe with desired features/layers

In [122]:
import pandas as pd

# Load the dataset
df = pd.read_csv(r'C:\Users\c\OneDrive\Documents\PEROVSKITE PROJECT\PerovskiteML_project\Data\Perovsite database query.csv')

# Strip whitespace from ALL string entries
df = df.applymap(lambda x: x.strip() if isinstance(x, str) else x)

# Remove rows with NaN values in key columns
df = df.dropna(subset=['Substrate_stack_sequence', 'ETL_stack_sequence', 'HTL_stack_sequence', 'Backcontact_stack_sequence', 
                       'Perovskite_composition_long_form', 'JV_default_PCE', 'Stability_PCE_end_of_experiment'])

# Define columns to keep
columns_to_keep = [
    'Cell_stack_sequence', 'Cell_architecture',
    'Substrate_stack_sequence',
    'ETL_stack_sequence', 'ETL_thickness', 'ETL_additives_compounds',
    'Perovskite_composition_a_ions', 'Perovskite_composition_a_ions_coefficients', 
    'Perovskite_composition_b_ions', 'Perovskite_composition_b_ions_coefficients',
    'Perovskite_composition_c_ions', 'Perovskite_composition_c_ions_coefficients', 
    'Perovskite_composition_short_form', 'Perovskite_composition_long_form', 'Perovskite_composition_leadfree', 'Perovskite_composition_inorganic',
    'Perovskite_additives_compounds', 'Perovskite_additives_concentrations', 'Perovskite_thickness', 'Perovskite_band_gap',
    'HTL_stack_sequence', 'HTL_thickness_list', 'HTL_additives_compounds',
    'Backcontact_stack_sequence',
    'Encapsulation', 'Encapsulation_stack_sequence', 'JV_default_PCE',
    'JV_default_Voc', 'JV_default_Jsc', 'JV_default_FF', 'JV_hysteresis_index', 'Stability_PCE_end_of_experiment'
]

# Keep only selected columns
df = df[columns_to_keep]

# Ensure all stack sequence columns are stripped of any extra spaces inside entries
for col in ['Substrate_stack_sequence', 'ETL_stack_sequence', 'HTL_stack_sequence', 'Backcontact_stack_sequence', 'Perovskite_composition_short_form']:
    df[col] = df[col].str.replace(r'\s*\|\s*', '|', regex=True)  # Remove spaces around '|'

# Now filter for common materials
df = df[
    df['Substrate_stack_sequence'].isin(['SLG|FTO', 'SLG|ITO']) &  
    df['ETL_stack_sequence'].isin(['TiO2-c|TiO2-mp', 'TiO2-c', 'PCBM-60|BCP', 'C60|BCP']) & 
    df['HTL_stack_sequence'].isin(['Spiro-MeOTAD', 'PEDOT:PSS']) &
    df['Backcontact_stack_sequence'].isin(['Au', 'Ag', 'Al', 'Carbon']) &
    df['Perovskite_composition_short_form'].isin(['MAPbI', 'CsFAMAPbBrI', 'FAMAPbBrI', 'CsPbBrI', 'MAPbBrI', 'FAPbI', 'CsFAPbBrI', 'CsPbBrI', 'CsPbI', 'CsFAPbI', 'MAPbBr']) &
    df['Cell_architecture'].isin(['nip'])       
]

# Classify perovskites into single-layered or multi-layered
ion_columns = [
    'Perovskite_composition_a_ions', 'Perovskite_composition_a_ions_coefficients', 
    'Perovskite_composition_b_ions', 'Perovskite_composition_b_ions_coefficients',
    'Perovskite_composition_c_ions', 'Perovskite_composition_c_ions_coefficients'
]

df['Layer Type'] = df.apply(
    lambda row: 'Multi-layered Perovskite' if any('|' in str(row[col]) for col in ion_columns) else 'Single-layered Perovskite',
    axis=1
)

# Keep only single-layered perovskites
filtered_df = df[df['Layer Type'] == 'Single-layered Perovskite']

# Create 'new cell stack' column by concatenating layers in the specified order
filtered_df['Cleaned cell stack'] = filtered_df['Substrate_stack_sequence'].astype(str) + ', ' + \
                       df['ETL_stack_sequence'].astype(str) + ', ' + \
                       df['Perovskite_composition_long_form'].astype(str) + ', ' + \
                       df['HTL_stack_sequence'].astype(str) + ', ' + \
                       df['Backcontact_stack_sequence'].astype(str)

filtered_df['Cleaned cell stack'] = filtered_df['Cleaned cell stack'].dropna().apply(lambda seq: f"<SOS>,{seq},<EOS>")

filtered_df['Perovskite_composition_long_form'] = filtered_df['Perovskite_composition_long_form'].str.strip()

# Save cleaned data
filtered_df.to_csv('cleaned_data_for_generative.csv', index=False)

  df = pd.read_csv(r'C:\Users\c\OneDrive\Documents\PEROVSKITE PROJECT\PerovskiteML_project\Data\Perovsite database query.csv')
  df = df.applymap(lambda x: x.strip() if isinstance(x, str) else x)


Encoding the perovskite layers - K-NN CLUSTERING

In [None]:
# Prepare for Transformer model: Define a custom embedding layer for materials

import torch
import torch.nn as nn

# Number of materials (including perovskite flattened vectors)
num_materials = len(material_to_id) + len(ions)  # Add ions for flattened perovskite vector
embedding_dim = 16  # Example embedding dimension

class MaterialEmbeddingLayer(nn.Module):
    def __init__(self, num_materials, embedding_dim):
        super(MaterialEmbeddingLayer, self).__init__()
        self.embeddings = nn.Embedding(num_materials, embedding_dim)
    
    def forward(self, x):
        return self.embeddings(x)

# Example usage
embedding_layer = MaterialEmbeddingLayer(num_materials, embedding_dim)

# Example input: Full stack vector (without flattening)
example_input = torch.tensor([material_to_id['Substrate1'], material_to_id['ETL1'], material_to_id['Pb'], material_to_id['HTL1'], material_to_id['Backcontact1']])

embedded_output = embedding_layer(example_input)

print("Embedded output:", embedded_output)


### Simplifying the model - LSTM
This LSTM-based language model (built and trained using PyTorch) will be used to generate PSC cell stacks with varied amounts of randomness, sampling from materials in the different layers. It uses a language modelling approach to predict the next 'token' (material/layer) in each sequence, learning from existing cell stacks.

1. Creating the model (using unique integer IDs)

In [130]:
import pandas as pd
import torch
from torch.utils.data import Dataset

class PSCStackDataset(Dataset):
    def __init__(self, csv_file, column_name):
        self.data = pd.read_csv(csv_file)[column_name].dropna().tolist()
        self.data = [f"<SOS>,{seq},<EOS>" for seq in self.data]  # Ensure order and special tokens

        # Build vocab
        self.vocab, self.idx_to_stack = self.build_vocab(self.data)

        # Encode dataset
        self.encoded_data = [self.encode(seq) for seq in self.data]

        # 🔹 Store allowed materials per layer position
        self.allowed_per_position = self.get_allowed_per_position()

    def build_vocab(self, sequences):
        unique_tokens = set()
        for seq in sequences:
            unique_tokens.update(seq.split(','))  # Collect unique materials/tokens

        # Ensure special tokens are included
        unique_tokens.update(['<SOS>', '<EOS>'])

        # Create consistent index mappings
        stack_to_idx = {token: idx for idx, token in enumerate(sorted(unique_tokens))}
        idx_to_stack = {idx: token for token, idx in stack_to_idx.items()}

        return stack_to_idx, idx_to_stack

    def encode(self, sequence):
        return [self.vocab[token] for token in sequence.split(',')]

    def get_allowed_per_position(self):
        """Extracts unique materials at each layer position in the sequence."""
        allowed = {i: set() for i in range(7)}  # 7 positions: <SOS>, Substrate, ETL, Perovskite, HTL, Backcontact, <EOS>

        for seq in self.data:
            tokens = seq.split(',')
            if len(tokens) == 7:  # Ensure valid sequences
                for i, token in enumerate(tokens):
                    allowed[i].add(token)

        return {i: sorted(list(materials)) for i, materials in allowed.items()}

    def __len__(self):
        return len(self.encoded_data)

    def __getitem__(self, idx):
        seq = self.encoded_data[idx]
        return torch.tensor(seq[:-1], dtype=torch.long), torch.tensor(seq[1:], dtype=torch.long)  # Shifted for prediction

# 🔹 Define LSTM model
class LSTMModel(nn.Module):
    def __init__(self, vocab_size, embed_size, hidden_size, num_layers):
        super(LSTMModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embed_size)
        self.lstm = nn.LSTM(embed_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, vocab_size)
    
    def forward(self, x, hidden=None):
        x = self.embedding(x)
        output, hidden = self.lstm(x, hidden)
        output = self.fc(output)
        return output, hidden


# 🔹 Temperature sampling function
def sample_with_temperature(probs, temperature=1.0):
    probs = torch.pow(probs, 1.0 / temperature)
    probs /= torch.sum(probs)
    return torch.multinomial(probs, num_samples=1).item()


# 🔹 Hyperparameters
EMBED_SIZE = 128
HIDDEN_SIZE = 256
NUM_LAYERS = 2
BATCH_SIZE = 32
EPOCHS = 10
LR = 0.001

# 🔹 Load dataset
dataset = PSCStackDataset(csv_file='cleaned_data_for_generative.csv', column_name='Cleaned cell stack')
dataloader = DataLoader(dataset, batch_size=BATCH_SIZE, shuffle=True)

df = pd.read_csv(r'cleaned_data_for_generative.csv')
# 🔹 Initialize model
model = LSTMModel(vocab_size=len(dataset.vocab), embed_size=EMBED_SIZE, hidden_size=HIDDEN_SIZE, num_layers=NUM_LAYERS)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=LR)

print("Vocabulary size:", len(dataset.vocab))
print("Sample vocabulary items:", list(dataset.vocab.items())[:10])

Vocabulary size: 133
Sample vocabulary items: [(' Ag', 0), (' Au', 1), (' Carbon', 2), (' Cs0.025FA0.81MA0.15PbBr0.45I2.5', 3), (' Cs0.025FA0.825MA0.15PbBr0.45I2.55', 4), (' Cs0.02FA0.79MA0.16PbBr0.551I2.49', 5), (' Cs0.02FA0.83MA0.17PbBr0.51I2.49', 6), (' Cs0.04FA0.81MA0.14PbBr0.43I2.57', 7), (' Cs0.05FA0.75MA0.2PbBr0.3I2.7', 8), (' Cs0.05FA0.76MA0.16PbBr0.48I2.52', 9)]


2. Train the model

In [131]:
# Training loop
device = torch.device("cpu")  # Keeping it on CPU
model.to(device)

for epoch in range(EPOCHS):
    model.train()
    total_loss = 0

    for batch_idx, (inputs, targets) in enumerate(dataloader):
        inputs, targets = inputs.to(device), targets.to(device)

        optimizer.zero_grad()
        outputs, _ = model(inputs)  # Forward pass

        # 🔹 Reshape output to match target
        loss = criterion(outputs.view(-1, len(dataset.vocab)), targets.view(-1))

        # 🔹 Backpropagation
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

        # 🔹 Optional: Print batch loss every N batches
        if batch_idx % 10 == 0:  # Change 10 to a suitable number
            print(f"Epoch [{epoch+1}/{EPOCHS}], Batch [{batch_idx}/{len(dataloader)}], Loss: {loss.item():.4f}")

    # 🔹 Print epoch loss
    print(f"Epoch [{epoch+1}/{EPOCHS}] Finished - Average Loss: {total_loss/len(dataloader):.4f}")

Epoch [1/10], Batch [0/47], Loss: 4.8819
Epoch [1/10], Batch [10/47], Loss: 2.3195
Epoch [1/10], Batch [20/47], Loss: 1.4075
Epoch [1/10], Batch [30/47], Loss: 1.0192
Epoch [1/10], Batch [40/47], Loss: 0.8487
Epoch [1/10] Finished - Average Loss: 1.7833
Epoch [2/10], Batch [0/47], Loss: 0.6678
Epoch [2/10], Batch [10/47], Loss: 0.5699
Epoch [2/10], Batch [20/47], Loss: 0.5703
Epoch [2/10], Batch [30/47], Loss: 0.5276
Epoch [2/10], Batch [40/47], Loss: 0.4535
Epoch [2/10] Finished - Average Loss: 0.5723
Epoch [3/10], Batch [0/47], Loss: 0.4796
Epoch [3/10], Batch [10/47], Loss: 0.4769
Epoch [3/10], Batch [20/47], Loss: 0.5346
Epoch [3/10], Batch [30/47], Loss: 0.5518
Epoch [3/10], Batch [40/47], Loss: 0.4094
Epoch [3/10] Finished - Average Loss: 0.4833
Epoch [4/10], Batch [0/47], Loss: 0.4526
Epoch [4/10], Batch [10/47], Loss: 0.4989
Epoch [4/10], Batch [20/47], Loss: 0.5210
Epoch [4/10], Batch [30/47], Loss: 0.4426
Epoch [4/10], Batch [40/47], Loss: 0.4765
Epoch [4/10] Finished - Avera

3. Regularisation to encourage more meaningful embeddings/representations:

In [132]:
l2_lambda = 1e-5  # Adjust this to control embedding regularization strength
embedding_loss = l2_lambda * model.embedding.weight.norm(2)  # L2 Regularization

# Compute the main loss
loss = criterion(outputs.view(-1, len(dataset.vocab)), targets.view(-1)) + embedding_loss

4. Generate sequences:

In [133]:
import torch.nn.functional as F
import torch
import random

import torch

def generate_sequence(model, dataset, start_token='<SOS>', max_length=7, temperature=1.0):
    """
    Generates a valid perovskite solar cell stack sequence while ensuring layer constraints.
    """
    model.eval()
    
    # Start with <SOS> token
    generated_sequence = []
    
    # Convert words to indices
    input_seq = torch.tensor([dataset.vocab['<SOS>']], dtype=torch.long).unsqueeze(0)  # Shape: (1, 1)
    
    for position in range(1, max_length):  # Generate each layer position
        allowed_tokens = dataset.allowed_per_position[position]  # Get valid choices for this layer
        allowed_indices = [dataset.vocab[token] for token in allowed_tokens]  # Convert to vocab indices
        
        # Get model predictions
        with torch.no_grad():
            output, _ = model(input_seq)  # Unpack logits and hidden state
            logits = output[:, -1, :]  # Get logits for the last generated token

        logits = output[:, -1, :]  # Get logits for the last generated token

        # Apply temperature scaling
        logits = logits / temperature

        # Convert logits to probabilities
        probs = F.softmax(logits, dim=-1).squeeze()

        # # Debugging print statements
        # print(f"Logits: {logits}")
        # print(f"Probabilities: {probs}")

        # Check for NaN or negative values
        if torch.isnan(probs).any() or torch.isinf(probs).any():
            raise ValueError("Found NaN or Inf in probability tensor!")
        if (probs < 0).any():
            raise ValueError("Found negative probabilities!")

        # Sample next token
        next_idx = torch.multinomial(probs, num_samples=1).item()
        next_token = dataset.idx_to_stack[next_idx]

        generated_sequence.append(next_token)
        
        if next_token == '<EOS>':
            break  # Stop generation when <EOS> is reached
        
        # Append new token to input sequence
        input_seq = torch.cat([input_seq, torch.tensor([[next_idx]], dtype=torch.long)], dim=1)

    return ','.join(generated_sequence)

# 🔹 Generate sequences at different temperatures
temperatures = [0.5, 1.0, 1.5]

print("\nGenerated Sequences at Different Temperatures:")
for temp in temperatures:
    print(f"\nTemperature {temp}:")
    for i in range(3):  # Generate 3 sequences per temperature
        print(f"Sample {i+1}: {generate_sequence(model, dataset, start_token='<SOS>', max_length=7, temperature=temp)}")


Generated Sequences at Different Temperatures:

Temperature 0.5:
Sample 1: <SOS>,SLG|FTO, TiO2-c|TiO2-mp, MAPbI3, Spiro-MeOTAD, Ag
Sample 2: <SOS>,SLG|FTO, TiO2-c, MAPbI3, Spiro-MeOTAD, Ag
Sample 3: <SOS>,SLG|FTO, TiO2-c|TiO2-mp, MAPbI3, Spiro-MeOTAD, Au

Temperature 1.0:
Sample 1: <SOS>,SLG|FTO, TiO2-c|TiO2-mp, FA0.97MA0.03PbBr0.09I2.91, Spiro-MeOTAD, Au
Sample 2: <SOS>,SLG|FTO, TiO2-c|TiO2-mp, CsPbBrI2, Spiro-MeOTAD, Au
Sample 3: <SOS>,SLG|FTO, TiO2-c|TiO2-mp, Cs0.05FA0.79MA0.16PbBr0.51I2.51, Spiro-MeOTAD, Au

Temperature 1.5:
Sample 1: <SOS>,SLG|FTO, TiO2-c, MAPbI3, Spiro-MeOTAD, CsPbI3
Sample 2: <SOS>,SLG|FTO, TiO2-c, FA0.85MA0.15PbBr0.45I2.55, Spiro-MeOTAD, Ag
Sample 3: <SOS>,SLG|FTO, TiO2-c|TiO2-mp, FAPbI3, Spiro-MeOTAD, Au


### Regression models to predict PCE and stability of generated stacks

1. Load and create embeddings, then convert the dataset

In [135]:
import torch

# 📌 Get the embedding matrix from the trained LSTM model
embedding_layer = model.embedding  # Assuming your LSTM model has an embedding layer
embedding_matrix = embedding_layer.weight.detach().cpu().numpy()  # Convert to NumPy

import numpy as np

def stack_to_vector(stack, dataset, embedding_matrix, method="mean"):
    """
    Converts a cleaned cell stack sequence into a numerical vector.
    
    Args:
        stack (str): Stack sequence (comma-separated materials).
        dataset: Dataset object containing vocab mappings.
        embedding_matrix (numpy.array): Pretrained material embeddings.
        method (str): How to combine material embeddings. Options: "mean", "concat".
        
    Returns:
        numpy.array: Vector representation of the stack.
    """
    tokens = stack.split(',')
    token_indices = [dataset.vocab[token] for token in tokens if token in dataset.vocab]
    
    if not token_indices:  # In case no valid tokens are found
        return np.zeros(embedding_matrix.shape[1])  # Return a zero vector
    
    embeddings = embedding_matrix[token_indices]  # Fetch embeddings

    if method == "mean":
        return np.mean(embeddings, axis=0)  # Average pooling (good for variable-length stacks)
    elif method == "concat":
        # Ensure fixed length: Pad with zeros if needed
        max_layers = 5  # Adjust based on your max stack size
        padded_embeddings = np.zeros((max_layers, embedding_matrix.shape[1]))
        for i, emb in enumerate(embeddings[:max_layers]):
            padded_embeddings[i] = emb
        return padded_embeddings.flatten()  # Concatenate embeddings
    else:
        raise ValueError("Invalid method. Use 'mean' or 'concat'.")

# Convert entire dataset into vectors
X = np.array([stack_to_vector(stack, dataset, embedding_matrix) for stack in df['Cleaned cell stack']])


2. Train the models

In [157]:
y_pce = df['JV_default_PCE'].values
y_stability = df['Stability_PCE_end_of_experiment'].values

# Train XGBoost models or any other regression model
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split

X_train = X_train.astype(np.float32)
y_train_stability = y_train_stability.astype(np.float32)

X_train, X_test, y_train_pce, y_test_pce = train_test_split(X, y_pce, test_size=0.2, random_state=42)

pce_model = XGBRegressor()
pce_model.fit(X_train, y_train_pce)

stability_model = XGBRegressor()
stability_model.fit(X_train, y_train_stability)

# 🔹 Generate stacks using your trained model at temperature = 1.0
generated_stacks = [
    generate_sequence(model, dataset, start_token='<SOS>', max_length=7, temperature=1.0)
    for _ in range(10)  # Generate 10 stacks
]

# 🔹 Convert generated stacks to numerical feature vectors
generated_vectors = np.array([stack_to_vector(stack, dataset, embedding_matrix) for stack in generated_stacks])

# 🔹 Predict PCE and Stability
pred_pce = pce_model.predict(generated_vectors)
pred_stability = stability_model.predict(generated_vectors)

# 🔹 Print predictions
for stack, pce, stability in zip(generated_stacks, pred_pce, pred_stability):
    print(f"Stack: {stack} -> Predicted JV_default_PCE: {pce:.2f}, Stability_PCE_end_of_experiment: {stability:.2f} %")


Stack: <SOS>,SLG|ITO, TiO2-c, MAPbI3, Spiro-MeOTAD, Au -> Predicted JV_default_PCE: 16.43, Stability_PCE_end_of_experiment: 48.47 %
Stack: <SOS>,SLG|FTO, TiO2-c, CsPbBrI2, Spiro-MeOTAD, Ag -> Predicted JV_default_PCE: 12.22, Stability_PCE_end_of_experiment: 53.80 %
Stack: <SOS>,SLG|FTO, TiO2-c, MAPbI3, Spiro-MeOTAD, Au -> Predicted JV_default_PCE: 16.41, Stability_PCE_end_of_experiment: 53.91 %
Stack: <SOS>,SLG|FTO, TiO2-c, MAPbI3, Spiro-MeOTAD, Ag -> Predicted JV_default_PCE: 14.50, Stability_PCE_end_of_experiment: 53.26 %
Stack: <SOS>,SLG|FTO, TiO2-c, MAPbI3, Spiro-MeOTAD, Au -> Predicted JV_default_PCE: 16.41, Stability_PCE_end_of_experiment: 53.91 %
Stack: <SOS>,SLG|FTO, TiO2-c, CsPbBrI2, Spiro-MeOTAD, Au -> Predicted JV_default_PCE: 14.24, Stability_PCE_end_of_experiment: 61.91 %
Stack: <SOS>,SLG|FTO, TiO2-c|TiO2-mp, MAPbI3, Spiro-MeOTAD, Au -> Predicted JV_default_PCE: 15.78, Stability_PCE_end_of_experiment: 59.07 %
Stack: <SOS>,SLG|FTO, TiO2-c|TiO2-mp, FA0.86MA0.15PbBr0.45I2.55,

3. Compare to true values of PCE and stability of given stacks

In [163]:
import pandas as pd
from sklearn.metrics import mean_absolute_error

# Append <EOS> to generated stacks
generated_stacks_with_eos = [stack + ",<EOS>" for stack in generated_stacks]

# Find true PCE and stability values
true_pce = []
true_stability = []

for stack in generated_stacks_with_eos:
    match = df[df['Cleaned cell stack'].str.lower().str.strip() == stack.lower().strip()]   # Find exact match in dataset
    
    if not match.empty:
        true_pce.append(match['JV_default_PCE'].values[0])
        true_stability.append(match['Stability_PCE_end_of_experiment'].values[0])
    else:
        true_pce.append(None)
        true_stability.append(None)

# Store in DataFrame for comparison
comparison_df = pd.DataFrame({
    "Generated Stack": generated_stacks,
    "Generated Stack with <EOS>": generated_stacks_with_eos,
    "Predicted PCE": pred_pce,
    "Predicted Stability": pred_stability,
    "True PCE": true_pce,
    "True Stability": true_stability
})

print(comparison_df)

# 🔹 Compute Mean Absolute Error (MAE) for valid entries
valid_pce = comparison_df.dropna(subset=['True PCE'])
valid_stability = comparison_df.dropna(subset=['True Stability'])

if not valid_pce.empty:
    pce_mae = mean_absolute_error(valid_pce['True PCE'], valid_pce['Predicted PCE'])
    print(f"Mean Absolute Error (PCE): {pce_mae:.2f} %")

if not valid_stability.empty:
    stability_mae = mean_absolute_error(valid_stability['True Stability'], valid_stability['Predicted Stability'])
    print(f"Mean Absolute Error (Stability): {stability_mae:.2f} %" )

                                     Generated Stack  \
0    <SOS>,SLG|ITO, TiO2-c, MAPbI3, Spiro-MeOTAD, Au   
1  <SOS>,SLG|FTO, TiO2-c, CsPbBrI2, Spiro-MeOTAD, Ag   
2    <SOS>,SLG|FTO, TiO2-c, MAPbI3, Spiro-MeOTAD, Au   
3    <SOS>,SLG|FTO, TiO2-c, MAPbI3, Spiro-MeOTAD, Ag   
4    <SOS>,SLG|FTO, TiO2-c, MAPbI3, Spiro-MeOTAD, Au   
5  <SOS>,SLG|FTO, TiO2-c, CsPbBrI2, Spiro-MeOTAD, Au   
6  <SOS>,SLG|FTO, TiO2-c|TiO2-mp, MAPbI3, Spiro-M...   
7  <SOS>,SLG|FTO, TiO2-c|TiO2-mp, FA0.86MA0.15PbB...   
8  <SOS>,SLG|FTO, TiO2-c|TiO2-mp, MAPbI3, Spiro-M...   
9  <SOS>,SLG|FTO, TiO2-c|TiO2-mp, MAPbI3, Spiro-M...   

                          Generated Stack with <EOS>  Predicted PCE  \
0  <SOS>,SLG|ITO, TiO2-c, MAPbI3, Spiro-MeOTAD, A...      16.425735   
1  <SOS>,SLG|FTO, TiO2-c, CsPbBrI2, Spiro-MeOTAD,...      12.221713   
2  <SOS>,SLG|FTO, TiO2-c, MAPbI3, Spiro-MeOTAD, A...      16.405151   
3  <SOS>,SLG|FTO, TiO2-c, MAPbI3, Spiro-MeOTAD, A...      14.498394   
4  <SOS>,SLG|FTO, TiO2-c, MA