In [1]:
import itertools
import os
import random

import numpy as np
import pandas as pd

In [2]:
# Path to the SMILES list. The sheet must include the columns 'SMILES' and
# 'Category'; each Category entry should be either 'Salt' or 'Solvent'.
fn = r'SMILES_list.xlsx'
df = pd.read_excel(fn, header=0)

# Trim stray whitespace from the headers; non-string headers are kept as-is
# instead of crashing on .strip().
df.columns = [ent.strip() if isinstance(ent, str) else ent for ent in df.columns]

# Trim whitespace from string cells only. Using `.str.strip()` on an object
# column would replace every non-string entry (numbers, dates, ...) with NaN.
df = df.apply(
    lambda x: x.map(lambda v: v.strip() if isinstance(v, str) else v)
    if x.dtype == "object"
    else x
)

# Fail here, with a clear message, rather than with a bare KeyError later on.
missing_columns = {'SMILES', 'Category'} - set(df.columns)
if missing_columns:
    raise KeyError(f"{fn} is missing required column(s): {sorted(missing_columns)}")

In [3]:
# Split the SMILES list into salts and solvents based on the Category column
salts = df.loc[df['Category'] == 'Salt', 'SMILES'].tolist()
solvents = df.loc[df['Category'] == 'Solvent', 'SMILES'].tolist()


# Percentage limits per component: SMILES string -> (lower, upper).
# Both limits are inclusive; None means that side is unbounded.
constraints = {
    **{salt: (5, 15) for salt in salts},  # Salts: 5% to 15%
    **{solvent: (10, None) for solvent in solvents},  # Solvents: at least 10%
}


# Allowed number of components per recipe (inclusive range)
min_components, max_components = 3, 6


# Every recipe's percentages must add up to this total
total_percentage = 100


In [4]:
# Check a recipe's percentages against the per-component constraints
def valid_percentages(selected_components, percentages):
    """Return True when every constrained component's percentage is in range.

    ``percentages[i]`` is the share assigned to ``selected_components[i]``.
    Components without an entry in the global ``constraints`` mapping are not
    checked. Bounds are inclusive, and a bound of None is ignored.
    """
    def violates(value, limits):
        # True when value falls below the lower or above the upper limit
        low, high = limits
        if low is not None and value < low:
            return True
        return high is not None and value > high

    return not any(
        violates(percentages[pos], constraints[name])
        for pos, name in enumerate(selected_components)
        if name in constraints
    )

# Helper: move single percentage points until the total is reached
def _rebalance_to_total(percentages, bounds):
    """Adjust ``percentages`` in place, one point at a time, until they sum to
    the global ``total_percentage``.

    ``bounds`` holds one ``(lower, upper)`` pair per entry (None = unbounded).
    Points are only added to entries below their upper bound and only removed
    from entries above their lower bound (never below 0). If no entry has room
    left, the bounds cannot be met for this total; the helper then falls back
    to adjusting any entry (still never below 0) so the sum is always reached.

    Raises:
        ValueError: if the total cannot be reached without a negative entry.
    """
    diff = total_percentage - percentages.sum()
    while diff != 0:
        step = 1 if diff > 0 else -1

        if step > 0:
            candidates = [i for i, (_, upper) in enumerate(bounds)
                          if upper is None or percentages[i] < upper]
        else:
            candidates = [i for i, (lower, _) in enumerate(bounds)
                          if percentages[i] > max(lower or 0, 0)]

        if not candidates:
            # Infeasible bounds: reach the total anyway, ignoring the bounds.
            candidates = [i for i in range(len(percentages))
                          if step > 0 or percentages[i] > 0]
        if not candidates:
            raise ValueError("cannot reach total_percentage without negative percentages")

        percentages[random.choice(candidates)] += step
        diff -= step


# Function to generate random integer percentages that sum to 100, subject to constraints
def generate_percentages(selected_components):
    """Draw random integer percentages for ``selected_components``.

    The result sums to the global ``total_percentage``. Components listed in
    the global ``constraints`` mapping end up within their (lower, upper)
    bounds whenever the bounds can be met together for that total; otherwise
    the sum is still reached and the bounds are violated where unavoidable.

    Both ``np.random`` and ``random`` are used, so seed both for reproducible
    output.

    Args:
        selected_components: sequence of component keys (SMILES strings).

    Returns:
        numpy integer array, one percentage per component, in input order.

    Raises:
        ValueError: if ``selected_components`` is empty.
    """
    num_components = len(selected_components)
    if num_components == 0:
        raise ValueError("selected_components must contain at least one component")

    # Unconstrained components get an open (None, None) range
    bounds = [constraints.get(component, (None, None)) for component in selected_components]

    # Random positive weights, scaled to the total and floored to integers
    weights = np.random.randint(1, 100, size=num_components)
    percentages = np.floor((weights / weights.sum()) * total_percentage).astype(int)

    # Flooring drops a few points; hand them back out at random
    _rebalance_to_total(percentages, [(None, None)] * num_components)

    # Clamp each constrained component into its allowed range
    for i, (lower, upper) in enumerate(bounds):
        if lower is not None and percentages[i] < lower:
            percentages[i] = lower
        if upper is not None and percentages[i] > upper:
            percentages[i] = upper

    # Clamping changes the sum. Restore it while staying inside the bounds;
    # re-normalising here would scale clamped values back out of range.
    _rebalance_to_total(percentages, bounds)

    return percentages



# Function to randomly sample recipes and return them as a DataFrame
def generate_recipes(num_samples):
    """Randomly sample ``num_samples`` valid recipes and return them as a DataFrame.

    Each recipe is one salt drawn from the global ``salts`` plus solvents
    sampled without replacement from the global ``solvents``, for a total of
    ``min_components`` to ``max_components`` components. Percentages come from
    ``generate_percentages``; a draw is kept only if ``valid_percentages``
    accepts it, otherwise a new recipe is drawn.

    Args:
        num_samples: number of recipes (rows) to generate.

    Returns:
        DataFrame with columns smiles1..smilesN followed by conc1..concN,
        where N is ``max_components``. Recipes with fewer components are
        padded with the SMILES 'O' at a concentration of 0.0.

    Raises:
        ValueError: if there are too few solvents to reach ``min_components``.
    """
    # One slot is taken by the salt; solvents fill the rest. Capping the upper
    # end at the number of available solvents keeps random.sample from failing.
    min_solvents = min_components - 1
    max_solvents = min(max_components - 1, len(solvents))
    if num_samples > 0 and max_solvents < min_solvents:
        raise ValueError(
            f"Need at least {min_solvents} solvents to build a recipe, "
            f"but only {len(solvents)} are available"
        )

    recipe_rows = []

    # Continue generating recipes until we have the desired number of samples
    while len(recipe_rows) < num_samples:
        # Randomly select a salt as the first component
        salt_component = random.choice(salts)
        # Randomly select how many solvents accompany the salt
        num_non_salt_components = random.randint(min_solvents, max_solvents)

        # Randomly select non-salt components
        non_salt_combination = random.sample(solvents, num_non_salt_components)

        selected_components = [salt_component] + non_salt_combination

        # Generate random percentages
        percentages = generate_percentages(selected_components)

        # Keep the recipe only when the percentages satisfy the constraints
        if valid_percentages(selected_components, percentages):
            # Pad short recipes up to max_components with 'O' and 0.0 so that
            # every row has the same width as the column list below
            padded_components = selected_components + ['O'] * (max_components - len(selected_components))
            padded_percentages = list(percentages) + [0.0] * (max_components - len(percentages))

            recipe_rows.append(padded_components + padded_percentages)

    # Column layout follows max_components rather than a hard-coded 6
    column_names = (
        [f'smiles{i+1}' for i in range(max_components)]
        + [f'conc{i+1}' for i in range(max_components)]
    )
    df_recipes = pd.DataFrame(recipe_rows, columns=column_names)

    return df_recipes

In [5]:

# Optional seed: set to an integer to regenerate the exact same recipes on
# every run; leave as None for a fresh random batch each time. Both generators
# are seeded because the recipe code draws from `random` and `np.random`.
random_seed = None
random.seed(random_seed)
np.random.seed(random_seed)

# Generate recipes and store in a DataFrame
num_samples = 10  # number of electrolytes to generate
df_recipes = generate_recipes(num_samples=num_samples)

# Display the DataFrame (last expression of the cell)
df_recipes

Unnamed: 0,smiles1,smiles2,smiles3,smiles4,smiles5,smiles6,conc1,conc2,conc3,conc4,conc5,conc6
0,[Li+].[B-]1(OC(=O)C(=O)O1)(F)F,COCCOC,CCOC=O,CS(=O)C,O,O,14,25,31,30.0,0.0,0.0
1,[Li+].C(F)(F)(F)S(=O)(=O)[O-],C1=CC=CC=C1,CCOC=O,C1CCS(=O)(=O)C1,CCCCOC(=O)C,COC(=O)OC,15,20,10,10.0,18.0,27.0
2,[Li+].F[As-](F)(F)(F)(F)F,C(C(C(F)F)(F)F)OC(C(F)F)(F)F,C1CC(=O)OC1,CCOC(=O)OC,C1CCOC1,C1COCO1,14,24,11,11.0,29.0,11.0
3,[Li+].[B-]12(OC(=O)C(=O)O1)OC(=O)C(=O)O2,CCOC(=O)OCC,CCOC(=O)OC,C1CCOC1,COCCOCCOCCOCCOC,O,12,11,26,27.0,24.0,0.0
4,[Li+].F[P-](F)(F)(F)(F)F,CCC#N,C1=CC=CC=C1,COC=O,COCCOCCOC,O,15,28,13,34.0,10.0,0.0
5,[Li+].C(#N)C1=C(N=C([N-]1)C(F)(F)F)C#N,CC1COC(=O)O1,C1COCO1,COC(=O)OC,C1CCOC1,C1=CC=CC=C1,15,10,17,29.0,10.0,19.0
6,[Li+].C(#N)C1=C(N=C([N-]1)C(F)(F)F)C#N,CCOC(=O)OC,C1CC(=O)OC1,CCC#N,COC=O,O,15,10,30,18.0,27.0,0.0
7,[Li+].F[P-](F)(F)(F)(F)F,CC1COC(=O)O1,C1COCO1,C1CCOC1,O,O,5,47,22,26.0,0.0,0.0
8,[Li+].[B-]1(OC(=O)C(=O)O1)(F)F,C1COCO1,COCCOCCOCCOCCOC,COC=O,CC1COC(=O)O1,CCC#N,5,14,10,21.0,23.0,27.0
9,[Li+].[B-]12(OC(=O)C(=O)O1)OC(=O)C(=O)O2,C1CC(=O)OC1,CCC(=O)C,O,O,O,12,45,43,0.0,0.0,0.0


In [6]:
# Save generated electrolytes as a csv file
fn = 'Dummy Electrolytes/test.csv'

# to_csv does not create missing folders, so make sure the target one exists
out_dir = os.path.dirname(fn)
if out_dir:
    os.makedirs(out_dir, exist_ok=True)

df_recipes.to_csv(fn, index=False)