In [294]:
import pandas as pd 
import numpy as np
import cirpy

In [295]:
# Read the product ingredient list and tidy the column headers in one chain:
# every space is removed from the header names, then the two percentage
# bounds are renamed to identifier-friendly names (minp / maxp).
PL = (
    pd.read_csv('Product_List.csv')
    .rename(columns=lambda header: header.replace(' ', ''))
    .rename(columns={"min(%)": "minp", "max(%)": "maxp"})
)
PL.head()

Unnamed: 0,ProductID,CAS,ChemicalName,minp,maxp
0,Product 1,94-36-0,Benzoyl peroxide,5.0,10.0
1,Product 1,7732-18-5,Water,55.0,65.0
2,Product 1,56-81-5,Glycerin,8.0,10.0
3,Product 1,57-10-3,Palmitic acid,3.0,5.0
4,Product 1,9007-20-9,Carbomer,3.0,5.0


In [296]:
#Pattern matching for peroxides
def number_peroxides (smile):
    """Count the peroxide (-O-O-) linkages written as 'OO' in a SMILES string.

    Parameters
    ----------
    smile : str
        SMILES representation of one chemical species.

    Returns
    -------
    int
        Number of non-overlapping occurrences of the substring 'OO'.
        Linkages spelled any other way in the SMILES are not detected.
    """
    return smile.count('OO')

#Calculates the chemical oxygen
def Chemical_Oxygen(n,c,m):
    """Return the n*c/m term of a single species as a float.

    Parameters
    ----------
    n : number
        Number of -O-O- groups per molecule in the species.
    c : number
        Concentration (mass percent) of the species. The caller in this
        notebook passes the maximum concentration from the ingredient list.
    m : number
        Molecular mass of the species.

    Returns
    -------
    float
        (n * c) / m. The factor of 16 and the sum over species are applied
        by the caller, not here.
    """
    return float((n * c) / m)
    

In [297]:
#Look up properties through cirpy. Every distinct CAS number is resolved only once,
#so an ingredient that appears in several products does not trigger repeated lookups.
cas_numbers = PL['CAS'].unique()
smiles_by_cas = {cas: cirpy.resolve(cas, 'smiles') for cas in cas_numbers}
mw_by_cas = {cas: cirpy.resolve(cas, 'mw') for cas in cas_numbers}
PL['SMILES'] = PL['CAS'].map(smiles_by_cas)
PL['Molecular_Weight'] = PL['CAS'].map(mw_by_cas)
#Match for peroxides. A CAS number that could not be resolved has no SMILES (None or NaN);
#pd.notna catches both, so such rows get NaN instead of raising inside number_peroxides.
PL['peroxides_count'] = PL['SMILES'].map(
    lambda smiles: number_peroxides(smiles) if pd.notna(smiles) else np.nan
)
#Calculate the chemical oxygen using the max available concentration in the list of ingredients as instructed.
#Rows without a molecular weight stay NaN; the float() calls are kept because the looked-up
#values are converted explicitly rather than assumed to be numeric already.
PL['Chemical_Oxygen'] = PL.apply(
    lambda row: Chemical_Oxygen(
        float(row['peroxides_count']),
        float(row['maxp']),
        float(row['Molecular_Weight']),
    ) if pd.notna(row['Molecular_Weight']) else np.nan,
    axis = 1,
)

In [298]:
#Make a new data frame so the one with all data is preserved.
#.copy() is required: a plain `df = PL` only binds a second name to the same object,
#so any in-place change made through df would also change PL.
df = PL.copy()

In [299]:
#Formula in CFR 173.128(a)(4) only refers to peroxides, so rows whose chemical oxygen term is exactly 0 are removed.
#The filtered frame is assigned to df instead of dropping in place, so no other frame that
#shares the same object (such as PL) loses rows, and re-running the cell is harmless.
#Rows with a missing Chemical_Oxygen (unresolved chemicals) are kept, because NaN == 0 is False.
df = df.drop(df[df['Chemical_Oxygen'] == 0].index)
df.tail()

Unnamed: 0,ProductID,CAS,ChemicalName,minp,maxp,SMILES,Molecular_Weight,peroxides_count,Chemical_Oxygen
6,Product 2,7722-84-1,Hydrogen peroxide,1.5,1.8,OO,34.0146,1.0,0.052918
8,Product 2,80-43-3,Dicumyl peroxide,7.0,9.0,CC(C)(OOC(C)(C)c1ccccc1)c2ccccc2,270.3706,1.0,0.033288
9,Product 3,15630-89-4,sodium percarbonate,25.0,35.0,[Na+].[Na+].[Na+].[Na+].OO.OO.OO.[O-]C([O-])=O...,314.0213,3.0,0.334372
10,Product 3,68131-39-5,"Alcohols, C12-15, ethoxylated",3.0,4.0,,,,
13,Product 4,7722-84-1,hydrogen peroxide,0.0,1.0,OO,34.0146,1.0,0.029399


In [300]:
#Classify each product against the exemption formula referenced above (CFR 173.128(a)(4)).
#  Organic_Peroxides                     products that are not exempt
#  Organic_Peroxides_possible_exempt_r1  candidates checked against the 0.5 limit
#  Organic_Peroxides_possible_exempt_r2  candidates checked against the 1.0 limit
#  Organic_Peroxides_exempt              products whose total stays under their limit
#NOTE(review): the 1% / 7% bands are applied to every remaining row, as in the original
#analysis; confirm against the regulation whether they should apply to hydrogen peroxide only.
Organic_Peroxides = []
Organic_Peroxides_possible_exempt_r1 = []
Organic_Peroxides_possible_exempt_r2 = []
Organic_Peroxides_exempt = []


def _append_once(items, value):
    """Append value to items unless it is already there, keeping first-seen order."""
    if value not in items:
        items.append(value)


#Band every row by its maximum concentration, the same worst-case value used for Chemical_Oxygen.
#Banding on maxp alone keeps a range such as 0.5-5% (or a maximum of exactly 7%) out of the
#"less than 1%" branch. Rows with a missing Chemical_Oxygen still take part in the banding.
for index, row in df.iterrows():
    if row.maxp > 7.0:
        print (row.ProductID,':', row.ChemicalName,'can be above 7%  and as such is not eligible for exemption')
        _append_once(Organic_Peroxides, row.ProductID)
    elif row.maxp > 1.0:
        print(row.ProductID,':', row.ChemicalName, 'range is 1-7%')
        #limit = 0.5
        _append_once(Organic_Peroxides_possible_exempt_r1, row.ProductID)
    else:
        print(row.ProductID,':', row.ChemicalName, 'is less than 1%')
        #limit  = 1.0
        _append_once(Organic_Peroxides_possible_exempt_r2, row.ProductID)

#The strictest band wins: a product already ruled out is not a candidate, and a product
#that needs the 0.5 limit is not also tested against the 1.0 limit.
Organic_Peroxides_possible_exempt_r1 = [
    x for x in Organic_Peroxides_possible_exempt_r1 if x not in Organic_Peroxides
]
Organic_Peroxides_possible_exempt_r2 = [
    x for x in Organic_Peroxides_possible_exempt_r2
    if x not in Organic_Peroxides and x not in Organic_Peroxides_possible_exempt_r1
]

#Now use the formula over the possible exemptions.
#Each product is selected into its own frame with an exact ProductID match. df itself is
#never reassigned, so every product is summed over its real rows, and "Product 1" cannot
#pick up the rows of "Product 10". Series.sum() skips NaN terms by default.
for candidates, limit in (
    (Organic_Peroxides_possible_exempt_r1, 0.5),
    (Organic_Peroxides_possible_exempt_r2, 1.0),
):
    for ogp in candidates:
        product_rows = df[df['ProductID'] == ogp]
        Total = 16 * (product_rows['Chemical_Oxygen'].sum())
        if Total < limit:
            Organic_Peroxides_exempt.append(ogp)
        else:
            Organic_Peroxides.append(ogp)


print("The Organic Peroxides are:", Organic_Peroxides)
print("The Products Exempt are:", Organic_Peroxides_exempt)

Product 1 : Benzoyl peroxide can be above 7%  and as such is not eligible for exemption
Product 2 : Hydrogen peroxide range is 1-7%
Product 2 : Dicumyl peroxide can be above 7%  and as such is not eligible for exemption
Product 3 : sodium percarbonate can be above 7%  and as such is not eligible for exemption
Product 3 : Alcohols, C12-15, ethoxylated range is 1-7%
Product 4 : hydrogen peroxide is less than 1%
The Organic Peroxides are: ['Product 1', 'Product 2', 'Product 3']
The Products Exempt are: ['Product 4']
