# Replace entries below LOD - plasma samples eider 2021 CECs

Read in the Excel file, check for each compound what percentage of the samples is above the LOD (limit of detection), and replace every entry recorded as "lod" with half of that compound's LOD (1/2 LOD).

In [None]:
# Import required packages
import pandas as pd

## Read in files, set LOD percentage, LOD values

In [None]:
# Load the measurement table (read_excel without sheet_name reads the default sheet)
allCECs = pd.read_excel('Appropriate file path to Excel file with dataset')

# Load the "compoundList" sheet and keep every compound name once
compoundSheet = pd.read_excel(
    'Appropriate file path to Excel file with dataset',
    sheet_name="compoundList",
)
compoundList = compoundSheet["compoundList"].unique()

In [None]:
# Build a second data frame in which every NaN is encoded as the sentinel -1.
# The number of NaNs differs per compound/group, and the columns can also hold
# strings (which math.isnan does not accept), so missing values are recognised
# through this sentinel instead.
allCECsNoNA = allCECs.fillna(value=-1)

In [None]:
# Set the percentage LOD cut-off: aboveLOD() reports a compound when at least
# this percentage of its non-missing samples are above LOD.
percentLOD = 60

In [None]:
# LOD (limit of detection) values, one constant per compound column.
# Observations recorded as "lod" are replaced further down with half of these values.
# NOTE(review): units are not stated here - presumably the same units as the
# concentrations in the dataset; confirm.

# BTRs and BTHs
lodTTR = 0.606
lod2OHBTH = 0.602
lod2SBTH = 0.288
lod2MeSBTH = 0.286
lod2SCNMeSBTH = 0.051

# BzPs
lodBzP1 = 0.661
lodBzP3 = 0.873

# BPAs
lodBPA = 6.21
lodBPS = 0.063
lodBPAF = 0.019

# Parabens
lodMeP = 0.210
lodEtP = 0.027
lodPrP = 0.010
lodBuP = 0.011
lodBezP = 0.056

# Phthalate metabolites
lodmDP = 1.33
lodmIBP = 1.81
lodmBP = 3.61
lodmHP = 0.451
lodmEHP = 10.1
lodmMP = 1.794
lodmEP = 2.082

## Divide into comparison groups

In [None]:
# Work on the data frame in which missing values are encoded as -1
df = allCECsNoNA

# Row selectors for incubation stage and breeding period
earlyStage = df["Incubation"] == "early"
lateStage = df["Incubation"] == "late"
earlyBreeders = df["BreedingPeriod"] == "early"
lateBreeders = df["BreedingPeriod"] == "late"
notMidBreeders = df["BreedingPeriod"] != "mid"

# Incubation stage only: every breeding period except 'mid'
earlyInc = df.loc[earlyStage & notMidBreeders].reset_index(drop=True)
lateInc = df.loc[lateStage & notMidBreeders].reset_index(drop=True)

# Incubation stage x breeding period; each subset is re-indexed from 0
earlyIncEarlyBP = df.loc[earlyStage & earlyBreeders].reset_index(drop=True)
earlyIncLateBP = df.loc[earlyStage & lateBreeders].reset_index(drop=True)
lateIncEarlyBP = df.loc[lateStage & earlyBreeders].reset_index(drop=True)
lateIncLateBP = df.loc[lateStage & lateBreeders].reset_index(drop=True)

In [None]:
# The four incubation x breeding-period subsets that are checked against the LOD cut-off
groups = [
    earlyIncEarlyBP,
    earlyIncLateBP,
    lateIncEarlyBP,
    lateIncLateBP,
]

## Check percentage of samples above LOD

Use the function below to determine which compounds have a high enough percentage of samples above LOD (at least the cut-off set above) to be tested for differences in concentration between early and late incubation and between early and late breeders.

In [None]:
# Function calculating the percent of samples above LOD for a given data set:

def aboveLOD(compoundList, dataSet, percent=None):
    """Print and return the compounds with enough samples above LOD.

    Each sample of a compound is classified as
    - missing: the -1 sentinel, NaN or None (excluded from the sample count),
    - detect: any other numeric entry (Python or NumPy int/float),
    - non-detect: anything else, e.g. the string "lod".
    A compound qualifies when its detects make up at least `percent` percent
    of its non-missing samples. Compounds without any non-missing sample are
    never reported.

    Parameters
    ----------
    compoundList : iterable of str
        Column names of `dataSet` to evaluate.
    dataSet : pandas.DataFrame
        Data with missing values encoded as -1. Any index is accepted.
    percent : float, optional
        Cut-off in percent. Defaults to the notebook-level `percentLOD`.

    Returns
    -------
    list
        The qualifying compounds in the order given (the list is also printed).
    """
    import numbers

    if percent is None:
        # Fall back to the cut-off defined at notebook level.
        percent = percentLOD

    overThreshold = []
    for compound in compoundList:
        validLength = 0
        count = 0
        for sample in dataSet[compound]:
            # isinstance (not type(...) == float) so that numpy.float64 and
            # integer-valued measurements are recognised as numbers as well.
            isNumber = isinstance(sample, numbers.Real) and not isinstance(sample, bool)
            isMissing = sample is None or (
                isNumber and (sample == -1 or sample != sample)  # sentinel or NaN
            )
            if isMissing:
                continue
            validLength += 1
            if isNumber:
                count += 1
        # Integer form of count >= validLength * percent / 100; avoids
        # floating-point rounding right at the cut-off.
        if validLength and count * 100 >= percent * validLength:
            overThreshold.append(compound)
    print(overThreshold)
    return overThreshold

In [None]:
# Report, for every comparison group, the compounds that reach the LOD cut-off.
for group in groups:
    # Recover the group's variable name by identity lookup in the global namespace.
    # NOTE(review): this takes the first global bound to the same object, so it
    # relies on the group's own variable appearing before any other reference.
    matchingNames = [key for key in globals() if globals()[key] is group]
    name = matchingNames[0]
    print("For dataset", name, "the following compounds are equal to or above", percentLOD, "%:")
    aboveLOD(compoundList, group)

## Replace LOD with 1/2 LOD

In [None]:
# Substitute every observation recorded as "lod" with half of the compound's LOD.
# The table maps each compound column to its replacement value (1/2 LOD).
halfLodByColumn = {
    # BTRs and BTHs
    "TTR": (lodTTR/2),
    "2OHBTH": (lod2OHBTH/2),
    "2SBTH": (lod2SBTH/2),
    "2MeSBTH": (lod2MeSBTH/2),
    "2SCNMeSBTH": (lod2SCNMeSBTH/2),
    # BzPs and BPAs
    "BzP1": (lodBzP1/2),
    "BzP3": (lodBzP3/2),
    "BPA": (lodBPA/2),
    "BPS": (lodBPS/2),
    "BPAF": (lodBPAF/2),
    # Parabens
    "MeP": (lodMeP/2),
    "EtP": (lodEtP/2),
    "PrP": (lodPrP/2),
    "BuP": (lodBuP/2),
    "BezP": (lodBezP/2),
    # Phthalate metabolites
    "mDP": (lodmDP/2),
    "mIBP": (lodmIBP/2),
    "mBP": (lodmBP/2),
    "mHP": (lodmHP/2),
    "mEHP": (lodmEHP/2),
    "mMP": (lodmMP/2),
    "mEP": (lodmEP/2),
}

# Columns are processed in the order listed above; only rows equal to "lod" are changed.
for column, halfLod in halfLodByColumn.items():
    allCECs.loc[allCECs[column] == "lod", column] = halfLod

## Write dataframe with LODs substituted to CSV file

In [None]:
# Save the data frame with the 1/2 LOD substitutions to disk
# (output file name: plasmaEiderNTNU2021WithLODs.csv)

outputPath = 'Appropriate file path/plasmaEiderNTNU2021WithLODs.csv'
allCECs.to_csv(outputPath)