<a href="https://colab.research.google.com/github/bribibel/labcalculations/blob/main/calculate_retention_index.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

This program (made, in the interest of full disclosure, with lots of prodding of ChatGPT) calculates the retention index (RI) for a set of test compounds based on their retention times and those of a set of alkane standards.
It uses a logarithmic interpolation between the two nearest alkanes to determine the RI for each compound. The results are displayed in a table and exported to a CSV file.

Please ensure your input CSV files are formatted as follows:
- Alkane data: two columns with or without headers.
  * Column 1: Number of Carbon Atoms
  * Column 2: Retention Time

- Compound data: two columns with or without headers.
  * Column 1: Compound Name
  * Column 2: Retention Time

The script will auto-detect and skip header rows if they do not contain numeric values.

In [None]:
import math
import pandas as pd
from google.colab import files
import io
from datetime import datetime

# Function to calculate retention index
def calculate_retention_index(rt, smaller_alkane, larger_alkane, alkane_rts):
    """Return the retention index of a compound bracketed by two alkanes.

    The index is interpolated on the log10 of the retention times:

        RI = 100 * (n + (N - n) * (log rt - log rt_n) / (log rt_N - log rt_n))

    where ``n`` and ``N`` are the carbon numbers of the smaller and larger
    bracketing alkanes. The ``(N - n)`` factor keeps the result correct when
    the two standards are not consecutive (e.g. C8 and C10); for consecutive
    alkanes it equals 1 and the formula reduces to the simple form.

    Args:
        rt: Retention time of the compound (must be positive).
        smaller_alkane: Carbon number of the alkane eluting before the compound.
        larger_alkane: Carbon number of the alkane eluting after the compound.
        alkane_rts: Mapping of carbon number -> alkane retention time.

    Returns:
        The retention index rounded to the nearest integer.

    Raises:
        KeyError: If either carbon number is missing from ``alkane_rts``.
        ValueError: If any retention time is not positive (log10 undefined).
        ZeroDivisionError: If both alkanes have the same retention time.
    """
    log_rt = math.log10(rt)
    log_rt_small = math.log10(alkane_rts[smaller_alkane])
    log_rt_large = math.log10(alkane_rts[larger_alkane])

    # Fraction of the way (in log space) from the smaller to the larger alkane.
    fraction = (log_rt - log_rt_small) / (log_rt_large - log_rt_small)
    # Scale by the carbon-number gap so non-adjacent standards interpolate correctly.
    carbon_span = larger_alkane - smaller_alkane
    ri = 100 * (smaller_alkane + carbon_span * fraction)
    return round(ri)

# Function to load CSV and skip header if necessary
def load_csv_with_optional_skip(uploaded_file):
    """Load the first uploaded CSV into a DataFrame, detecting header rows.

    The first rows are probed without treating any of them as a header. Rows
    at the top whose second column is not numeric are considered header rows
    (at most two are tolerated). The last such row supplies the column names;
    if there is none, the file is header-less, every row is kept as data and
    the columns are named "Column 1", "Column 2", ...

    Args:
        uploaded_file: Mapping of filename -> raw file bytes; only the first
            entry is read.

    Returns:
        A pandas DataFrame holding the data rows, with string column names.

    Raises:
        ValueError: If the table has fewer than two columns or no numeric
            value is found in the second column within the first three rows.
    """
    content = uploaded_file[next(iter(uploaded_file))]

    # Probe with header=None so a header-less file does not lose its first
    # data row to pandas' default "first line is the header" behaviour.
    probe = pd.read_csv(io.BytesIO(content), header=None, nrows=3)
    if probe.shape[1] < 2:
        raise ValueError("Failed to parse a valid table with numeric data.")

    # Count leading rows whose second column is not numeric.
    for header_rows in range(len(probe)):
        value = probe.iloc[header_rows, 1]
        if isinstance(value, str):
            value = value.strip()
        try:
            pd.to_numeric(value)
        except (ValueError, TypeError):
            continue
        break
    else:
        raise ValueError("Failed to parse a valid table with numeric data.")

    if header_rows == 0:
        # No header present: keep every row and give the columns string names
        # so callers can treat the labels as text.
        df = pd.read_csv(io.BytesIO(content), header=None)
        df.columns = [f"Column {i + 1}" for i in range(df.shape[1])]
        return df

    # The row just before the first numeric row provides the column names;
    # anything above it is discarded by pandas.
    return pd.read_csv(io.BytesIO(content), header=header_rows - 1)

# Prompt user to upload files
print("Please upload the alkane CSV file:")
uploaded_alkane = files.upload()
df_alkanes = load_csv_with_optional_skip(uploaded_alkane)
# str() guards against non-string column labels before stripping whitespace.
df_alkanes.columns = [str(col).strip() for col in df_alkanes.columns]
# Map carbon number (column 1) -> retention time (column 2).
alkane_rts = dict(zip(df_alkanes.iloc[:, 0], df_alkanes.iloc[:, 1]))

print("Please upload the compound CSV file:")
uploaded_compound = files.upload()
df_compounds = load_csv_with_optional_skip(uploaded_compound)
df_compounds.columns = [str(col).strip() for col in df_compounds.columns]

# Calculate retention index for each compound
results = []
unmatched = []  # compounds whose retention time is not bracketed by any alkane pair
sorted_alkanes = sorted(alkane_rts.items())
# Neighbouring (carbon number, retention time) pairs, ordered by carbon number.
alkane_pairs = list(zip(sorted_alkanes, sorted_alkanes[1:]))
for _, row in df_compounds.iterrows():
    name, rt = row.iloc[0], row.iloc[1]
    for (n1, rt1), (n2, _rt2) in alkane_pairs:
        if rt1 <= rt <= _rt2:
            ri = calculate_retention_index(rt, n1, n2, alkane_rts)
            results.append((name, rt, ri))
            break
    else:
        # No bracketing alkane pair: no index can be interpolated.
        unmatched.append((name, rt))

# Report skipped compounds instead of dropping them silently.
if unmatched:
    print("\nWarning: no retention index could be calculated for the following "
          "compounds (retention time outside the range of the alkane standards):")
    for name, rt in unmatched:
        print(f"  {name} (retention time: {rt})")

# Output result as DataFrame
df_result = pd.DataFrame(results, columns=["Compound", "Retention Time", "Retention Index"])
print("\nRetention Index Table:")
display(df_result)

# Export to CSV with date (suffix formatted as _MMDDYY)
date_str = datetime.now().strftime("_%m%d%y")
output_filename = f"retention_index_results{date_str}.csv"
df_result.to_csv(output_filename, index=False)
files.download(output_filename)

# df_result is available for further use
