In [3]:
import pandas as pd
import re
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from matplotlib.lines import Line2D
from itertools import cycle
from IPython.display import HTML
import os


In [4]:
# Define the path to your Excel file
excel_file = 'AirStability1.xlsx'  # Replace with your actual file path

# Check if the file exists
if not os.path.exists(excel_file):
    raise FileNotFoundError(f"The file {excel_file} does not exist.")


In [5]:
# Load all sheet names
xls = pd.ExcelFile(excel_file)
sheet_names = xls.sheet_names  # List of sheet names representing different organometallics

# Display sheet names
print("Sheet Names (Organometallics):")
print(sheet_names)


Sheet Names (Organometallics):
['DEZ', 'TMA', 'TDMASn']


In [6]:

# Dictionary to hold data for inspection and extraction of organic/UV information
data_dict = {}
organics_uv = set()  # Set to collect unique organic and UV information

# Process each sheet
for sheet in sheet_names:
    df = pd.read_excel(xls, sheet_name=sheet, header=0)  # Read sheet with headers on the first row
    data_dict[sheet] = df
    
    # Extract organics and UV status from y columns (assumed to be in odd indices after first column)
    for i in range(1, len(df.columns), 2):  # Start from the second column (index 1), assuming x1, y1, x2, y2, ...
        y_header = df.columns[i].strip() if isinstance(df.columns[i], str) else ''
        
        # Identify organic and UV status based on "UV" in header
        if 'UV' in y_header:
            organic_name = y_header.replace('UV', '').strip()
            uv_status = 'UV'
        else:
            organic_name = y_header
            uv_status = 'No UV'
        
        # Add to the organics and UV set
        organics_uv.add((organic_name, uv_status))

# Convert the set to a sorted list for clearer presentation
organics_uv = sorted(organics_uv)
print("\nUnique Organics and UV Status:")
for organic, uv in organics_uv:
    print(f"{organic}: {uv}")



Unique Organics and UV Status:
BTY: No UV
BTY: UV
CB: No UV
CB: UV
DHB: No UV
DHB: UV
EG: No UV
EG: UV
MPD: No UV
MPD: UV
THB: No UV
THB: UV


In [7]:
# Separate lists for organic names and UV statuses
organics = [item[0] for item in organics_uv]
uv_statuses = [item[1] for item in organics_uv]

print("Organics:")
print(organics)

print("\nUV Statuses:")
print(uv_statuses)


Organics:
['BTY', 'BTY', 'CB', 'CB', 'DHB', 'DHB', 'EG', 'EG', 'MPD', 'MPD', 'THB', 'THB']

UV Statuses:
['No UV', 'UV', 'No UV', 'UV', 'No UV', 'UV', 'No UV', 'UV', 'No UV', 'UV', 'No UV', 'UV']


In [8]:
# Assign a unique color to each organic
color_cycle = cycle(plt.cm.tab10.colors)  # Cycle through the 'tab10' colormap
organic_colors = {organic: color for organic, color in zip(organics, color_cycle)}

print("Assigned Colors to Organics:")
for organic, color in organic_colors.items():
    print(f"{organic}: {color}")


Assigned Colors to Organics:
BTY: (1.0, 0.4980392156862745, 0.054901960784313725)
CB: (0.8392156862745098, 0.15294117647058825, 0.1568627450980392)
DHB: (0.5490196078431373, 0.33725490196078434, 0.29411764705882354)
EG: (0.4980392156862745, 0.4980392156862745, 0.4980392156862745)
MPD: (0.09019607843137255, 0.7450980392156863, 0.8117647058823529)
THB: (1.0, 0.4980392156862745, 0.054901960784313725)


In [22]:
def normalize_y_data(data_dict):
    """
    Normalize each y column in each sheet to make the first data point equal to 1.
    
    Parameters:
        data_dict (dict): Dictionary with sheet dataframes.
        
    Returns:
        dict: Dictionary with normalized sheet dataframes.
    """
    normalized_data_dict = {}
    
    for sheet, df in data_dict.items():
        df_copy = df.copy()
        
        # Normalize each y column
        for i in range(1, len(df_copy.columns), 2):
            y_column = df_copy.columns[i]
            if df_copy[y_column].iloc[0] != 0:
                df_copy[y_column] /= df_copy[y_column].iloc[0]
        
        normalized_data_dict[sheet] = df_copy
    
    return normalized_data_dict


In [23]:
def setup_plot(organics_uv, sheet_names):
    """
    Set up the plot for the animation with lines for each organic and UV status.
    
    Parameters:
        organics_uv (list): List of unique (organic name, UV status) pairs.
        sheet_names (list): List of sheet names.
        
    Returns:
        matplotlib.figure.Figure, matplotlib.axes._axes.Axes, dict: Figure, axis, and lines dictionary.
    """
    fig, ax = plt.subplots(figsize=(12, 8))
    
    # Set up colors and line styles
    color_cycle = cycle(plt.cm.tab10.colors)
    organic_colors = {organic: next(color_cycle) for organic, _ in organics_uv}
    line_styles = ['solid', 'dashed', 'dashdot', 'dotted']
    line_styles_cycle = cycle(line_styles)
    organometallic_styles = {sheet: next(line_styles_cycle) for sheet in sheet_names}
    
    # Initialize Line2D objects for each combination
    lines = {}
    for sheet in sheet_names:
        for organic, uv in organics_uv:
            key = f"{sheet}_{organic}_{uv}"
            alpha = 1.0 if uv == 'UV' else 0.6
            line = Line2D([], [], label=f"{organic} - {sheet} - {uv}",
                          color=organic_colors[organic],
                          linestyle=organometallic_styles[sheet],
                          alpha=alpha)
            lines[key] = line
            ax.add_line(line)
    
    # Set axis labels and title
    ax.set_xlabel('Time (min)')
    ax.set_ylabel('Normalized Thickness')
    ax.set_title('Animated Time Series by Organic and Organometallic Used')
    
    return fig, ax, lines


In [24]:
def animate_plot(frames, data_dict, organics_uv, lines, fig, ax):
    """
    Animate the plot, adding lines frame by frame with fading effect for previous frames.
    
    Parameters:
        frames (list): List of frame names (sheet names) in order.
        data_dict (dict): Dictionary with normalized data for each sheet.
        organics_uv (list): List of unique (organic name, UV status) pairs.
        lines (dict): Dictionary of Line2D objects for each organic-UV combination.
        
    Returns:
        matplotlib.animation.FuncAnimation: Animation object.
    """
    def init():
        for line in lines.values():
            line.set_data([], [])
            line.set_alpha(0.0)  # Start with all lines invisible
        return list(lines.values())
    
    def animate(frame_idx):
        sheet = frames[frame_idx]
        df = data_dict[sheet]
        
        for organic, uv in organics_uv:
            y_column = None
            for i in range(1, len(df.columns), 2):
                y_header = re.sub(r'\.\d+$', '', df.columns[i])
                expected_header = f"{organic} UV" if uv == 'UV' else organic
                if y_header == expected_header:
                    y_column = df.columns[i]
                    x_column = df.columns[i - 1]
                    break
            
            if not y_column:
                continue
            
            key = f"{sheet}_{organic}_{uv}"
            x = df[x_column]
            y = df[y_column]
            lines[key].set_data(x, y)
            lines[key].set_alpha(1.0)
            
            for prev_idx in range(frame_idx):
                prev_sheet = frames[prev_idx]
                prev_key = f"{prev_sheet}_{organic}_{uv}"
                if prev_key in lines:
                    new_alpha = max(0.0, lines[prev_key].get_alpha() - 0.3)
                    lines[prev_key].set_alpha(new_alpha)
        
        return list(lines.values())
    
    ani = animation.FuncAnimation(fig, animate, frames=len(frames),
                                  init_func=init, blit=True, repeat=False, interval=1000)
    return ani


In [25]:
# Set up the plot
fig, ax = plt.subplots(figsize=(12, 8))

# Initialize a dictionary to hold Line2D objects for each organic, UV status, and organometallic
lines = {}

for sheet in sheet_names:
    for (organic, uv) in organics_uv:
        key = f"{sheet}_{organic}_{uv}"
        # Assign alpha based on UV status: 1.0 for UV, 0.6 for No UV
        initial_alpha = 1.0 if uv == 'UV' else 0.6
        line = Line2D([], [], label=f"{organic} - {sheet} - {uv}",
                      color=organic_colors[organic],
                      linestyle=organometallic_styles[sheet],
                      alpha=initial_alpha)
        lines[key] = line
        ax.add_line(line)


In [26]:
# Normalize each y column to make the first data point equal to 1
normalized_data_dict = normalize_y_data(data_dict)
print("Data normalization complete.")


Data normalization complete.


In [27]:
# Set up the plot with lines for each organic and UV status
fig, ax, lines = setup_plot(organics_uv, sheet_names)


In [28]:
# Create the animation
ani = animate_plot(sheet_names, normalized_data_dict, organics_uv, lines, fig, ax)
HTML(ani.to_jshtml())  # Display the animation inline in the notebook


In [None]:
# Step 1: Install Required Libraries
# Uncomment and run the following line if you haven't installed the libraries yet.
# !pip install pandas matplotlib openpyxl pillow

# Step 2: Import Libraries
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib.ticker import MultipleLocator, AutoMinorLocator
from matplotlib.animation import FuncAnimation, PillowWriter
import numpy as np
import os

# Optional: Apply font customization globally
plt.rcParams['font.family'] = 'Arial'
plt.rcParams['font.size'] = 10

# Step 3: Define Organics and Read the Excel Data
# Replace 'data.xlsx' with your actual Excel file path
excel_file = 'data.xlsx'

# Define the list of organics
organics = ['MPD', 'EG', 'THB', 'BTY', 'DHB', 'CB']

# Generate the corresponding UV labels
organics_uv = [f"{organic} UV" for organic in organics]

# Combine into a single list for easier processing
all_series_labels = organics + organics_uv

# Read the entire Excel file
try:
    df = pd.read_excel(excel_file)
except FileNotFoundError:
    print(f"Error: The file '{excel_file}' was not found.")
    # Optionally, exit the script or handle the error as needed
    exit()

# Identify column pairs (Assuming columns are in pairs: X1, Y1, X2, Y2, ...)
columns = df.columns.tolist()
column_pairs = []

for i in range(0, len(columns), 2):
    if i + 1 < len(columns):
        column_pairs.append((columns[i], columns[i + 1]))

# Initialize lists to hold organic and organicUV series
organic_series = []
organicUV_series = []

# Populate the lists based on Y column headers
for x_col, y_col in column_pairs:
    if y_col in organics:
        organic_series.append({'x': df[x_col], 'y': df[y_col], 'label': y_col})
    elif y_col in organics_uv:
        organicUV_series.append({'x': df[x_col], 'y': df[y_col], 'label': y_col})

# Verify the identified series
print("Organic Series:")
for series in organic_series:
    print(f" - {series['label']} with {len(series['x'])} data points")

print("\nOrganicUV Series:")
for series in organicUV_series:
    print(f" - {series['label']} with {len(series['x'])} data points")

# Step 4: Normalize and Downsample the Data
# Define the time limit and sampling rate
time_limit = 60  # minutes
sampling_rate = 5  # Take every 5th data point

# Function to normalize and filter series
def preprocess_series(series, sampling_rate, time_limit):
    # Normalize Y so that the first value is 1
    first_value = series['y'].iloc[0]
    if first_value != 0:
        series['y'] = series['y'] / first_value
    else:
        # Handle zero initial value by leaving the series as is or implementing another strategy
        series['y'] = series['y']  # Modify as needed
    
    # Filter data up to the time limit
    mask = series['x'] <= time_limit
    series['x'] = series['x'][mask]
    series['y'] = series['y'][mask]
    
    # Downsample the data
    series['x'] = series['x'].iloc[::sampling_rate].reset_index(drop=True)
    series['y'] = series['y'].iloc[::sampling_rate].reset_index(drop=True)
    
    return series

# Apply preprocessing to all series
organic_series = [preprocess_series(s, sampling_rate, time_limit) for s in organic_series]
organicUV_series = [preprocess_series(s, sampling_rate, time_limit) for s in organicUV_series]

# Determine the overall X range
all_x = pd.concat([s['x'] for s in organic_series + organicUV_series])
x_min = all_x.min()
x_max = all_x.max()

# Set Y-axis minimum to 0 and determine Y range
all_y = pd.concat([s['y'] for s in organic_series + organicUV_series])
y_min = 0
y_max = all_y.max()

print(f"\nAfter preprocessing:")
print(f"Time range: {x_min} to {x_max} minutes")
print(f"Measurement range: {y_min} to {y_max}")

# Step 5: Assign Two Consistent Colors Using Viridis Colormap
# Get viridis colormap
cmap = plt.get_cmap('viridis')

# Assign two consistent colors from viridis
color_as_deposited = cmap(0.3)  # Adjust the value (0 to 1) for desired shade
color_uv_treated = cmap(0.7)     # Adjust the value (0 to 1) for desired shade

print(f"Color As Deposited: {mpl.colors.to_hex(color_as_deposited)}")
print(f"Color UV Treated: {mpl.colors.to_hex(color_uv_treated)}")

# Step 6: Create the Matplotlib Static Figures and Animated GIFs
# Define figure size in centimeters
fig_width_cm = 9
fig_height_cm = 14

# Convert centimeters to inches for Matplotlib
fig_width = fig_width_cm / 2.54
fig_height = fig_height_cm / 2.54

# Create a directory to save the figures and GIFs
output_dir = 'organic_figures'
os.makedirs(output_dir, exist_ok=True)

# Function to create and save a static figure for each organic
def create_static_figure(organic, organicUV, color_as_deposited, color_uv_treated, output_dir):
    fig, ax = plt.subplots(figsize=(fig_width, fig_height))
    
    # Plot organic (As Deposited)
    ax.plot(
        organic['x'], 
        organic['y'], 
        color=color_as_deposited, 
        linestyle='solid', 
        linewidth=2,
        label='As Deposited'  # Label used only for the separate legend
    )
    
    # Plot organicUV (UV Treated)
    ax.plot(
        organicUV['x'], 
        organicUV['y'], 
        color=color_uv_treated, 
        linestyle='dashed', 
        linewidth=2,
        label='UV Treated'  # Label used only for the separate legend
    )
    
    # Set labels and title
    ax.set_xlabel('Time (minutes)', fontsize=12, fontweight='bold')
    ax.set_ylabel('Normalized Measurement', fontsize=12, fontweight='bold')
    ax.set_title(f"{organic['label']} Measurements Over Time", fontsize=14, fontweight='bold')
    
    # Set Y-axis starting at 0 and Y-axis limits
    ax.set_ylim(bottom=y_min, top=y_max * 1.05)  # Slightly higher for aesthetics
    ax.set_xlim(x_min, x_max)
    
    # Add grid
    ax.grid(True, which='both', linestyle='--', linewidth=0.5, alpha=0.7)
    
    # Customize spines (all four spines visible for boxing)
    for spine in ax.spines.values():
        spine.set_visible(True)
        spine.set_linewidth(1)
        spine.set_color('black')
    
    # Set major and minor ticks
    ax.xaxis.set_major_locator(MultipleLocator(10))  # Major ticks every 10 minutes
    ax.xaxis.set_minor_locator(MultipleLocator(5))   # Minor ticks every 5 minutes
    ax.yaxis.set_major_locator(MultipleLocator(0.25))  # Major ticks every 0.25 units
    ax.yaxis.set_minor_locator(AutoMinorLocator(4))    # 3 minor ticks between each major tick (0.25 / 4 = 0.0625)
    
    # Customize tick parameters
    ax.tick_params(axis='both', which='major', labelsize=10, direction='in', length=6)
    ax.tick_params(axis='both', which='minor', labelsize=8, direction='in', length=3)
    
    # Exclude legend from individual figures
    # ax.legend(fontsize=10, loc='upper left')  # Commented out to exclude legend
    
    # Adjust layout for tightness
    plt.tight_layout()
    
    # Save the static figure in high resolution
    figure_name = f"{organic['label']}.png"
    fig.savefig(os.path.join(output_dir, figure_name), dpi=300)
    
    # Close the figure to free memory
    plt.close(fig)

# Function to create and save an animated GIF for each organic
def create_animated_gif(organic, organicUV, color_as_deposited, color_uv_treated, output_dir):
    fig, ax = plt.subplots(figsize=(fig_width, fig_height))
    
    # Set labels and title
    ax.set_xlabel('Time (minutes)', fontsize=12, fontweight='bold')
    ax.set_ylabel('Normalized Measurement', fontsize=12, fontweight='bold')
    ax.set_title(f"{organic['label']} Measurements Over Time", fontsize=14, fontweight='bold')
    
    # Set Y-axis starting at 0 and Y-axis limits
    ax.set_ylim(bottom=y_min, top=y_max * 1.05)  # Slightly higher for aesthetics
    ax.set_xlim(x_min, x_max)
    
    # Add grid
    ax.grid(True, which='both', linestyle='--', linewidth=0.5, alpha=0.7)
    
    # Customize spines (all four spines visible for boxing)
    for spine in ax.spines.values():
        spine.set_visible(True)
        spine.set_linewidth(1)
        spine.set_color('black')
    
    # Set major and minor ticks
    ax.xaxis.set_major_locator(MultipleLocator(10))  # Major ticks every 10 minutes
    ax.xaxis.set_minor_locator(MultipleLocator(5))   # Minor ticks every 5 minutes
    ax.yaxis.set_major_locator(MultipleLocator(0.25))  # Major ticks every 0.25 units
    ax.yaxis.set_minor_locator(AutoMinorLocator(4))    # 3 minor ticks between each major tick (0.25 / 4 = 0.0625)
    
    # Customize tick parameters
    ax.tick_params(axis='both', which='major', labelsize=10, direction='in', length=6)
    ax.tick_params(axis='both', which='minor', labelsize=8, direction='in', length=3)
    
    # Initialize lines (empty)
    line_as_deposited, = ax.plot([], [], color=color_as_deposited, linestyle='solid', linewidth=2)
    line_uv_treated, = ax.plot([], [], color=color_uv_treated, linestyle='dashed', linewidth=2)
    
    # Determine the number of frames based on the longer series
    num_frames = max(len(organic['x']), len(organicUV['x']))
    
    # Function to initialize the animation
    def init():
        line_as_deposited.set_data([], [])
        line_uv_treated.set_data([], [])
        return line_as_deposited, line_uv_treated
    
    # Function to update the animation
    def update(frame):
        # Update As Deposited line
        if frame < len(organic['x']):
            x_as_dep = organic['x'][:frame+1]
            y_as_dep = organic['y'][:frame+1]
            line_as_deposited.set_data(x_as_dep, y_as_dep)
        
        # Update UV Treated line
        if frame < len(organicUV['x']):
            x_uv = organicUV['x'][:frame+1]
            y_uv = organicUV['y'][:frame+1]
            line_uv_treated.set_data(x_uv, y_uv)
        
        return line_as_deposited, line_uv_treated
    
    # Create the animation
    ani = FuncAnimation(fig, update, frames=num_frames, init_func=init, blit=True, repeat=False)
    
    # Save the animation as a GIF using PillowWriter without looping
    gif_name = f"{organic['label']}.gif"
    ani.save(os.path.join(output_dir, gif_name), writer=PillowWriter(fps=10), loop=1)
    
    # Close the figure to free memory
    plt.close(fig)
    
    print(f"Animated GIF for {organic['label']} saved as {gif_name}.")

# Step 7: Create a Separate Legend as a PNG
def create_legend(color_as_deposited, color_uv_treated, output_dir):
    fig, ax = plt.subplots(figsize=(5, 2))  # Small figure size for legend
    
    # Plot dummy lines for legend
    ax.plot([], [], color=color_as_deposited, linestyle='solid', linewidth=2, label='As Deposited')
    ax.plot([], [], color=color_uv_treated, linestyle='dashed', linewidth=2, label='UV Treated')
    
    # Create the legend
    legend = ax.legend(loc='center', frameon=False, fontsize=12)
    
    # Remove axes
    ax.axis('off')
    
    # Adjust layout
    plt.tight_layout()
    
    # Save the legend
    legend_filename = "legend.png"
    fig.savefig(os.path.join(output_dir, legend_filename), dpi=300)
    
    plt.close(fig)
    print("Separate legend saved as 'legend.png'.")

# Step 6 & 7: Iterate through each organic and create both static figures and animated GIFs
for org in organics:
    # Get the corresponding UV series
    org_uv_label = f"{org} UV"
    
    # Find the series dictionaries
    organic = next((s for s in organic_series if s['label'] == org), None)
    organicUV = next((s for s in organicUV_series if s['label'] == org_uv_label), None)
    
    if organic and organicUV:
        # Create and save the static figure
        create_static_figure(
            organic=organic, 
            organicUV=organicUV, 
            color_as_deposited=color_as_deposited, 
            color_uv_treated=color_uv_treated, 
            output_dir=output_dir
        )
        print(f"Static figure for {org} created successfully.")
        
        # Create and save the animated GIF
        create_animated_gif(
            organic=organic, 
            organicUV=organicUV, 
            color_as_deposited=color_as_deposited, 
            color_uv_treated=color_uv_treated, 
            output_dir=output_dir
        )
    else:
        print(f"Series for {org} or {org_uv_label} not found.")

# Step 7: Create and save the separate legend
create_legend(color_as_deposited, color_uv_treated, output_dir)


In [None]:
# Step 8: Calculate and Output Normalized Thickness at 60 Minutes

import pandas as pd

# Initialize a list to store results
results = []

for org in organics:
    # Get the corresponding UV series
    org_uv_label = f"{org} UV"
    
    # Find the series dictionaries
    organic = next((s for s in organic_series if s['label'] == org), None)
    organicUV = next((s for s in organicUV_series if s['label'] == org_uv_label), None)
    
    if organic and organicUV:
        # Function to find the closest x-value to target and return corresponding y-value
        def get_closest_y(series, target_x):
            # Calculate the absolute difference between each x and the target
            abs_diff = (series['x'] - target_x).abs()
            # Find the index of the minimum difference
            closest_idx = abs_diff.idxmin()
            # Retrieve the y-value at this index
            closest_y = series['y'].iloc[closest_idx]
            # Retrieve the x-value for reference (optional)
            closest_x = series['x'].iloc[closest_idx]
            return closest_x, closest_y
        
        # Get normalized y at x = 0 (should be 1)
        initial_y = organic['y'].iloc[0]
        initial_y_uv = organicUV['y'].iloc[0]
        
        # Ensure initial_y is 1 (as per normalization)
        # If not, normalize again (optional safety check)
        if initial_y != 1:
            organic['y'] = organic['y'] / initial_y
            initial_y = organic['y'].iloc[0]
        
        if initial_y_uv != 1:
            organicUV['y'] = organicUV['y'] / initial_y_uv
            initial_y_uv = organicUV['y'].iloc[0]
        
        # Find the y-value closest to x = 60 minutes
        closest_x, y_60 = get_closest_y(organic, 60)
        closest_x_uv, y_60_uv = get_closest_y(organicUV, 60)
        
        # Append results to the list
        results.append({
            'Organic': org,
            'Measurement': 'As Deposited',
            'Closest Time (min)': closest_x,
            'Normalized Thickness at 60 min': y_60
        })
        results.append({
            'Organic': org,
            'Measurement': 'UV Treated',
            'Closest Time (min)': closest_x_uv,
            'Normalized Thickness at 60 min': y_60_uv
        })
    else:
        print(f"Series for {org} or {org_uv_label} not found.")

# Create a DataFrame from the results
thickness_df = pd.DataFrame(results)

# Display the DataFrame
print("\nNormalized Thickness at 60 Minutes:")
print(thickness_df)
