In [1]:
import pandas as pd

# Path of the CSV this notebook reads; a later cell also writes back to this path.
f = "who_audit_synthetic_data.csv"
# NOTE(review): the preview below shows an "Unnamed: 0" column, which usually
# means the file was saved together with its index - confirm whether
# index_col=0 is wanted here.
df = pd.read_csv(f)
df.head()

Unnamed: 0,batch_id,timestamp,tray_position,batch_temp_high,batch_temp_low,ambient_temp,facility_temp,sensor_status,quality_check_status,container_id,...,thermal_reading,relative_humidity,calibration_date,product_name,heat_unit,vaccine_type,cold_value,t_max,t_min,notes
0,B10039,2025-02-07 8:00:00,AM4,3.6,1.3,20.9,18.6,ACTIVE,FAIL,C5539,...,3.5,47.3,2024-12-23,Polio,3.6,Inactivated,1.2,8.0,2.0,Temperature below range
1,B10039,2025-02-07 12:00:00,AM4,1.2,-0.1,20.4,18.1,ACTIVE,FAIL,C5539,...,1.1,47.6,2024-12-23,Polio,1.2,Inactivated,-0.2,8.0,2.0,Critical low temperature
2,B10039,2025-02-07 16:00:00,AM4,1.9,0.5,20.7,18.4,ACTIVE,FAIL,C5539,...,1.8,47.4,2024-12-23,Polio,1.9,Inactivated,0.4,8.0,2.0,Temperature slowly recovering
3,B10039,2025-02-07 20:00:00,AM4,2.9,1.0,21.2,18.8,ACTIVE,FAIL,C5539,...,2.8,47.2,2024-12-23,Polio,2.9,Inactivated,0.9,8.0,2.0,Temperature still below range
4,B10040,2025-02-08 8:00:00,AN5,4.5,2.0,21.6,19.2,ACTIVE,PASS,C5540,...,4.4,46.8,2024-12-24,Hepatitis B,4.5,Recombinant,1.9,8.0,2.0,Normal operation


In [2]:
# Number of rows per vaccine product.
df['product_name'].value_counts()


product_name
Varicella        20
MMR              20
Polio            16
Hepatitis B      16
DTaP             16
HPV              16
Rotavirus        16
Pneumococcal     16
Meningococcal    16
Influenza        16
Tdap             16
Hepatitis A      16
Name: count, dtype: int64

In [8]:
import numpy as np

def generate_random_temps(df, acceptable_ranges, rng=None):
    """
    Generate random temperatures within acceptable ranges for each vaccine type.

    For every row whose product_name is a key of acceptable_ranges, two values
    are drawn uniformly from that vaccine's (min_temp, max_temp) range; the
    larger one becomes batch_temp_high and the smaller one batch_temp_low.
    Rows whose product_name has no range keep their temperatures, and a
    warning is printed for each of them. Both columns are rounded to
    1 decimal place before returning.

    Values are written by row position, so rows that share an index label are
    sampled independently of each other.

    Parameters:
    df (pd.DataFrame): DataFrame containing vaccine data (needs a product_name column)
    acceptable_ranges (dict): Dictionary mapping vaccine names to (min_temp, max_temp) tuples
    rng (None, int or np.random.Generator): Optional seed or generator for
        reproducible output. With None (the default) NumPy's global
        np.random state is used.

    Returns:
    pd.DataFrame: Copy of df with updated batch_temp_high and batch_temp_low columns
    """
    # Create a copy to avoid modifying the original dataframe
    df_updated = df.copy()

    products = df_updated['product_name']
    # Same membership test as a per-row `vaccine in acceptable_ranges`
    known = np.array([name in acceptable_ranges for name in products], dtype=bool)

    # Report every row that cannot be sampled, in row order
    for idx, vaccine in products[~known].items():
        print(f"Warning: No acceptable range found for vaccine '{vaccine}' at index {idx}")

    if known.any():
        bounds = np.array(
            [acceptable_ranges[name] for name in products[known]], dtype=float
        )
        low, high = bounds[:, 0], bounds[:, 1]

        # Fall back to the global np.random state unless a seed/generator is given
        sampler = np.random if rng is None else np.random.default_rng(rng)

        # Two draws per known row; column-wise max/min gives the high/low pair
        draws = sampler.uniform(low[:, None], high[:, None], size=(len(low), 2))

        for column, values in (
            ('batch_temp_high', draws.max(axis=1)),
            ('batch_temp_low', draws.min(axis=1)),
        ):
            if column in df_updated.columns:
                updated = df_updated[column].astype(float).to_numpy(copy=True)
            else:
                updated = np.full(len(df_updated), np.nan)
            # Positional assignment: safe even when index labels repeat
            updated[known] = values
            df_updated[column] = updated

    # Round to 1 decimal place to match original data format
    df_updated['batch_temp_high'] = df_updated['batch_temp_high'].round(1)
    df_updated['batch_temp_low'] = df_updated['batch_temp_low'].round(1)

    return df_updated

# Acceptable temperature range per vaccine as (min_temp, max_temp), keyed by
# the values found in the product_name column. Passed to generate_random_temps.
# NOTE(review): the same table is hard-coded again inside
# check_temperature_compliance - keep the two in sync.
acceptable_ranges = {
    'Varicella': (2.5, 7.5),
    'MMR': (2.5, 7.5),
    'Polio': (2.0, 8.0),
    'Hepatitis B': (2.2, 7.8),
    'DTaP': (3.0, 7.0),
    'HPV': (2.5, 7.2),
    'Rotavirus': (2.5, 7.5),
    'Pneumococcal': (2.8, 6.5),
    'Meningococcal': (2.8, 6.5),
    'Influenza': (2.0, 8.0),
    'Tdap': (3.0, 7.0),
    'Hepatitis A': (2.0, 8.0)
}

In [10]:
# Replace batch_temp_high / batch_temp_low with random values inside each
# vaccine's acceptable range.
# NOTE(review): no random seed is set in the visible cells, so these numbers
# presumably differ on every run.
df_temps = generate_random_temps(df, acceptable_ranges)
df_temps

Unnamed: 0,batch_id,timestamp,tray_position,batch_temp_high,batch_temp_low,ambient_temp,facility_temp,sensor_status,quality_check_status,container_id,...,thermal_reading,relative_humidity,calibration_date,product_name,heat_unit,vaccine_type,cold_value,t_max,t_min,notes
0,B10039,2025-02-07 8:00:00,AM4,3.7,3.7,20.9,18.6,ACTIVE,FAIL,C5539,...,3.5,47.3,2024-12-23,Polio,3.6,Inactivated,1.2,8.0,2.0,Temperature below range
1,B10039,2025-02-07 12:00:00,AM4,5.4,4.6,20.4,18.1,ACTIVE,FAIL,C5539,...,1.1,47.6,2024-12-23,Polio,1.2,Inactivated,-0.2,8.0,2.0,Critical low temperature
2,B10039,2025-02-07 16:00:00,AM4,6.5,5.7,20.7,18.4,ACTIVE,FAIL,C5539,...,1.8,47.4,2024-12-23,Polio,1.9,Inactivated,0.4,8.0,2.0,Temperature slowly recovering
3,B10039,2025-02-07 20:00:00,AM4,7.4,3.1,21.2,18.8,ACTIVE,FAIL,C5539,...,2.8,47.2,2024-12-23,Polio,2.9,Inactivated,0.9,8.0,2.0,Temperature still below range
4,B10040,2025-02-08 8:00:00,AN5,7.8,4.1,21.6,19.2,ACTIVE,PASS,C5540,...,4.4,46.8,2024-12-24,Hepatitis B,4.5,Recombinant,1.9,8.0,2.0,Normal operation
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,B10037,2025-02-05 20:00:00,AK2,3.9,2.8,21.7,19.3,ACTIVE,PASS,C5537,...,4.7,46.7,2024-12-21,Varicella,4.8,Live attenuated,2.1,8.0,2.0,Normal operation
196,B10038,2025-02-06 8:00:00,AL3,6.9,3.4,21.8,19.4,ACTIVE,PASS,C5538,...,4.6,46.6,2024-12-22,MMR,4.7,Live attenuated,2.1,8.0,2.0,Normal operation
197,B10038,2025-02-06 12:00:00,AL3,4.3,3.8,23.4,19.9,ACTIVE,PASS,C5538,...,4.8,45.4,2024-12-22,MMR,4.9,Live attenuated,2.2,8.0,2.0,Normal operation
198,B10038,2025-02-06 16:00:00,AL3,7.3,5.3,22.9,19.7,ACTIVE,PASS,C5538,...,4.6,45.7,2024-12-22,MMR,4.7,Live attenuated,2.1,8.0,2.0,Normal operation


In [21]:
# NOTE(review): `f` is the same path the first cell loads from, so this
# replaces the source CSV with the randomized temperatures. Consider writing
# to a separate output file to keep the input recoverable.
df_temps.to_csv(f, index=False)

In [3]:
def check_temperature_compliance(df, acceptable_ranges=None):
    """
    Check if batch temperatures are within acceptable range for each vaccine.

    A row is compliant when both batch_temp_high and batch_temp_low lie inside
    the inclusive [min, max] range of its product_name. Rows whose
    product_name has no known range get NaN bounds and are marked
    non-compliant.

    Parameters:
    df (pandas.DataFrame): DataFrame with vaccine temperature data; uses the
        product_name, batch_temp_high and batch_temp_low columns
    acceptable_ranges (dict, optional): Mapping of vaccine name to
        (min_temp, max_temp). When omitted, the built-in table below is used.

    Returns:
    pandas.DataFrame: Copy of df with additional 'acceptable_min',
        'acceptable_max' and 'temp_compliant' columns
    """
    if acceptable_ranges is None:
        # Default acceptable temperature ranges for each vaccine
        acceptable_ranges = {
            'Varicella': (2.5, 7.5),
            'MMR': (2.5, 7.5),
            'Polio': (2.0, 8.0),
            'Hepatitis B': (2.2, 7.8),
            'DTaP': (3.0, 7.0),
            'HPV': (2.5, 7.2),
            'Rotavirus': (2.5, 7.5),
            'Pneumococcal': (2.8, 6.5),
            'Meningococcal': (2.8, 6.5),
            'Influenza': (2.0, 8.0),
            'Tdap': (3.0, 7.0),
            'Hepatitis A': (2.0, 8.0)
        }

    # Create a copy to avoid modifying original DataFrame
    result_df = df.copy()

    # NaN (not None) for unknown vaccines keeps the bound columns numeric, so
    # the comparisons below evaluate to False instead of failing on None.
    unknown = (float('nan'), float('nan'))
    products = result_df['product_name']
    result_df['acceptable_min'] = products.map(
        lambda name: acceptable_ranges.get(name, unknown)[0]
    ).astype(float)
    result_df['acceptable_max'] = products.map(
        lambda name: acceptable_ranges.get(name, unknown)[1]
    ).astype(float)

    # Both batch temperatures must fall inside the inclusive range
    lower = result_df['acceptable_min']
    upper = result_df['acceptable_max']
    result_df['temp_compliant'] = (
        result_df['batch_temp_high'].between(lower, upper)
        & result_df['batch_temp_low'].between(lower, upper)
    )

    return result_df

In [24]:
# Re-read the CSV from disk; df_temps now holds whatever the file at `f`
# currently contains rather than the in-memory frame from the earlier cell.
df_temps = pd.read_csv(f)

In [25]:
# Flag every row, then display only those that failed the range check.
result = check_temperature_compliance(df_temps)
result.loc[~result['temp_compliant']]

Unnamed: 0,batch_id,timestamp,tray_position,batch_temp_high,batch_temp_low,ambient_temp,facility_temp,sensor_status,quality_check_status,container_id,...,relative_humidity,calibration_date,product_name,heat_unit,cold_value,t_min,notes,acceptable_min,acceptable_max,temp_compliant
28,B10046,2025-02-14 8:00:00,AT1,4.4,1.5,21.5,19.1,ACTIVE,PASS,C5546,...,46.9,2024-12-30,Influenza,4.4,1.9,2.0,Normal operation,2.0,8.0,False
38,B10048,2025-02-16 16:00:00,AV3,8.2,3.5,23.6,20.1,ACTIVE,PASS,C5548,...,45.0,2025-01-01,Hepatitis A,5.0,2.3,2.0,Normal operation,2.0,8.0,False
48,B10001,2025-01-01 8:00:00,A1,3.9,2.4,22.3,19.8,ACTIVE,PASS,C5501,...,45.2,2024-11-15,Varicella,5.2,2.6,2.0,Normal operation,2.5,7.5,False


In [23]:
# Batch id and timestamp of every non-compliant reading.
result.loc[~result['temp_compliant'], ['batch_id', 'timestamp']]


Unnamed: 0,batch_id,timestamp
28,B10046,2025-02-14 8:00:00
38,B10048,2025-02-16 16:00:00
48,B10001,2025-01-01 8:00:00
