In [2]:
import pandas as pd
import numpy as np

# --- Configuration ---
NUM_WEEKS = 100  # Number of simulated weeks
CYCLES_PER_WEEK = 11  # Test cycles generated per week (100 * 11 = 1100 rows total)
PASS_PROBABILITY = 0.70  # Chance that any single automated test run passes
AWS_HOURLY_COST = 0.10  # USD charged per hour of AWS machine usage
OUTLIER_PROBABILITY = 0.015  # 1.5% chance of an outlier AWS usage
HIGH_OUTLIER_RANGE = (100, 200)  # (low, high) bounds, in hours, for outlier usage

# Seed NumPy's global generator so that a fresh "Restart & Run All" reproduces
# exactly the same dataset. Change RANDOM_SEED to obtain a different sample.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

data = []  # Accumulates one dict per generated row
test_case_number = 1  # Initialize the Test Case Counter

# --- Data Generation Loop ---
# Produces one row per (week, cycle) pair; weeks are numbered starting at 1.
for week in range(1, NUM_WEEKS + 1):
    for cycle in range(CYCLES_PER_WEEK):

        # Sequential identifier assigned to this row
        current_test_case = test_case_number

        # Up to three automated runs are simulated. Drawing stops at the first
        # 'Pass'; any run that was never attempted keeps the 'N/A' placeholder.
        pass_fail_odds = [PASS_PROBABILITY, 1 - PASS_PROBABILITY]
        run_statuses = ['N/A', 'N/A', 'N/A']
        for attempt in range(3):
            run_statuses[attempt] = np.random.choice(['Pass', 'Fail'], p=pass_fail_odds)
            if run_statuses[attempt] == 'Pass':
                manual_test = 'No'
                break
        else:
            # The loop finished without a break: all three runs failed,
            # so a manual test is triggered.
            manual_test = 'Yes'
        run1_status, run2_status, run3_status = run_statuses

        # AWS usage in hours: a rare draw selects the high outlier range,
        # otherwise the normal 10-50 hour range applies.
        is_outlier = np.random.rand() < OUTLIER_PROBABILITY
        usage_bounds = HIGH_OUTLIER_RANGE if is_outlier else (10, 50)
        aws_usage = np.random.uniform(usage_bounds[0], usage_bounds[1])

        # Cost is derived from the unrounded usage, then rounded to cents
        cost = round(aws_usage * AWS_HOURLY_COST, 2)

        # Assemble the record and store it
        row = {
            'Test Case Number': current_test_case,
            'Week': f'Week {week}',
            'Test Run 1 Status': run1_status,
            'Test Run 2 Status': run2_status,
            'Test Run 3 Status': run3_status,
            'Manual Test Triggered (Yes/No)': manual_test,
            'AWS Machine Usage (hours)': round(aws_usage, 2),
            'Cost (USD)': cost
        }
        data.append(row)

        # Advance the counter so the next row gets the next identifier
        test_case_number += 1

# --- Create and Export DataFrame ---
# Each dict collected in `data` becomes one row; its keys become the columns
df = pd.DataFrame(data)

# Report the row count, then show a short preview of the frame
print(f"Generated dataset with {len(df)} rows.")
print("\nFirst 5 rows of the dataset:", df.head(), sep="\n")

# Write the dataset to a CSV file (relative path), omitting the index column
file_name = 'simulated_qa_results_with_outliers_and_case_number.csv'
df.to_csv(file_name, index=False)

Generated dataset with 1100 rows.

First 5 rows of the dataset:
   Test Case Number    Week Test Run 1 Status Test Run 2 Status  \
0                 1  Week 1              Pass               N/A   
1                 2  Week 1              Pass               N/A   
2                 3  Week 1              Pass               N/A   
3                 4  Week 1              Fail              Pass   
4                 5  Week 1              Pass               N/A   

  Test Run 3 Status Manual Test Triggered (Yes/No)  AWS Machine Usage (hours)  \
0               N/A                             No                      31.42   
1               N/A                             No                      23.77   
2               N/A                             No                      15.98   
3               N/A                             No                      33.15   
4               N/A                             No                      32.84   

   Cost (USD)  
0        3.14  
1        2.38 