<a href="https://colab.research.google.com/github/bythyag/BTP_Project/blob/main/simulation_and_dataset_generation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np

# Constants
# Number of components simulated per sample; component IDs run 1..NUM_COMPONENTS.
NUM_COMPONENTS = 4
# Candidate outcomes for a component; one is picked with np.random.choice.
# 'non' adds no extra time -- presumably "no action needed" (confirm intent).
OUTCOME_TYPES = ['repair', 'replace', 'non']
# (low, high) bounds handed to np.random.uniform when sampling a duration.
TIME_RANGE = (1, 10)  # Time range in minutes

def generate_time():
    """Sample a duration uniformly from TIME_RANGE, rounded to 2 decimals."""
    sampled = np.random.uniform(low=TIME_RANGE[0], high=TIME_RANGE[1])
    return round(sampled, 2)

def simulate_component(component_id, cumulative_time):
    """Simulate one component and advance the running clock.

    Args:
        component_id: Identifier embedded in the outcome label (``C<id>_...``).
        cumulative_time: Time accumulated before this component.

    Returns:
        A 5-tuple ``(label, process_time, additional_time, total_time,
        new_cumulative_time)``. ``additional_time`` is the repair or
        replacement duration, or ``0`` when neither applies.
    """
    # Draw order (process time, outcome, then extra time) is kept fixed so a
    # seeded run produces the same sequence of random numbers.
    process_time = generate_time()
    outcome = np.random.choice(OUTCOME_TYPES)

    # Guard clause: anything other than repair/replace costs no extra time.
    if outcome not in ('repair', 'replace'):
        return f"C{component_id}_non", process_time, 0, process_time, cumulative_time + process_time

    extra_time = generate_time()
    total_time = process_time + extra_time
    if outcome == 'repair':
        label = f"C{component_id}_repair"
    else:
        label = f"C{component_id}_replace"
    return label, process_time, extra_time, total_time, cumulative_time + total_time

def _simulate_sample():
    """Simulate every component once and return the resulting sample record."""
    cumulative_time = 0
    component_details = []

    for component_id in range(1, NUM_COMPONENTS + 1):
        # simulate_component hands back the updated running total as its
        # last element, so the clock is threaded through the loop.
        outcome, process_time, additional_time, total_time, cumulative_time = (
            simulate_component(component_id, cumulative_time)
        )
        component_details.append({
            'component': f'C{component_id}',
            'outcome': outcome,
            'process_time': process_time,
            'additional_time': additional_time,
            'total_time': total_time,
            'cumulative_time': cumulative_time
        })

    return {
        'pathway': '_'.join(detail['outcome'] for detail in component_details),
        'total_time': cumulative_time,
        'component_details': component_details
    }


def generate_dataset(num_samples):
    """Build a list of ``num_samples`` independently simulated samples.

    Each sample is a dict with the joined outcome ``'pathway'``, the overall
    ``'total_time'`` and the per-component ``'component_details'`` list.
    """
    return [_simulate_sample() for _ in range(num_samples)]

# Counter gives a single-pass frequency count for the statistics below.
from collections import Counter

# Generate dataset
num_samples = 10000
dataset = generate_dataset(num_samples)

# Print some sample results (first three samples, with per-component detail)
for i, sample in enumerate(dataset[:3], 1):
    print(f"Sample {i}:")
    print(f"Pathway: {sample['pathway']}")
    print(f"Total Time: {sample['total_time']:.2f} minutes")
    print("Component Details:")
    for component in sample['component_details']:
        print(f"  {component['component']} ({component['outcome']}):")
        print(f"    Process Time: {component['process_time']:.2f} min")
        print(f"    Additional Time: {component['additional_time']:.2f} min")
        print(f"    Total Time: {component['total_time']:.2f} min")
        print(f"    Cumulative Time: {component['cumulative_time']:.2f} min")
    print()

# Calculate and print statistics
pathways = [sample['pathway'] for sample in dataset]
# Count every pathway in one pass instead of calling list.count() once per
# unique pathway, which rescans the whole list each time.
pathway_counts = Counter(pathways)
unique_pathways = set(pathway_counts)

print(f"Total number of samples: {num_samples}")
print(f"Number of unique pathways: {len(unique_pathways)}")
print("\nTop 5 most common pathways:")
# most_common(5) yields (pathway, count) pairs sorted by descending count.
for pathway, count in pathway_counts.most_common(5):
    print(f"{pathway}: {count} occurrences")

Sample 1:
Pathway: C1_replace_C2_non_C3_replace_C4_replace
Total Time: 23.29 minutes
Component Details:
  C1 (C1_replace):
    Process Time: 1.94 min
    Additional Time: 3.68 min
    Total Time: 5.62 min
    Cumulative Time: 5.62 min
  C2 (C2_non):
    Process Time: 1.76 min
    Additional Time: 0.00 min
    Total Time: 1.76 min
    Cumulative Time: 7.38 min
  C3 (C3_replace):
    Process Time: 4.68 min
    Additional Time: 4.18 min
    Total Time: 8.86 min
    Cumulative Time: 16.24 min
  C4 (C4_replace):
    Process Time: 2.90 min
    Additional Time: 4.15 min
    Total Time: 7.05 min
    Cumulative Time: 23.29 min

Sample 2:
Pathway: C1_repair_C2_replace_C3_non_C4_replace
Total Time: 39.66 minutes
Component Details:
  C1 (C1_repair):
    Process Time: 7.61 min
    Additional Time: 3.41 min
    Total Time: 11.02 min
    Cumulative Time: 11.02 min
  C2 (C2_replace):
    Process Time: 4.69 min
    Additional Time: 5.17 min
    Total Time: 9.86 min
    Cumulative Time: 20.88 min
  C3 (

In [None]:
import numpy as np
from collections import defaultdict

# Constants
# Number of components simulated in each batch; IDs run 1..NUM_COMPONENTS.
NUM_COMPONENTS = 4
# Number of batches simulated in each full run; IDs run 1..NUM_BATCHES.
NUM_BATCHES = 4
# Candidate outcomes for a component; one is picked with np.random.choice.
# 'non' adds no extra time -- presumably "no action needed" (confirm intent).
OUTCOME_TYPES = ['repair', 'replace', 'non']
# (low, high) bounds handed to np.random.uniform when sampling a duration.
TIME_RANGE = (1, 10)  # Time range in minutes

def generate_time():
    """Sample a duration uniformly from TIME_RANGE, rounded to 2 decimals."""
    sampled = np.random.uniform(low=TIME_RANGE[0], high=TIME_RANGE[1])
    return round(sampled, 2)

def simulate_component(component_id):
    """Simulate a single component.

    Args:
        component_id: Identifier embedded in the outcome label (``C<id>_...``).

    Returns:
        A 4-tuple ``(label, process_time, additional_time, total_time)``.
        ``additional_time`` is the repair or replacement duration, or ``0``
        when neither applies.
    """
    # Draw order (process time, outcome, then extra time) is kept fixed so a
    # seeded run produces the same sequence of random numbers.
    process_time = generate_time()
    outcome = np.random.choice(OUTCOME_TYPES)

    # Guard clause: anything other than repair/replace costs no extra time.
    if outcome not in ('repair', 'replace'):
        return f"C{component_id}_non", process_time, 0, process_time

    extra_time = generate_time()
    total_time = process_time + extra_time
    if outcome == 'repair':
        label = f"C{component_id}_repair"
    else:
        label = f"C{component_id}_replace"
    return label, process_time, extra_time, total_time

def simulate_batch():
    """Simulate all components of one batch.

    Returns:
        A 3-tuple ``(pathway, batch_time, component_details)`` where
        ``pathway`` joins the component outcome labels with ``'_'``,
        ``batch_time`` is the sum of the component total times, and
        ``component_details`` is a list with one dict per component.
    """
    component_details = []
    for component_id in range(1, NUM_COMPONENTS + 1):
        outcome, process_time, additional_time, total_time = simulate_component(component_id)
        component_details.append({
            'component': f'C{component_id}',
            'outcome': outcome,
            'process_time': process_time,
            'additional_time': additional_time,
            'total_time': total_time
        })

    # Derive the aggregates from the collected records; the summation runs in
    # component order starting from 0, like a running total would.
    batch_pathway = '_'.join(detail['outcome'] for detail in component_details)
    batch_time = sum(detail['total_time'] for detail in component_details)
    return batch_pathway, batch_time, component_details

def simulate_full_run():
    """Simulate NUM_BATCHES consecutive batches as one run.

    Returns:
        A dict with ``'full_pathway'`` (batch-prefixed pathways joined with
        ``'_'``), ``'total_time'`` (sum of all batch times) and
        ``'batch_details'`` (one dict per batch).
    """
    all_batch_details = []
    labelled_pathways = []

    for batch_id in range(1, NUM_BATCHES + 1):
        batch_pathway, batch_time, component_details = simulate_batch()
        labelled_pathways.append(f"B{batch_id}_{batch_pathway}")
        all_batch_details.append({
            'batch_id': batch_id,
            'pathway': batch_pathway,
            'time': batch_time,
            'components': component_details
        })

    # Batch times are added in batch order starting from 0.
    run_time = sum(batch['time'] for batch in all_batch_details)
    return {
        'full_pathway': '_'.join(labelled_pathways),
        'total_time': run_time,
        'batch_details': all_batch_details
    }

def generate_dataset(num_simulations):
    """Return a list of ``num_simulations`` independent full-run results."""
    return [simulate_full_run() for _ in range(num_simulations)]

# Build the dataset of simulated full runs
num_simulations = 10000
dataset = generate_dataset(num_simulations)

# Show the first two simulations with batch- and component-level detail
for i, sample in enumerate(dataset[:2], start=1):
    print(f"Simulation {i}:")
    print(f"Full Pathway: {sample['full_pathway']}")
    print(f"Total Time: {sample['total_time']:.2f} minutes")
    print("Batch Details:")
    for batch in sample['batch_details']:
        print(f"  Batch {batch['batch_id']}:")
        print(f"    Pathway: {batch['pathway']}")
        print(f"    Time: {batch['time']:.2f} minutes")
        for component in batch['components']:
            print(f"      {component['component']} ({component['outcome']}):")
            print(f"        Process Time: {component['process_time']:.2f} min")
            print(f"        Additional Time: {component['additional_time']:.2f} min")
            print(f"        Total Time: {component['total_time']:.2f} min")
    print()

# Aggregate statistics: total time per run and frequency of each full pathway
total_times = [run['total_time'] for run in dataset]
pathway_counts = defaultdict(int)
for sample in dataset:
    pathway_counts[sample['full_pathway']] += 1

print(f"Total number of simulations: {num_simulations}")
print(f"Number of unique full pathways: {len(pathway_counts)}")
print(f"\nAverage total time: {np.mean(total_times):.2f} minutes")
print(f"Minimum total time: {min(total_times):.2f} minutes")
print(f"Maximum total time: {max(total_times):.2f} minutes")
print("\nTop 5 most common full pathways:")
# Stable sort by descending count; ties keep first-seen order.
ranked_pathways = sorted(pathway_counts.items(), key=lambda item: item[1], reverse=True)
for pathway, count in ranked_pathways[:5]:
    print(f"{pathway}: {count} occurrences")

Simulation 1:
Full Pathway: B1_C1_replace_C2_non_C3_repair_C4_repair_B2_C1_replace_C2_repair_C3_non_C4_repair_B3_C1_replace_C2_replace_C3_non_C4_repair_B4_C1_replace_C2_replace_C3_replace_C4_replace
Total Time: 153.76 minutes
Batch Details:
  Batch 1:
    Pathway: C1_replace_C2_non_C3_repair_C4_repair
    Time: 24.04 minutes
      C1 (C1_replace):
        Process Time: 2.24 min
        Additional Time: 4.33 min
        Total Time: 6.57 min
      C2 (C2_non):
        Process Time: 1.09 min
        Additional Time: 0.00 min
        Total Time: 1.09 min
      C3 (C3_repair):
        Process Time: 3.49 min
        Additional Time: 2.57 min
        Total Time: 6.06 min
      C4 (C4_repair):
        Process Time: 1.26 min
        Additional Time: 9.06 min
        Total Time: 10.32 min
  Batch 2:
    Pathway: C1_replace_C2_repair_C3_non_C4_repair
    Time: 42.59 minutes
      C1 (C1_replace):
        Process Time: 3.66 min
        Additional Time: 3.88 min
        Total Time: 7.54 min
      C