# Q5

## Q5.1

In [1]:
# Import required libraries
import numpy as np
import pandas as pd
import random

# Set random seed
np.random.seed(42)

# Define total number of products
number_of_products = 1000


gen_1_98 = [0 if random.random() < 0.02 else 1 for _ in range(1000)]

# Create data dictionary
data = {'product_id':np.arange(1, number_of_products+1).tolist(),
       'accepted':gen_1_98}

# Transform dictionary into a data frame
df = pd.DataFrame(data)

# View data frame
df

Unnamed: 0,product_id,accepted
0,1,1
1,2,1
2,3,1
3,4,1
4,5,1
...,...,...
995,996,1
996,997,1
997,998,1
998,999,1


---

## Q5.2

In [None]:
def double_sampling(df, n1, n2, r1, c1, c2):
    x1 = 0
    x2 = 0

    # first sampling
    first_sample = df.sample(n=n1).sort_values(by='product_id')
    denied_products1 =first_sample['accepted'].value_counts(0)
    x1 = int(denied_products1[1])
    if x1 <= c1:
        return "a1", x1, x2# accaped in the first sampling
    if x1 >= r1:
        return "r1", x1, x2 # rejected in the first sampling
    
    # Remove the first sample from the DataFrame (without replacement)
    remaining_df = df.drop(first_sample.index)
    
    # second sampling
    second_sample = remaining_df.sample(n=n2, random_state=42).sort_values(by='product_id')
    denied_products2 = second_sample['accepted'].value_counts(0)
    x2 = int(denied_products2[1])
    
    # Check acceptance after the second sample
    if x1 + x2 <= c2:
        return "a2", x1, x2 # accaped in the second sampling
    
    return "a2" , x1, x2# rejected in the second sampling

In [None]:
phase_flag, x1, x2 = double_sampling(df=df, n1=50, n2=50, r1=2, c1=0, c2=2)

if phase_flag == "a1":
    print(f"The batch was accepted in the first sampling with {x1 + x2} damaged items.")
elif phase_flag == "r1":
    print(f"The batch was rejected in the first sampling with {x1 + x2} damaged items.")
elif phase_flag == "a2":
    print(f"The batch was accepted in the second sampling with {x1 + x2} damaged items.")
else:
    print(f"The batch was rejected in the second sampling with {x1 + x2} damaged items.")

The batch was rejected in the first sampling with 48 damaged items.


### Explanation of the Double Sampling Process

The code implements a **double sampling procedure** to evaluate whether a batch should be accepted or rejected based on defective items:

1. **First Sampling**:
   - A specified number of items (`n1`) is drawn from the dataset **without replacement**.
   - The number of defective items (`x1`) is counted.
   - Decision rules:
     - If `x1 <= c1`, the batch is **accepted immediately** (`a1`).
     - If `x1 >= r1`, the batch is **rejected immediately** (`r1`).
     - Otherwise, a second sampling is performed.

2. **Second Sampling**:
   - Another sample of size `n2` is drawn from the remaining items.
   - The number of defective items in this sample (`x2`) is counted.
   - The total defective items (`x1 + x2`) from both samples is evaluated:
     - If the total defective items are `<= c2`, the batch is **accepted** (`a2`).
     - Otherwise, the batch is **rejected**.

### Result of the Sampling
In this specific run, the batch was **rejected in the first sampling** because the number of defective items (`x1 = 48`) exceeded the rejection threshold (`r1 = 2`).

---

## Q5.3

In [19]:
from scipy.stats import poisson

In [20]:
# Calculate producer's risk (alpha)
lambda_1 = 50 * 0.02  # Expected defects in the first sample
p_a1 = poisson.cdf(0, lambda_1)  # Probability of acceptance after the first sample

lambda_2 = 50 * 0.02  # Expected defects in the second sample
p_a2 = poisson.pmf(1, lambda_1) * poisson.cdf(1, lambda_2)  # Probability of acceptance after the second sample

p_a = p_a1 + p_a2  # Total probability of acceptance
alpha = 1 - p_a  # Producer's risk

print(f"Producer's Risk (alpha): {alpha:.4f}")

Producer's Risk (alpha): 0.3614
