In [68]:
import numpy as np
import plotly.graph_objects as go
from scipy.stats import binom
# import statsmodels.api as sm
# from scipy.special import expit  # Import the expit function


In [69]:
# Example 1: Flipping a Coin
# Each simulation draws the number of heads obtained in n_trials_coin flips
# of a coin whose probability of heads is p_heads.
n_trials_coin = 10
p_heads = 0.5
n_simulations = 1000
# Fixed seed so that Restart & Run All reproduces the same draws (and plots).
seed_coin = 42
# Using scipy for generating binomial distributed random variables
coin_flips = binom.rvs(
    n_trials_coin,
    p_heads,
    size=n_simulations,
    random_state=seed_coin,
)

In [70]:
# Theoretical distribution for the coin example: every possible head count
# (0 .. n_trials_coin inclusive) paired with its binomial probability,
# expressed as a percentage so it is comparable with histnorm='percent'.
k_values_coin = np.arange(n_trials_coin + 1)
theoretical_pmf_coin = 100 * binom.pmf(k_values_coin, n_trials_coin, p_heads)


In [71]:
# Plotting the results using Plotly for the Coin Flips with theoretical PMF.
# The histogram is normalised to percent so it shares the y-scale of
# theoretical_pmf_coin.
fig = go.Figure()
fig.add_trace(go.Histogram(
    x=coin_flips,
    # One unit-wide bin centred on every possible head count (0..n_trials_coin).
    # nbinsx is only an upper bound for Plotly's auto-binning, so explicit bins
    # are used to guarantee each bar lines up with exactly one PMF point.
    xbins=dict(start=-0.5, end=n_trials_coin + 0.5, size=1),
    name='Experimental',
    marker_color='blue',
    histnorm='percent'
))
fig.add_trace(go.Scatter(
    x=k_values_coin,
    y=theoretical_pmf_coin,
    mode='markers+lines',
    name='Theoretical',
    marker_color='red'
))
fig.update_layout(
    title="Coin Flipping Simulation (Percentage)",
    xaxis_title="Number of Heads",
    yaxis_title="Percentage",
    bargap=0.1
)
fig.show()

In [72]:
# Rescale the theoretical PMF from percentages to expected counts over all
# n_simulations runs, for comparison with the un-normalised histogram.
theoretical_pmf_coin_count = (theoretical_pmf_coin / 100) * n_simulations

In [73]:
# Plotting the results using Plotly for the Coin Flips with theoretical PMF.
# Raw counts are shown here, so the theoretical curve is the expected count
# per outcome (theoretical_pmf_coin_count).
fig = go.Figure()
fig.add_trace(go.Histogram(
    x=coin_flips,
    # One unit-wide bin centred on every possible head count (0..n_trials_coin).
    # nbinsx is only an upper bound for Plotly's auto-binning, so explicit bins
    # are used to guarantee each bar lines up with exactly one PMF point.
    xbins=dict(start=-0.5, end=n_trials_coin + 0.5, size=1),
    name='Experimental',
    marker_color='blue'
))
fig.add_trace(go.Scatter(
    x=k_values_coin,
    y=theoretical_pmf_coin_count,
    mode='markers+lines',
    name='Theoretical',
    marker_color='red'
))
fig.update_layout(
    title="Coin Flipping Simulation (Counts)",
    xaxis_title="Number of Heads",
    yaxis_title="Count",
    bargap=0.1
)
fig.show()

In [74]:
# Example 2: Quality Control in Manufacturing
# Each simulation draws the number of defective items in a batch of n_items,
# where every item is defective with probability p_defective.
n_items = 1000
p_defective = 0.01
n_simulations_qc = 1000
# Fixed seed so that Restart & Run All reproduces the same draws (and plots).
seed_qc = 43
# Using scipy for generating binomial distributed random variables
defects = binom.rvs(
    n_items,
    p_defective,
    size=n_simulations_qc,
    random_state=seed_qc,
)

In [75]:
# Theoretical PMF for the quality-control example, expressed in percent.
# Only defect counts 0..49 are evaluated: a reasonable range to show on the plots.
k_values_defects = np.arange(50)
theoretical_pmf_defects = 100 * binom.pmf(k_values_defects, n_items, p_defective)


In [91]:
# Theoretical mean of the defect count, displayed as the cell output.
binom(n_items, p_defective).mean()

10.0

In [92]:
# Theoretical variance of the defect count, displayed as the cell output.
binom(n_items, p_defective).var()

9.9

In [76]:
# Plotting for the Quality Control: simulated defect counts (percent of runs)
# against the theoretical binomial PMF (also in percent).
fig = go.Figure()
fig.add_trace(go.Histogram(
    x=defects,
    # One unit-wide bin centred on every integer in k_values_defects.
    # nbinsx is only an upper bound for Plotly's auto-binning, so explicit bins
    # are used to guarantee each bar lines up with exactly one PMF point.
    xbins=dict(start=-0.5, end=float(k_values_defects[-1]) + 0.5, size=1),
    name='Experimental',
    marker_color='blue',
    histnorm='percent'
))
fig.add_trace(go.Scatter(
    x=k_values_defects,
    y=theoretical_pmf_defects,
    mode='markers+lines',
    name='Theoretical',
    marker_color='red'
))
fig.update_layout(
    title="Manufacturing Defects Simulation (Percentage)",
    xaxis_title="Number of Defective Items",
    yaxis_title="Percentage",
    bargap=0.1
)
fig.show()

In [77]:
# Rescale the theoretical PMF from percentages to expected counts over all
# n_simulations_qc runs, for comparison with the un-normalised histogram.
theoretical_pmf_defects_count = (theoretical_pmf_defects / 100) * n_simulations_qc


In [78]:
# Plotting for the Quality Control: simulated defect counts (raw counts)
# against the expected count per outcome (theoretical_pmf_defects_count).
fig = go.Figure()
fig.add_trace(go.Histogram(
    x=defects,
    # One unit-wide bin centred on every integer in k_values_defects.
    # nbinsx is only an upper bound for Plotly's auto-binning, so explicit bins
    # are used to guarantee each bar lines up with exactly one PMF point.
    xbins=dict(start=-0.5, end=float(k_values_defects[-1]) + 0.5, size=1),
    name='Experimental',
    # Colours match the other figures: experimental in blue, theoretical in red.
    marker_color='blue'
))
fig.add_trace(go.Scatter(
    x=k_values_defects,
    y=theoretical_pmf_defects_count,
    mode='markers+lines',
    name='Theoretical',
    marker_color='red'
))
fig.update_layout(
    title="Manufacturing Defects Simulation (Counts)",
    xaxis_title="Number of Defective Items",
    yaxis_title="Count",
    bargap=0.1
)
fig.show()

In [79]:
# Theoretical CDF from binom.cdf, evaluated at every k in k_values_defects
# and scaled to a percentage.
cumulative_probabilities_defects = 100 * binom.cdf(k_values_defects, n_items, p_defective)


In [80]:
# Plotting for the Quality Control with theoretical PMF and CDF.
# All three traces are in percent and share one y-axis.
fig = go.Figure()
fig.add_trace(go.Histogram(
    x=defects,
    # One unit-wide bin centred on every integer in k_values_defects.
    # nbinsx is only an upper bound for Plotly's auto-binning, so explicit bins
    # are used to guarantee each bar lines up with exactly one PMF point.
    xbins=dict(start=-0.5, end=float(k_values_defects[-1]) + 0.5, size=1),
    name='Experimental',
    marker_color='blue',
    histnorm='percent'
))
fig.add_trace(go.Scatter(
    x=k_values_defects,
    y=theoretical_pmf_defects,
    mode='markers+lines',
    name='Theoretical PMF',
    marker_color='red'
))
fig.add_trace(go.Scatter(
    x=k_values_defects,
    y=cumulative_probabilities_defects,
    mode='markers+lines',
    name='Theoretical CDF',
    marker_color='green'
))
fig.update_layout(
    title="Manufacturing Defects Simulation (Percentage)",
    xaxis_title="Number of Defective Items",
    yaxis_title="Percentage",
    bargap=0.1
)
# Render the figure; without this call the cell builds the plot but never displays it.
fig.show()

In [81]:
# Tally the simulated defect counts into 50 equal-width bins spanning 0 to 50,
# then accumulate the tallies into a running total.
defect_counts, bins = np.histogram(defects, range=(0, 50), bins=50)
experimental_cdf_defects = defect_counts.cumsum()

In [82]:
# Inspect the simulated defect counts (one value per simulation run).
# NOTE(review): this displays the whole array; a slice such as defects[:20]
# would give a shorter preview.
defects

array([ 9,  7, 12,  6,  9,  6, 12, 11,  6, 10, 10, 12, 12, 11, 11, 10, 10,
       14, 12,  5,  8, 11, 10,  8,  6, 14, 12,  9, 15,  9,  7, 14,  9, 10,
       13, 12,  9,  7,  9, 16, 11, 10,  6,  7, 16,  4, 11, 11, 13, 11,  7,
        9, 12,  7, 11, 11,  9,  5, 13, 11,  7, 12, 11, 12, 12, 10, 14, 12,
       11,  7,  5, 18,  6,  8,  9, 13,  8,  8, 12, 13, 10,  6,  6,  8,  5,
        7,  8, 11,  9,  9,  7, 11,  9, 10,  9, 12,  7, 13, 12, 13, 13,  9,
       10,  6, 10, 10,  8,  7, 10, 12,  9, 11, 16, 15, 13, 11,  7,  9,  6,
       12,  7,  9,  8,  8,  8,  6,  7, 15, 11,  8,  9, 10, 12,  8, 10,  4,
        9, 13, 13,  7,  2,  9, 13,  5, 10, 10,  8,  7, 17, 10,  5, 11, 11,
        7, 15,  8, 11, 13,  9, 10,  7,  9, 14,  9, 12,  9,  7, 15, 10, 11,
        9,  4, 17, 12, 13, 10, 11, 11,  7, 10, 16,  8,  8, 15, 13, 12, 13,
       13, 13, 15, 13,  5,  8, 12, 11,  6,  7,  6, 13, 11, 16, 10, 10, 17,
       11, 10,  8,  8,  7,  9,  7, 13, 10, 13, 20,  8, 11, 11, 10,  7,  4,
       11,  9, 10, 11, 11

In [83]:
# Inspect the bin edges returned by np.histogram.
bins

array([ 0.,  1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12.,
       13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24., 25.,
       26., 27., 28., 29., 30., 31., 32., 33., 34., 35., 36., 37., 38.,
       39., 40., 41., 42., 43., 44., 45., 46., 47., 48., 49., 50.])

In [84]:
# Inspect the per-bin tallies returned by np.histogram.
defect_counts

array([  0,   0,   1,   7,  18,  41,  64,  88, 100, 129, 132, 109,  93,
        92,  43,  36,  23,  11,   6,   4,   2,   0,   1,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0], dtype=int64)

In [85]:
# Inspect the running total of the per-bin tallies (the un-normalised experimental CDF).
experimental_cdf_defects

array([   0,    0,    1,    8,   26,   67,  131,  219,  319,  448,  580,
        689,  782,  874,  917,  953,  976,  987,  993,  997,  999,  999,
       1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000,
       1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000, 1000,
       1000, 1000, 1000, 1000, 1000, 1000], dtype=int64)

In [86]:
# Turn the cumulative counts into a percentage of all n_simulations_qc runs,
# the same scale as cumulative_probabilities_defects.
experimental_cdf_defects_percent = (experimental_cdf_defects / n_simulations_qc) * 100

In [87]:
# Compare the theoretical CDF with the experimental CDF, both in percent.
fig = go.Figure()
fig.add_trace(go.Scatter(
    x=k_values_defects,
    y=cumulative_probabilities_defects,
    mode='markers+lines',
    name='Theoretical CDF',
    marker_color='red'
))
fig.add_trace(go.Scatter(
    # Reuse the x grid of the theoretical trace instead of a second hard-coded
    # np.arange(50), so both curves are always plotted against the same values.
    x=k_values_defects,
    # Cumulative histogram counts, normalised to percent.
    y=experimental_cdf_defects_percent,
    mode='markers+lines',
    name='Experimental CDF',
    marker_color='blue'
))
# bargap is omitted: the figure contains no bar traces, so it had no effect.
fig.update_layout(
    title="Manufacturing Defects Simulation (Percentage)",
    xaxis_title="Number of Defective Items",
    yaxis_title="Percentage"
)
fig.show()

In [88]:

# # Example data: Let's say we have a dataset where 'x' is a predictor variable (e.g., dosage of a drug),
# # and 'y' is the number of successes (e.g., number of patients recovered) out of 'n' trials (e.g., total patients treated)

# # Generating some example data
# np.random.seed(42)
# x = np.random.normal(0, 1, 100)
# n_trials = 10
# p = expit(x)  # Sigmoid function to map x to probabilities
# y = np.random.binomial(n_trials, p)

In [89]:
# # Adding the intercept to input data
# x = sm.add_constant(x)

In [90]:
# # Fitting a binomial logistic regression model
# model = sm.GLM(y, x, family=sm.families.Binomial())
# result = model.fit()

# print(result.summary())