In [1]:
import pandas as pd

# SENSITIVY, POWER, EFFECT SIZE

In [2]:
d = pd.read_excel('laser_ion_beam_data.xlsx')

In [3]:
d = d.iloc[:,:8]
d.columns = ['intensity','energy','pulse_width','cutoff_energy','power','spot_size','target_material','target_thickness']

In [4]:
d['target_material'].value_counts()

target_material
Plastik           960
Gold (Au)          62
Polystyrol         38
Aluminium (Al)     11
Polypropylen        9
Kupfer (Cu)         5
CH                  4
Titan (Ti)          3
CVD                 1
Name: count, dtype: int64

In [5]:
d.head()

Unnamed: 0,intensity,energy,pulse_width,cutoff_energy,power,spot_size,target_material,target_thickness
0,2.384369e+21,106.191,500,20.2,212382000000000.0,2.804465,Gold (Au),10.0
1,2.006771e+21,126.765,500,29.8,253530000000000.0,3.339978,Gold (Au),10.0
2,3.05072e+21,122.634,500,30.1,245268000000000.0,2.664386,Gold (Au),10.0
3,2.833589e+21,143.694,500,30.1,287388000000000.0,2.992566,Gold (Au),10.0
4,4.507079e+21,148.068,500,30.6,296136000000000.0,2.408662,Gold (Au),10.0


In [6]:
# Number of bootstrap samples
n_bootstrap_samples = 1000

# List to store bootstrap samples statistics
bootstrap_samples_means = []

# Generating bootstrap samples
for _ in range(n_bootstrap_samples):
    # Resample the DataFrame with replacement
    bootstrap_sample = d.sample(n=len(d), replace=True)
    # Compute the mean of each column in the bootstrap sample
    sample_means = bootstrap_sample.mean(numeric_only=True)
    bootstrap_samples_means.append(sample_means)

# Convert list to DataFrame
bootstrap_means_df = pd.DataFrame(bootstrap_samples_means)

# Display the first few rows of the bootstrap means DataFrame
print(bootstrap_means_df.head())

# Compute the overall mean and standard deviation for each column from the bootstrap samples
overall_mean = bootstrap_means_df.mean()
overall_std = bootstrap_means_df.std()

print("\nOverall Mean:\n", overall_mean)
print("\nOverall Standard Deviation:\n", overall_std)

      intensity     energy  pulse_width  cutoff_energy         power  \
0  5.085224e+20  24.271082   320.405306       9.654392  6.945573e+13   
1  5.292195e+20  26.287693   524.880146       9.145334  7.175072e+13   
2  5.052561e+20  20.689879   304.831656       8.499039  6.651203e+13   
3  4.938844e+20  31.353336   453.065874       8.760339  6.390486e+13   
4  5.038760e+20  17.714497   395.684355       8.479597  6.308228e+13   

   spot_size  target_thickness  
0   4.215656          1.419333  
1   4.490784          1.761569  
2   4.323528          1.091221  
3   4.379986          1.306766  
4   3.919990          1.150332  

Overall Mean:
 intensity           5.039375e+20
energy              2.381535e+01
pulse_width         4.188921e+02
cutoff_energy       8.777404e+00
power               6.602274e+13
spot_size           4.154340e+00
target_thickness    1.317476e+00
dtype: float64

Overall Standard Deviation:
 intensity           1.865486e+19
energy              3.909045e+00
pulse_width

In [10]:
d = d[d['target_material'] == 'Plastik']
d_numeric = d.drop(columns='target_material')

In [11]:
import numpy as np
from sklearn.utils import resample
from scipy import stats

# Assuming d_numeric is your multivariate data with shape (n_samples, n_features)
n_bootstrap_samples = 200000  # Example of a high number of bootstrap samples
bootstrap_samples = []

# Generate bootstrap samples
for _ in range(n_bootstrap_samples):
    sample = resample(d_numeric, replace=True)
    bootstrap_samples.append(sample)

# Calculate mean for each bootstrap sample
bootstrap_means = np.array([np.mean(sample, axis=0) for sample in bootstrap_samples])

# Calculate the null hypothesis value (mean of the original dataset)
null_hypothesis_value = np.mean(d_numeric, axis=0).values
alpha = 0.05
p_values = []

# Perform a t-test for each feature independently
for feature_idx in range(bootstrap_means.shape[1]):
    sample_means_feature = bootstrap_means[:, feature_idx]
    t_stat, p_value = stats.ttest_1samp(sample_means_feature, null_hypothesis_value[feature_idx])
    p_values.append(p_value)

p_values = np.array(p_values)
power = np.mean(p_values < alpha)

# Calculate effect sizes (Cohen's d) for each feature
std_dev = np.std(d_numeric, axis=0, ddof=1).values
effect_sizes = (bootstrap_means - null_hypothesis_value) / std_dev

# Summary statistics of effect sizes
mean_effect_size = np.mean(effect_sizes, axis=0)
std_effect_size = np.std(effect_sizes, axis=0)

# Sensitivity analysis by varying number of bootstrap samples
sensitivity_results = {}
for n_samples in [10000, 50000, 100000, 200000]:
    bootstrap_samples = []
    for _ in range(n_samples):
        sample = resample(d_numeric, replace=True)
        bootstrap_samples.append(sample)
    bootstrap_means = np.array([np.mean(sample, axis=0) for sample in bootstrap_samples])
    effect_sizes = (bootstrap_means - null_hypothesis_value) / std_dev
    mean_effect_size = np.mean(effect_sizes, axis=0)
    std_effect_size = np.std(effect_sizes, axis=0)
    sensitivity_results[n_samples] = (mean_effect_size, std_effect_size)

# Output results
print(f'Power: {power}')
print(f'Mean Effect Size: {mean_effect_size}')
print(f'Standard Deviation of Effect Sizes: {std_effect_size}')
print('Sensitivity Analysis Results:')
for n_samples, (mean_eff, std_eff) in sensitivity_results.items():
    print(f'Bootstrap Samples: {n_samples}')
    print(f'  Mean Effect Size: {mean_eff}')
    print(f'  Standard Deviation of Effect Sizes: {std_eff}')


  res = hypotest_fun_out(*samples, **kwds)


Power: 0.14285714285714285
Mean Effect Size: [-9.05049676e-05 -2.03031005e-05  9.15726351e-05  3.84599310e-05
 -9.05049676e-05  0.00000000e+00  9.04408846e-05]
Standard Deviation of Effect Sizes: [0.03219544 0.03228765 0.03221505 0.03234659 0.03219544 0.
 0.03220727]
Sensitivity Analysis Results:
Bootstrap Samples: 10000
  Mean Effect Size: [ 3.04794031e-04  6.18917058e-05 -2.43469945e-04 -2.64703306e-04
  3.04794031e-04  0.00000000e+00 -5.54298395e-04]
  Standard Deviation of Effect Sizes: [0.03207581 0.03194095 0.03221918 0.0324592  0.03207581 0.
 0.03254167]
Bootstrap Samples: 50000
  Mean Effect Size: [-2.79880754e-05  1.28637969e-04  1.31145857e-07 -1.17342793e-05
 -2.79880754e-05  0.00000000e+00  2.48086830e-04]
  Standard Deviation of Effect Sizes: [0.03230511 0.03229768 0.03236063 0.03224677 0.03230511 0.
 0.03238289]
Bootstrap Samples: 100000
  Mean Effect Size: [ 1.06814454e-04  2.24545719e-04 -1.06781844e-04 -7.19673315e-05
  1.06814454e-04  0.00000000e+00 -6.70906809e-05]
 