In [2]:
import numpy as np
from scipy import stats

# Reproducible sample: 30 draws from a normal distribution (mean 10, sd 2).
np.random.seed(42)
sample_data = np.random.normal(loc=10, scale=2, size=30)
print(sample_data)

# Hypothesized population mean (the null-hypothesis value) and the
# significance level used for the decision at the end of this cell.
population_mean = 10
alpha = 0.05

# One-sample t-test of the sample mean against population_mean.
t_statistic, p_value = stats.ttest_1samp(sample_data, population_mean)
print("t-statistic:", t_statistic)
print("p-value:", p_value)

# Pick the conclusion first, then emit it with a single print call.
# The null hypothesis is rejected only when p_value is strictly below alpha.
conclusion = (
    "Reject the null hypothesis. There is enough evidence to suggest that the sample mean differs significantly from the population mean."
    if p_value < alpha
    else "Fail to reject the null hypothesis. There is not enough evidence to suggest that the sample mean differs significantly from the population mean."
)
print(conclusion)

[10.99342831  9.7234714  11.29537708 13.04605971  9.53169325  9.53172609
 13.15842563 11.53486946  9.06105123 11.08512009  9.07316461  9.06854049
 10.48392454  6.17343951  6.55016433  8.87542494  7.97433776 10.62849467
  8.18395185  7.1753926  12.93129754  9.5484474  10.13505641  7.15050363
  8.91123455 10.22184518  7.69801285 10.75139604  8.79872262  9.4166125 ]
t-statistic: -1.1450173670383255
p-value: 0.2615641461880169
Fail to reject the null hypothesis. There is not enough evidence to suggest that the sample mean differs significantly from the population mean.


In [4]:
import random
import pandas as pd

# Seed the legacy global NumPy RNG so the synthetic frame is reproducible.
np.random.seed(0)

# 100 rows: a treatment label picked at random from A/B/C and a score drawn
# from a normal distribution (mean 10, sd 2). The labels are drawn before the
# scores; swapping the two draws would change the generated values.
treatment_draws = np.random.choice(['A', 'B', 'C'], size=100)
score_draws = np.random.normal(loc=10, scale=2, size=100)
data = pd.DataFrame({'Treatment': treatment_draws, 'score': score_draws})

print(data)


   Treatment      score
0          A   8.626821
1          B  10.029747
2          A   9.248668
3          B   9.923553
4          B  10.735949
..       ...        ...
95         A   9.933528
96         C  10.131283
97         A  10.531571
98         B  12.303684
99         C  10.276086

[100 rows x 2 columns]


In [7]:
# Average score within each level of the 'Treatment' column.
mean_score_per_group = (
    data
    .groupby('Treatment')['score']
    .mean()
)
print(mean_score_per_group)

Treatment
A     9.611241
B    10.099304
C    10.010667
Name: score, dtype: float64


In [8]:
# Scores for the two treatments being compared, selected in one .loc step
# (row mask + column label) instead of filtering and then indexing.
group_A = data.loc[data['Treatment'] == 'A', 'score']
group_B = data.loc[data['Treatment'] == 'B', 'score']

# Independent two-sample t-test of A vs B using SciPy's default settings.
# Note: this rebinds p_value, which the one-sample test cell also assigns.
ttest_result = stats.ttest_ind(group_A, group_B)
t_stat, p_value = ttest_result
print("t-statistic:", t_stat)
print("p-value:", p_value)

t-statistic: -1.0341676575352792
p-value: 0.3045674995802033


In [10]:
# One-way ANOVA on the scores of treatments A, B and C.
# The per-treatment score Series are generated in the fixed order A, B, C and
# unpacked as the positional samples for f_oneway.
f_stat, p_value = stats.f_oneway(
    *(data.loc[data['Treatment'] == label, 'score'] for label in ('A', 'B', 'C'))
)
print("F-statistic:", f_stat)
print("p-value:", p_value)

F-statistic: 0.6019178068930898
p-value: 0.5497930622874936
