## Student's t test :-

# One Sample T test

In [15]:
from scipy.stats import ttest_1samp
import numpy as np

weights = np.array([148, 152, 151, 149, 153, 150, 151, 149, 150, 152])
expected_mean = 150
y=weights.mean()
print(y)
t_statistic, p_value = ttest_1samp(weights, expected_mean)
print(p_value)
if p_value < 0.05:  # alpha level
    print("We reject the null hypothesis and conclude that the mean weight is different from 150 grams.")
else:
    print("We fail to reject the null hypothesis and cannot conclude that the mean weight is different from 150 grams.")


150.5
0.34343639613791355
We fail to reject the null hypothesis and cannot conclude that the mean weight is different from 150 grams.


### we are not rejecting the null hypothesis because the pvalue is more than 0.05

# Independent Sample T test

In [40]:
from scipy.stats import ttest_ind

group1 = np.random.normal(50, 10, 30) # Mean of 50, standard deviation of 10, 30 values
group2 = np.random.normal(55, 10, 30) # Mean of 55, standard deviation of 10, 30 values
print(p_value)
t_statistic, p_value = ttest_ind(group1, group2)
if p_value < 0.05:  # alpha level
    print("We reject the null hypothesis and conclude that there is a significant difference between the two groups.")
else:
    print("We fail to reject the null hypothesis and cannot conclude that there is a significant difference between the two groups.")



0.11549487468642262
We fail to reject the null hypothesis and cannot conclude that there is a significant difference between the two groups.


# Paired T test

In [65]:
from scipy.stats import ttest_rel

baseline = np.random.normal(50, 10, 30)  # Baseline measurements
after_treatment = baseline + np.random.normal(3, 5, 30)  # Measurements after treatment, with some random variation
y=baseline.mean()
print(y)
z=after_treatment.mean()
print(z)
t_statistic, p_value = ttest_rel(baseline, after_treatment)
print(p_value)
if p_value < 0.05:  # alpha level
    print("We reject the null hypothesis and conclude that there is a significant difference between the two groups.")
else:
    print("We fail to reject the null hypothesis and cannot conclude that there is a significant difference between the two groups.")



49.659598971671514
52.97216024967847
0.0024517149286378127
We reject the null hypothesis and conclude that there is a significant difference between the two groups.


# Oneway ANOVA:-

In [93]:
from scipy.stats import f_oneway


np.random.seed(42)  # For reproducibility
group1 = np.random.normal(60, 10, 30)
group2 = np.random.normal(40, 10, 30)
group3 = np.random.normal(50, 10, 30)   
y=group1.mean()
y
z=group2.mean()
print(z)
x=group3.mean()
print(x)
print(p_value)
f_statistic, p_value = f_oneway(group1, group2, group3)

if p_value < 0.05:  # alpha level
    print("We reject the null hypothesis and conclude that there is a significant difference between the groups.")
else:
    print("We fail to reject the null hypothesis and cannot conclude that there is a significant difference between the groups.")


38.78837529710058
50.128847724722235
4.0357251242800276e-11
We reject the null hypothesis and conclude that there is a significant difference between the groups.


### the p value is 0.00000000004035 thus we reject the hypothesis
### i.e., mean of group1 != group2!= group3

In [83]:
from statsmodels.stats.multicomp import pairwise_tukeyhsd

if p_value < 0.05:
    data = np.concatenate([group1, group2, group3])
    labels = ['Group1']*30 + ['Group2']*30 + ['Group3']*30
    tukey_results = pairwise_tukeyhsd(data, labels, alpha=0.05)
    print(tukey_results)
    print(p_value)

 Multiple Comparison of Means - Tukey HSD, FWER=0.05  
group1 group2 meandiff p-adj   lower    upper   reject
------------------------------------------------------
Group1 Group2 -19.3302    0.0 -25.1286 -13.5317   True
Group1 Group3  -7.9897 0.0042 -13.7881  -2.1913   True
Group2 Group3  11.3405    0.0   5.5421  17.1389   True
------------------------------------------------------
4.0357251242800276e-11


### the p value for all comparisons is less than 0.05 thus reject='true'
#### group2>group3>group1 the diff is in negative for 1st -2nd thus 2nd is greater than 1st grp

# Chisquare Test(X^2): test of association

In [91]:
import pandas as pd
data = {
    'Gender': ['Male']*30 + ['Female']*30,
    'Preference': ['A']*25 + ['B']*5 + ['A']*5 + ['B']*25
}

df = pd.DataFrame(data)
print(df)

Unnamed: 0,Gender,Preference
0,Male,A
1,Male,A
2,Male,A
3,Male,A
4,Male,A
5,Male,A
6,Male,A
7,Male,A
8,Male,A
9,Male,A


In [92]:
contingency_table = pd.crosstab(df['Gender'], df['Preference'])
contingency_table

Preference,A,B
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Female,5,25
Male,25,5


In [96]:
from scipy.stats import chi2_contingency

chi2, p, dof, expected = chi2_contingency(contingency_table)
print(p)
if p < 0.05:  # alpha level
    print("We reject the null hypothesis and conclude that there is a significant association between the variables.")
else:
    print("We fail to reject the null hypothesis and cannot conclude that there is a significant association between the variables.")

9.305730297692085e-07
We reject the null hypothesis and conclude that there is a significant association between the variables.


# Two way Anova 


In [14]:
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.stats.anova import anova_lm
import scipy.stats as stats

# Create the DataFrame
data = {
    'Fertilizer_Type': ['A', 'B', 'C', 'A', 'B', 'C', 'A', 'B', 'C'],
    'Watering_Frequency': ['Low', 'Low', 'Low', 'Medium', 'Medium', 'Medium', 'High', 'High', 'High'],
    'Plant_Height': [15, 18, 12, 20, 22, 18, 25, 28, 23]
}

df = pd.DataFrame(data)

# Perform two-way ANOVA
model = ols('Plant_Height ~ C(Fertilizer_Type) + C(Watering_Frequency) + C(Fertilizer_Type):C(Watering_Frequency)', data=df).fit()
anova_results = anova_lm(model)

alpha = 0.05

#finding p value
formula = 'Plant_Height ~ C(Fertilizer_Type) + C(Watering_Frequency) + C(Fertilizer_Type):C(Watering_Frequency)'
model = stats.f_oneway(df['Plant_Height'][df['Fertilizer_Type'] == 'A'], 
                       df['Plant_Height'][df['Fertilizer_Type'] == 'B'], 
                       df['Plant_Height'][df['Fertilizer_Type'] == 'C'])


#p value
p_value_fertilizer = model.pvalue
p_value_watering = model.pvalue
p_value_interaction = model.pvalue

# Display p-values
print("P-value for Fertilizer Type:", p_value_fertilizer)
print("P-value for Watering Frequency:", p_value_watering)
print("P-value for Interaction:", p_value_interaction)

if p_value_fertilizer < alpha:
    print("We reject the null hypothesis for Fertilizer Type.")
else:
    print("We fail to reject the null hypothesis for Fertilizer Type.")

if p_value_watering < alpha:
    print("We reject the null hypothesis for Watering Frequency.")
else:
    print("We fail to reject the null hypothesis for Watering Frequency.")

if p_value_interaction < alpha:
    print("We reject the null hypothesis for the interaction between Fertilizer Type and Watering Frequency.")
else:
    print("We fail to reject the null hypothesis for the interaction between Fertilizer Type and Watering Frequency.")


P-value for Fertilizer Type: 0.5337535249442098
P-value for Watering Frequency: 0.5337535249442098
P-value for Interaction: 0.5337535249442098
We fail to reject the null hypothesis for Fertilizer Type.
We fail to reject the null hypothesis for Watering Frequency.
We fail to reject the null hypothesis for the interaction between Fertilizer Type and Watering Frequency.


  (model.ssr / model.df_resid))
