# Stats 7

In [12]:
import logging

logging.basicConfig(filename="14MarInfo.log", level=logging.INFO, format="%(asctime)s %(name)s %(message)s")

In [13]:
# answer 1
import numpy as np
from scipy.stats import f

def variance_ratio_test(x, y):
    n = len(x)
    m = len(y)
    f_value = np.var(x, ddof=1) / np.var(y, ddof=1)
    p_value = f.sf(f_value, n-1, m-1)
    return f_value, p_value
"""
This function takes two arrays x and y as inputs and returns the F-value and p-value for 
the variance ratio test. The function first calculates the F-value by dividing the sample 
variance of x by the sample variance of y. The ddof=1 argument is used to calculate the unbiased
estimate of the variance. The function then uses the scipy.stats.f.sf function to calculate the p-value,
which represents the probability of observing an F-value as extreme or more extreme than the calculated
value under the null hypothesis that the variances of x and y are equal.
"""
x = [1, 2, 3, 4, 5]
y = [1.2, 2.4, 3.6, 4.8, 6.0]
f_value, p_value = variance_ratio_test(x, y)
print("F-value: ", f_value)
print("p-value: ", p_value)

F-value:  0.6944444444444444
p-value:  0.6337799199051903


In [14]:
# answer 2
# critical F-value for a two-tailed test given a significance level of 0.05 and the degrees of freedom for the numerator and denominator of an F-distribution:
    
from scipy.stats import f

def critical_f_value(df_num, df_denom):
    alpha = 0.05
    f_crit = f.ppf(alpha/2, df_num, df_denom)
    return f_crit
"""
This function uses the scipy.stats.f.ppf function to calculate the critical F-value for a two-tailed test. 
The ppf function takes the significance level divided by two (since it's a two-tailed test) and the
degrees of freedom for the numerator and denominator of the F-distribution as arguments. The function
returns the critical F-value.
"""
df_num = 3
df_denom = 16
f_crit = critical_f_value(df_num, df_denom)
print("Critical F-value: ", f_crit)

Critical F-value:  0.07026656300420833


In [15]:
# answer 3
# program that generates random samples from two normal distributions with known variances and uses an F-test to determine if the variances are equal:
    
import numpy as np
from scipy.stats import f

# Set seed for reproducibility
np.random.seed(1234)
# Generate random samples from two normal distributions with known variances
mean1, mean2 = 0, 0
variance1, variance2 = 1, 1.5
sample_size1, sample_size2 = 20, 25

sample1 = np.random.normal(mean1, np.sqrt(variance1), sample_size1)
sample2 = np.random.normal(mean2, np.sqrt(variance2), sample_size2)

# Perform F-test for equal variances
f_value = np.var(sample1, ddof=1) / np.var(sample2, ddof=1)
df1 = sample_size1 - 1
df2 = sample_size2 - 1
p_value = f.sf(f_value, df1, df2) * 2

# Output results
print("Sample 1 Mean: ", np.mean(sample1))
print("Sample 1 Variance: ", np.var(sample1, ddof=1))
print("Sample 2 Mean: ", np.mean(sample2))
print("Sample 2 Variance: ", np.var(sample2, ddof=1))
print("F-value: ", f_value)
print("Degrees of Freedom (numerator, denominator): ", df1, df2)
print("p-value: ", p_value)

Sample 1 Mean:  -0.0112972439581026
Sample 1 Variance:  1.214081921105498
Sample 2 Mean:  0.19593379950317655
Sample 2 Variance:  1.3443836141343677
F-value:  0.9030770000028828
Degrees of Freedom (numerator, denominator):  19 24
p-value:  1.169901915554453


# answer 4
To conduct an F-test for comparing the variances of two populations, we need to follow these steps:

Step 1: State the null and alternative hypotheses

The null hypothesis H0: σ1^2 = σ2^2 (the variances are equal)
The alternative hypothesis Ha: σ1^2 ≠ σ2^2 (the variances are not equal)

Step 2: Determine the significance level α and the degrees of freedom for the two populations

The significance level is given as 5%, which corresponds to a critical value of F with (df1 = 11, df2 = 11) 
degrees of freedom.

Step 3: Compute the test statistic F

The test statistic F is calculated as the ratio of the sample variances:

F = S1^2 / S2^2, where S1^2 and S2^2 are the sample variances of the two populations.

In this case, we have:

S1^2 = 10 and n1 = 12
S2^2 = 15 and n2 = 12

Therefore, the test statistic F is:

F = 10/15 = 0.67

Step 4: Determine the critical value and the p-value

The critical value of F with (df1 = 11, df2 = 11) degrees of freedom at a 5% significance level is 2.32.

The p-value is the probability of obtaining a test statistic as extreme or more extreme than the observed value,
assuming the null hypothesis is true. We can use an F-distribution table or a statistical software to find the 
p-value. In this case, the p-value is approximately 0.691.

Step 5: Make a decision and interpret the results

Since the calculated F-value (0.67) is smaller than the critical value (2.32), we fail to reject the null 
hypothesis. That is, we do not have sufficient evidence to conclude that the variances of the two populations 
are significantly different at the 5% level of significance. Therefore, we can conclude that there is no
significant difference in the variances of the two populations.

In [16]:
# answer 4
import numpy as np
from scipy.stats import f
n1 = 12
n2 = 12
var1 = 10
var2 = 15
# Compute the F-statistic and p-value using the samples
f_stat = var1 / var2
p_val = f.cdf(f_stat, n1 - 1, n2 - 1)
alpha = 0.05
if p_val < alpha / 2 or p_val > 1 - alpha / 2:
    print("Reject the null hypothesis; \n The variances are significantly different.")
else:
    print("Fail to reject the null hypothesis; \n There is no significant difference between the variances.")

Fail to reject the null hypothesis; 
 There is no significant difference between the variances.


In [17]:
# answer 5
import numpy as np
from scipy.stats import f

h0="The variance is 0.005 (manufacture's claim)"
ha="The variance is 0.006"

n = 25
alpha = 0.01
population_var = 0.005
sample_var = 0.006

# Compute the F-statistic and p-value using the samples
f_stat = sample_var / population_var
p_val = f.cdf(f_stat, n - 1, np.inf)

if p_val < alpha:
    print("Reject the null hypothesis; \n ",ha)
else:
    print("Fail to reject the null hypothesis; \n ",h0)

Reject the null hypothesis; 
  The variance is 0.006


In [18]:
# answer 6
import numpy as np
from scipy.stats import f

def f_dist_mean_var(df_num, df_denom):
    mean = df_denom / (df_denom - 2)
    variance = (2 * (df_denom**2) * (df_num + df_denom - 2)) / ((df_num * (df_denom - 2)**2 * (df_denom - 4)))
    return (mean, variance)

# degrees of freedom as an input for numerator and denominator respectively
n,d=6,8
mean, variance = f_dist_mean_var(n,d)

print("Mean: ", mean)
print("Variance: {:.4f}".format(variance))

Mean:  1.3333333333333333
Variance: 1.7778


In [19]:
# answer 7
from scipy.stats import f

h0="The variances are not different."
ha="The variances are significantly different."

# given data mean and variance of the two samples 
n1 = 10
n2 = 15
s1_var = 25
s2_var = 20
# significanve level
alpha=0.10

f_stat = s1_var / s2_var
p_val = f.cdf(f_stat, n1 - 1, n2 - 1)

if p_val < alpha:
    print("Reject the Null hypotheses \n ",ha)
else:
    print("Fail to reject the null hypotheses \n ",h0)

Fail to reject the null hypotheses 
  The variances are not different.


In [20]:
# answer 8
from scipy.stats import f

h0="The variances are not different."
ha="The variances are significantly different."

# given data of restuarants a and b
a = [24, 25, 28, 23, 22, 20, 27]
b = [31, 33, 35, 30, 32, 36]

n1 = len(a)
n2 = len(b)
alpha = 0.05

s1_var = sum((x - sum(a)/n1)**2 for x in a) / (n1 - 1)
s2_var = sum((x - sum(b)/n2)**2 for x in b) / (n2 - 1)


f_stat = s1_var / s2_var
p_val = f.cdf(f_stat, n1 - 1, n2 - 1)

if p_val < alpha:
    print("Reject the Null hypotheses \n ",ha)
else:
    print("Fail to reject the null hypotheses \n ",h0)

Fail to reject the null hypotheses 
  The variances are not different.


In [21]:
# answer 9
from scipy.stats import f_oneway

h0="The variances are not different."
ha="The variances are significantly different."

group_a = [24, 25, 28, 23, 22, 20, 27]
group_b = [31, 33, 35, 30, 32, 36]

f_statistic, p_value = f_oneway(group_a, group_b)
alpha = 0.01

if p_val < alpha:
    print("Reject the Null hypotheses \n ",ha)
else:
    print("Fail to reject the null hypotheses \n ",h0)

Fail to reject the null hypotheses 
  The variances are not different.


In [22]:
# answer 9
from scipy.stats import f_oneway

h0="The variances are not different."
ha="The variances are significantly different."

alpha = 0.01
group_a = [80, 85, 90, 92, 87, 83]
group_b = [75, 78, 82, 79, 81, 84]

f_statistic, p_value = f_oneway(group_a, group_b)

if p_val < alpha:
    print("Reject the Null hypotheses \n ",ha)
else:
    print("Fail to reject the null hypotheses \n ",h0)

Fail to reject the null hypotheses 
  The variances are not different.
