# Q1. Write a Python function that takes in two arrays of data and calculates the F-value for a variance ratio test. The function should return the F-value and the corresponding p-value for the test.

In [5]:
import scipy.stats as stats
import numpy as np

In [6]:
def calculate_f_value(data1, data2):
    # Calculate the variance of each dataset
    var1 = data1.var()
    var2 = data2.var()

    # Ensure var1 is the larger variance
    if var2 > var1:
        var1, var2 = var2, var1
        data1, data2 = data2, data1

    # Calculate the F-value
    f_value = var1 / var2

    # Calculate degrees of freedom
    df1 = len(data1) - 1
    df2 = len(data2) - 1

    # Calculate the p-value
    p_value = 1 - stats.f.cdf(f_value, df1, df2)

    return f_value, p_value

In [7]:
d1=[3,2,8,9,6]
d2=[4,9,3,1,7,5,6]

data1=np.array(d1)
data2=np.array(d2)

f_value, p_value = calculate_f_value(data1, data2)

print("F-value:", f_value)
print("p-value:", p_value)

F-value: 1.24
p-value: 0.3868156564035148


# Q2. Given a significance level of 0.05 and the degrees of freedom for the numerator and denominator of an F-distribution, write a Python function that returns the critical F-value for a two-tailed test.

In [8]:

def critical_f_value(alpha, dfn, dfd):
    
    # Calculate the right-tailed F-value for alpha/2
    right_tail = stats.f.ppf(1 - alpha/2, dfn, dfd)

    # Calculate the left-tailed F-value for alpha/2
    left_tail = stats.f.ppf(alpha/2, dfn, dfd)

    return left_tail, right_tail


In [11]:
alpha = 0.05
dfn = 3  
dfd = 10  

left_critical, right_critical = critical_f_value(alpha, dfn, dfd)

print(f"Left Critical F-value:{left_critical:.4f}")
print(f"Right Critical F-value:{ right_critical:.4f}")


Left Critical F-value:0.0694
Right Critical F-value:4.8256


# Q3. Write a Python program that generates random samples from two normal distributions with known variances and uses an F-test to determine if the variances are equal. The program should output the F- value, degrees of freedom, and p-value for the test.

In [19]:
import numpy as np
import scipy.stats as stats

In [21]:
np.random.seed(0)

# Generate random samples from two normal distributions with known variances
var1 = 4.0  
var2 = 9.0 
n1 = 30
n2 = 40

# Generate the random samples
sample1 = np.random.normal(0, np.sqrt(var1), n1) #(mean, std_dev, size)

sample2 = np.random.normal(0, np.sqrt(var2), n2)

# Perform an F-test to determine if the variances are equal
f_value = np.var(sample1, ddof=1) / np.var(sample2, ddof=1)

df1 = n1 - 1
df2 = n2 - 1


# min function is used to find the smaller of the two probabilities calculated 
# 2 multiply because two-tailed test
p_value = 2 * min(stats.f.cdf(f_value, df1, df2), 1 - stats.f.cdf(f_value, df1, df2))

# results
print("F-value:", f_value)
print("Degrees of freedom (numerator, denominator):", df1, df2)
print("p-value:", p_value)


F-value: 0.7087959958093473
Degrees of freedom (numerator, denominator): 29 39
p-value: 0.3382337757135684


In [26]:
alpha = 0.05
if p_value < alpha:
    print("Reject the null hypothesis: Variances are not equal.")
else:
    print("Fail to reject the null hypothesis")
    print("conclusion: Variances are equal.")

Fail to reject the null hypothesis
conclusion: Variances are equal.


# Q4.The variances of two populations are known to be 10 and 15. A sample of 12 observations is taken from each population. Conduct an F-test at the 5% significance level to determine if the variances are significantly different.

In [27]:
# Given 
var1 = 10
var2 = 15
n = 12
alpha = 0.05

# Calculate the F-statistic
f_stat = var2 / var1

# Degrees of freedom
df1 = n - 1
df2 = n - 1

# Calculate the p-value
p_value = 2 * min(stats.f.cdf(f_stat, df1, df2), 1 - stats.f.cdf(f_stat, df1, df2))

# Make a decision
if p_value < alpha:
    print("Reject the null hypothesis: Variances are significantly different.")
else:
    print("Fail to reject the null hypothesis: Variances are not significantly different.")

print("F-statistic:", f_stat)
print("Degrees of freedom (numerator, denominator):", df1, df2)
print("p-value:", p_value)


Fail to reject the null hypothesis: Variances are not significantly different.
F-statistic: 1.5
Degrees of freedom (numerator, denominator): 11 11
p-value: 0.5123897987357995


# Q5. A manufacturer claims that the variance of the diameter of a certain product is 0.005. A sample of 25 products is taken, and the sample variance is found to be 0.006. Conduct an F-test at the 1% significance level to determine if the claim is justified.

In [32]:
from scipy.stats import f

In [33]:
alpha = 0.01

# set the claimed population variance and sample variance
claimed_variance = 0.005
sample_variance = 0.006

# set the sample size
n = 25

# Null Hypothesis and alternate hypothesis
null_hypothesis = "The variance of the diameter of the product is 0.005"
alternate_hypothesis = "The variance of the diameter of the product is NOT 0.005."

# calculate the F-statistic
F = claimed_variance / sample_variance

# calculate the critical value of the F-distribution
dfn = n - 1
dfd = n - 1

# calculate the p-value
p_value = 2 * min(f.cdf(F, dfn, dfd), 1 - f.cdf(F, dfn, dfd))

# Calculates critical values for two tailed F-test
alpha = 0.01
F_crit1 = f.ppf(alpha/2, dfn, dfd)
F_crit2 = f.ppf(1-alpha/2,dfn, dfd)

# print the results
print(f"F-statistic: {F:.4f}")
print(f"F Critical values: {F_crit1:.4f} and {F_crit2:.4f}")
print(f"P-value: {p_value:.4f}")

if p_value < alpha:
    print("Reject null hypothesis.")
    print(f"Conclusion : {alternate_hypothesis}")
else:
    print("FAIL to reject null hypothesis.")
    print(f"Conclusion : {null_hypothesis}")


F-statistic: 0.8333
F Critical values: 0.3371 and 2.9667
P-value: 0.6587
FAIL to reject null hypothesis.
Conclusion : The variance of the diameter of the product is 0.005


# Q6. Write a Python function that takes in the degrees of freedom for the numerator and denominator of an F-distribution and calculates the mean and variance of the distribution. The function should return the mean and variance as a tuple.

### Ans

$$\text{Mean} = \frac{df_2}{df_2 - 2}$$

$$\text{Variance} = \frac{2 df_2^2 (df_1 + df_2 - 2)}{df_1 (df_2 - 2)^2 (df_2 - 4)}$$

In [37]:
def f_dist_mean_var(df1:int, df2:int):
   
    if df1 <= 0 or df2 <= 0:
        raise ValueError("Degrees of freedom must be positive.")
    
    if df2 <= 2:
        raise ValueError("Degrees of freedom for the denominator (df2) must be greater than 2.")

    mean = df2 / (df2 - 2)
    
    var = (2 * df2 ** 2 * (df1 + df2 - 2)) / (df1 * (df2 - 2) ** 2 * (df2 - 4))
    
    return (mean, var)

In [42]:
df1 = 5
df2 = 10

mean, variance = f_dist_mean_var(df1, df2)

print(f"Mean: {mean:.2f}")
print(f"Variance: {variance:.2f}")

Mean: 1.25
Variance: 1.35


# Q7. A random sample of 10 measurements is taken from a normal population with unknown variance. The sample variance is found to be 25. Another random sample of 15 measurements is taken from another normal population with unknown variance, and the sample variance is found to be 20. Conduct an F-test at the 10% significance level to determine if the variances are significantly different.

In [47]:
# Given values
var1 = 25
var2 = 20
n1 = 10
n2 = 15
alpha = 0.10


null_hypothesis = "Variances are equal"
alternate_hypothesis =  "Variances are not equal"


# Calculate the F-statistic
f_statistic =var1 / var2

# Degrees of freedom
df1 = n1 - 1
df2 = n2- 1

# Calculate the p-value
p_value = 2 * min(stats.f.cdf(f_statistic, df1, df2), 1 - stats.f.cdf(f_statistic, df1, df2))

print("F-statistic:", f_statistic)
print("Degrees of freedom (numerator, denominator):", df1, df2)
print("p-value:", p_value)

# Make a decision
if p_value < alpha:
    print("Reject the null hypothesis: ",alternate_hypothesis)
else:
    print("Fail to reject the null hypothesis:",null_hypothesis )


F-statistic: 1.25
Degrees of freedom (numerator, denominator): 9 14
p-value: 0.6832194382585954
Fail to reject the null hypothesis: Variances are equal


# Q8. The following data represent the waiting times in minutes at two different restaurants on a Saturday night: Restaurant A: 24, 25, 28, 23, 22, 20, 27; Restaurant B: 31, 33, 35, 30, 32, 36. Conduct an F-test at the 5% significance level to determine if the variances are significantly different.

In [49]:
# Data for Restaurant A and Restaurant B
A = np.array([24, 25, 28, 23, 22, 20, 27])
B = np.array([31, 33, 35, 30, 32, 36])

# Given significance level
alpha = 0.05

# Calculate sample variances
var_A = np.var(A, ddof=1)
var_B = np.var(B, ddof=1)

# Calculate the F-statistic
f_statistic = var_A / var_B

# Degrees of freedom
df1 = len(A) - 1
df2 = len(B) - 1

# Calculate the p-value
p_value = 2 * min(stats.f.cdf(f_statistic, df1, df2), 1 - stats.f.cdf(f_statistic, df1, df2))


print("F-statistic:", f_statistic)
print("Degrees of freedom (numerator, denominator):", df1, df2)
print("p-value:", p_value)


F-statistic: 1.4551907719609583
Degrees of freedom (numerator, denominator): 6 5
p-value: 0.6974815747937484


In [50]:
# Make a decision
if p_value < alpha:
    print("Reject the null hypothesis: Variances are not equal.")
else:
    print("Fail to reject the null hypothesis: Variances are equal.")

Fail to reject the null hypothesis: Variances are equal.


# Q9. The following data represent the test scores of two groups of students: Group A: 80, 85, 90, 92, 87, 83; Group B: 75, 78, 82, 79, 81, 84. Conduct an F-test at the 1% significance level to determine if the variances are significantly different.

In [52]:
A = np.array([80, 85, 90, 92, 87, 83])
B = np.array([75, 78, 82, 79, 81, 84])

# Given significance level
alpha = 0.01

# Calculate sample variances
var_A = np.var(A, ddof=1)
var_B = np.var(B, ddof=1)

# Calculate the F-statistic
f_statistic = var_A / var_B

# Degrees of freedom
df1 = len(A) - 1
df2 = len(B) - 1

# Calculate the p-value
p_value = 2 * min(stats.f.cdf(f_statistic, df1, df2), 1 - stats.f.cdf(f_statistic, df1, df2))


print("F-statistic:", f_statistic)
print("Degrees of freedom (numerator, denominator):", df1, df2)
print("p-value:", p_value)


F-statistic: 1.9442622950819677
Degrees of freedom (numerator, denominator): 5 5
p-value: 0.4831043549070688


In [53]:

# Make a decision
if p_value < alpha:
    print("Reject the null hypothesis: Variances are not equal.")
else:
    print("Fail to reject the null hypothesis: Variances are equal.")

Fail to reject the null hypothesis: Variances are equal.
