# Q1. Write a Python function that takes in two arrays of data and calculates the F-value for a variance ratio test. The function should return the F-value and the corresponding p-value for the test.

In [1]:
import numpy as np
import scipy.stats as stats
def variance_ratio_test(data1, data2):
    """
    Perform a variance ratio test (F-test) for equal variances between two datasets.
    Parameters:
    data1 (array-like): The first dataset.
    data2 (array-like): The second dataset.
    Returns:
    f_value (float): The calculated F-value.
    p_value (float): The corresponding p-value for the test.
    """
    # Calculate the sample variances for both datasets
    var1 = np.var(data1, ddof=1)  # ddof=1 for sample variance
    var2 = np.var(data2, ddof=1)
    # Ensure var1 is the larger variance
    if var2 > var1:
        var1, var2 = var2, var1
    # Calculate the F-value
    f_value = var1 / var2
    # Calculate the degrees of freedom for each dataset
    df1 = len(data1) - 1
    df2 = len(data2) - 1
    # Calculate the p-value using the F-distribution
    p_value = 1 - stats.f.cdf(f_value, df1, df2)
    return f_value, p_value

# Example usage:
data1 = [23, 27, 30, 21, 25]
data2 = [18, 22, 19, 30, 28]

f_value, p_value = variance_ratio_test(data1, data2)
print("F-value:", f_value)
print("p-value:", p_value)



F-value: 2.3606557377049184
p-value: 0.2129339968949956


# Q2. Given a significance level of 0.05 and the degrees of freedom for the numerator and denominator of an F-distribution, write a Python function that returns the critical F-value for a two-tailed test.

In [2]:
import scipy.stats as stats
def get_critical_f_value(alpha, df1, df2):
    """
    Calculate the critical F-value for a two-tailed test given a significance level (alpha)
    and degrees of freedom for the numerator (df1) and denominator (df2) of an F-distribution.

    Parameters:
    alpha (float): The significance level (e.g., 0.05 for a 95% confidence level).
    df1 (int): Degrees of freedom for the numerator.
    df2 (int): Degrees of freedom for the denominator.

    Returns:
    critical_f_value (float): The critical F-value for the two-tailed test.
    """
    # Calculate the critical F-value for a two-tailed test
    critical_f_value = stats.f.ppf(1 - alpha / 2, df1, df2)
    return critical_f_value
# Example usage:
alpha = 0.05  # Significance level
df1 = 3       # Degrees of freedom for the numerator
df2 = 20      # Degrees of freedom for the denominator
critical_f = get_critical_f_value(alpha, df1, df2)
print("Critical F-Value:", critical_f)

Critical F-Value: 3.8586986662732143


# Q3. Write a Python program that generates random samples from two normal distributions with known variances and uses an F-test to determine if the variances are equal. The program should output the F - value, degrees of freedom, and p-value for the test.

In [4]:
import numpy as np
import scipy.stats as stats
# Set the random seed for reproducibility
np.random.seed(0)
# Generate random samples from two normal distributions with known variances
variance1 = 5  # Variance of the first distribution
variance2 = 8  # Variance of the second distribution
sample_size1 = 30  # Sample size for the first distribution
sample_size2 = 30  # Sample size for the second distribution
# Generate random samples
sample1 = np.random.normal(0, np.sqrt(variance1), sample_size1)
sample2 = np.random.normal(0, np.sqrt(variance2), sample_size2)
# Perform an F-test for equality of variances
f_statistic, p_value = stats.ftest (sample1.var(ddof=1), sample2.var(ddof=1), sample_size1 - 1, sample_size2 - 1)
# Calculate degrees of freedom
df1 = sample_size1 - 1
df2 = sample_size2 - 1
# Print the results
print("F-Value:", f_statistic)
print("Degrees of Freedom (numerator, denominator):", df1, ",", df2)
print("p-Value:", p_value)
# Set the significance level (alpha)
alpha = 0.05
# Interpret the F-test results
if p_value < alpha:
    print("The p-value is less than the significance level (alpha).")
    print("Reject the null hypothesis: Variances are not equal.")
else:
    print("The p-value is greater than or equal to the significance level (alpha).")
    print("Fail to reject the null hypothesis: Variances are equal.")

AttributeError: module 'scipy.stats' has no attribute 'ftest'

# Q4.The variances of two populations are known to be 10 and 15. A sample of 12 observations is taken from each population. Conduct an F-test at the 5% significance level to determine if the variances are significantly different.

In [5]:
import scipy.stats as stats
# Known population variances
variance1 = 10  # Variance of Population 1
variance2 = 15  # Variance of Population 2
# Sample sizes
n1 = 12
n2 = 12
# Calculate the F-statistic
f_statistic = variance1 / variance2
# Degrees of freedom for the numerator and denominator
df1 = n1 - 1
df2 = n2 - 1
# Calculate the critical F-value for a two-tailed test at alpha = 0.05
alpha = 0.05
critical_f_value = stats.f.ppf(1 - alpha / 2, df1, df2)
# Perform the F-test
p_value = 2 * min(stats.f.cdf(f_statistic, df1, df2), 1 - stats.f.cdf(f_statistic, df1, df2))
# Print the results
print("F-Statistic:", f_statistic)
print("Degrees of Freedom (numerator, denominator):", df1, ",", df2)
print("Critical F-Value:", critical_f_value)
print("p-Value:", p_value)
# Interpret the F-test results
if p_value < alpha:
    print("The p-value is less than the significance level (alpha).")
    print("Reject the null hypothesis: Variances are not equal.")
else:
    print("The p-value is greater than or equal to the significance level (alpha).")
    print("Fail to reject the null hypothesis: Variances are equal.")

F-Statistic: 0.6666666666666666
Degrees of Freedom (numerator, denominator): 11 , 11
Critical F-Value: 3.473699051085809
p-Value: 0.5123897987357996
The p-value is greater than or equal to the significance level (alpha).
Fail to reject the null hypothesis: Variances are equal.


# Q5. A manufacturer claims that the variance of the diameter of a certain product is 0.005. A sample of 25 products is taken, and the sample variance is found to be 0.006. Conduct an F-test at the 1% significance level to determine if the claim is justified.

In [6]:
import scipy.stats as stats
# Manufacturer's claim about the population variance
claimed_variance = 0.005
# Sample variance from the sample of 25 products
sample_variance = 0.006
# Sample size
n = 25
# Degrees of freedom for the numerator and denominator
df1 = n - 1
df2 = 1  # Since there's only one sample variance
# Calculate the F-statistic
f_statistic = sample_variance / claimed_variance
# Calculate the critical F-value for a two-tailed test at alpha = 0.01
alpha = 0.01
critical_f_value = stats.f.ppf(1 - alpha / 2, df1, df2)
# Calculate the p-value
p_value = 2 * min(stats.f.cdf(f_statistic, df1, df2), 1 - stats.f.cdf(f_statistic, df1, df2))
# Print the results
print("F-Statistic:", f_statistic)
print("Degrees of Freedom (numerator, denominator):", df1, ",", df2)
print("Critical F-Value:", critical_f_value)
print("p-Value:", p_value)
# Interpret the F-test results
if p_value < alpha:
    print("The p-value is less than the significance level (alpha).")
    print("Reject the null hypothesis: The claim is not justified.")
else:
    print("The p-value is greater than or equal to the significance level (alpha).")
    print("Fail to reject the null hypothesis: The claim is justified.")

F-Statistic: 1.2
Degrees of Freedom (numerator, denominator): 24 , 1
Critical F-Value: 24939.565259943236
p-Value: 0.7407800760081285
The p-value is greater than or equal to the significance level (alpha).
Fail to reject the null hypothesis: The claim is justified.


# Q6. Write a Python function that takes in the degrees of freedom for the numerator and denominator of an F-distribution and calculates the mean and variance of the distribution. The function should return the mean and variance as a tuple.

In [7]:
def f_distribution_mean_and_variance(df1, df2):
    """
    Calculate the mean and variance of an F-distribution.

    Parameters:
    df1 (int): Degrees of freedom for the numerator.
    df2 (int): Degrees of freedom for the denominator.

    Returns:
    mean (float): Mean of the F-distribution.
    variance (float): Variance of the F-distribution.
    """
    
    if df1 <= 0 or df2 <= 0:
        raise ValueError("Degrees of freedom must be greater than zero.")
    
    if df2 <= 2:
        raise ValueError("Degrees of freedom for the denominator must be greater than 2.")
    
    if df1 >= 2:
        mean = df2 / (df2 - 2)
        variance = (2 * df2 ** 2 * (df1 + df2 - 2)) / (df1 * (df2 - 2) ** 2 * (df2 - 4))
    else:
        mean = float('inf')  # Mean is undefined when df1 < 2
        variance = float('inf')  # Variance is undefined when df1 < 2
    
    return mean, variance

# Example usage:
df1 = 3
df2 = 20

mean, variance = f_distribution_mean_and_variance(df1, df2)
print("Mean:", mean)
print("Variance:", variance)


Mean: 1.1111111111111112
Variance: 1.0802469135802468


# Q7. A random sample of 10 measurements is taken from a normal population with unknown variance. The sample variance is found to be 25. Another random sample of 15 measurements is taken from another normal population with unknown variance, and the sample variance is found to be 20. Conduct an F-test at the 10% significance level to determine if the variances are significantly different.

Sample 1: A random sample of 10 measurements with a sample variance of 25.

Sample 2: Another random sample of 15 measurements with a sample variance of 20.

Significance level (α): 10%

The null hypothesis (H0) is that the variances are equal
and the alternative hypothesis (H1) is that the variances are not equal 

In [8]:
import scipy.stats as stats
# Sample variances
sample_variance1 = 25
sample_variance2 = 20
# Sample sizes
n1 = 10
n2 = 15
# Calculate the F-statistic
f_statistic = sample_variance1 / sample_variance2
# Degrees of freedom for the numerator and denominator
df1 = n1 - 1
df2 = n2 - 1
# Calculate the critical F-value for a two-tailed test at alpha = 0.10
alpha = 0.10
critical_f_value = stats.f.ppf(1 - alpha / 2, df1, df2)
# Calculate the p-value
p_value = 2 * min(stats.f.cdf(f_statistic, df1, df2), 1 - stats.f.cdf(f_statistic, df1, df2))
# Print the results
print("F-Statistic:", f_statistic)
print("Degrees of Freedom (numerator, denominator):", df1, ",", df2)
print("Critical F-Value:", critical_f_value)
print("p-Value:", p_value)
# Interpret the F-test results
if p_value < alpha:
    print("The p-value is less than the significance level (alpha).")
    print("Reject the null hypothesis: Variances are not equal.")
else:
    print("The p-value is greater than or equal to the significance level (alpha).")
    print("Fail to reject the null hypothesis: Variances are equal.")

F-Statistic: 1.25
Degrees of Freedom (numerator, denominator): 9 , 14
Critical F-Value: 2.6457907352338195
p-Value: 0.6832194382585954
The p-value is greater than or equal to the significance level (alpha).
Fail to reject the null hypothesis: Variances are equal.


# Q8. The following data represent the waiting times in minutes at two different restaurants on a Saturday night: Restaurant A: 24, 25, 28, 23, 22, 20, 27; Restaurant B: 31, 33, 35, 30, 32, 36. Conduct an F-test at the 5% significance level to determine if the variances are significantly different.

Restaurant A waiting times: 24, 25, 28, 23, 22, 20, 27

Restaurant B waiting times: 31, 33, 35, 30, 32, 36

If the variances of waiting times at the two restaurants are significantly different at a 5% significance level (α = 0.05). 

The null hypothesis (H0) is that the variances are equal.

The alternative hypothesis (H1) is that the variances are not equal.

In [9]:
import scipy.stats as stats
# Waiting times data for Restaurant A and Restaurant B
waiting_times_A = [24, 25, 28, 23, 22, 20, 27]
waiting_times_B = [31, 33, 35, 30, 32, 36]
# Calculate sample variances
variance_A = sum((x - sum(waiting_times_A) / len(waiting_times_A))**2 for x in waiting_times_A) / (len(waiting_times_A) - 1)
variance_B = sum((x - sum(waiting_times_B) / len(waiting_times_B))**2 for x in waiting_times_B) / (len(waiting_times_B) - 1)
# Calculate the F-statistic
f_statistic = variance_A / variance_B
# Degrees of freedom for the numerator and denominator
df1 = len(waiting_times_A) - 1
df2 = len(waiting_times_B) - 1
# Calculate the critical F-value for a two-tailed test at alpha = 0.05
alpha = 0.05
critical_f_value = stats.f.ppf(1 - alpha / 2, df1, df2)
# Calculate the p-value
p_value = 2 * min(stats.f.cdf(f_statistic, df1, df2), 1 - stats.f.cdf(f_statistic, df1, df2))
# Print the results
print("F-Statistic:", f_statistic)
print("Degrees of Freedom (numerator, denominator):", df1, ",", df2)
print("Critical F-Value:", critical_f_value)
print("p-Value:", p_value)
# Interpret the F-test results
if p_value < alpha:
    print("The p-value is less than the significance level (alpha).")
    print("Reject the null hypothesis: Variances are not equal.")
else:
    print("The p-value is greater than or equal to the significance level (alpha).")
    print("Fail to reject the null hypothesis: Variances are equal.")

F-Statistic: 1.4551907719609583
Degrees of Freedom (numerator, denominator): 6 , 5
Critical F-Value: 6.977701858535566
p-Value: 0.6974815747937484
The p-value is greater than or equal to the significance level (alpha).
Fail to reject the null hypothesis: Variances are equal.


# Q9. The following data represent the test scores of two groups of students: Group A: 80, 85, 90, 92, 87, 83; Group B: 75, 78, 82, 79, 81, 84. Conduct an F-test at the 1% significance level to determine if the variances are significantly different.

Group A test scores: 80, 85, 90, 92, 87, 83

Group B test scores: 75, 78, 82, 79, 81, 84

If the variances of test scores in the two groups are significantly different at a 1% significance level (α=0.01).

The null hypothesis (H0) is that the variances are equal

The alternative hypothesis (H1) is that the variances are not equal

In [10]:
import scipy.stats as stats
# Test scores data for Group A and Group B
test_scores_A = [80, 85, 90, 92, 87, 83]
test_scores_B = [75, 78, 82, 79, 81, 84]
# Calculate sample variances
variance_A = sum((x - sum(test_scores_A) / len(test_scores_A))**2 for x in test_scores_A) / (len(test_scores_A) - 1)
variance_B = sum((x - sum(test_scores_B) / len(test_scores_B))**2 for x in test_scores_B) / (len(test_scores_B) - 1)
# Calculate the F-statistic
f_statistic = variance_A / variance_B
# Degrees of freedom for the numerator and denominator
df1 = len(test_scores_A) - 1
df2 = len(test_scores_B) - 1
# Calculate the critical F-value for a two-tailed test at alpha = 0.01
alpha = 0.01
critical_f_value = stats.f.ppf(1 - alpha / 2, df1, df2)
# Calculate the p-value
p_value = 2 * min(stats.f.cdf(f_statistic, df1, df2), 1 - stats.f.cdf(f_statistic, df1, df2))
# Print the results
print("F-Statistic:", f_statistic)
print("Degrees of Freedom (numerator, denominator):", df1, ",", df2)
print("Critical F-Value:", critical_f_value)
print("p-Value:", p_value)
# Interpret the F-test results
if p_value < alpha:
    print("The p-value is less than the significance level (alpha).")
    print("Reject the null hypothesis: Variances are not equal.")
else:
    print("The p-value is greater than or equal to the significance level (alpha).")
    print("Fail to reject the null hypothesis: Variances are equal.")

F-Statistic: 1.9442622950819677
Degrees of Freedom (numerator, denominator): 5 , 5
Critical F-Value: 14.939605459912224
p-Value: 0.4831043549070688
The p-value is greater than or equal to the significance level (alpha).
Fail to reject the null hypothesis: Variances are equal.
