In [1]:
#Statistics_Advance_Assigment_7
#Question.1 Write a Python function that takes in two arrays of data and calculates the F-value for a variance ratio
#test. The function should return the F-value and the corresponding p-value for the test.
#Answer.1 : 
import numpy as np
import scipy.stats as stats

def variance_ratio_test(data1, data2):
    """
    Calculate F-value and p-value for a variance ratio test (F-test).
    
    Parameters:
    data1 (array-like): First array of data.
    data2 (array-like): Second array of data.
    
    Returns:
    f_value (float): Calculated F-value.
    p_value (float): Corresponding p-value.
    """
    # Calculate sample variances
    var1 = np.var(data1, ddof=1)
    var2 = np.var(data2, ddof=1)
    
    # Ensure larger variance is in the numerator
    if var1 < var2:
        var1, var2 = var2, var1
        data1, data2 = data2, data1
    
    # Calculate F-value
    f_value = var1 / var2
    
    # Calculate degrees of freedom
    df1 = len(data1) - 1
    df2 = len(data2) - 1
    
    # Calculate p-value
    p_value = 1 - stats.f.cdf(f_value, df1, df2)
    
    return f_value, p_value

# Example usage
data1 = [10, 15, 20, 25, 30]
data2 = [12, 18, 22, 28, 35]
f_value, p_value = variance_ratio_test(data1, data2)

print("F-value:", f_value)
print("p-value:", p_value)


F-value: 1.264
p-value: 0.4129405594579917


In [2]:
#Question.2 Given a significance level of 0.05 and the degrees of freedom for the numerator and denominator of an
#F-distribution, write a Python function that returns the critical F-value for a two-tailed test.
#Answer.2 : 
import scipy.stats as stats

def critical_f_value(alpha, dfn, dfd):
    """
    Calculate the critical F-value for a two-tailed test.
    
    Parameters:
    alpha (float): Significance level.
    dfn (int): Degrees of freedom for the numerator.
    dfd (int): Degrees of freedom for the denominator.
    
    Returns:
    critical_f (float): Calculated critical F-value.
    """
    # Calculate the upper and lower percentiles for the two-tailed test
    lower_percentile = alpha / 2
    upper_percentile = 1 - (alpha / 2)
    
    # Calculate the critical F-values for the percentiles
    critical_f_lower = stats.f.ppf(lower_percentile, dfn, dfd)
    critical_f_upper = stats.f.ppf(upper_percentile, dfn, dfd)
    
    return critical_f_lower, critical_f_upper

# Example usage
alpha = 0.05
dfn = 3
dfd = 12
critical_f_lower, critical_f_upper = critical_f_value(alpha, dfn, dfd)

print("Critical F-value (Lower):", critical_f_lower)
print("Critical F-value (Upper):", critical_f_upper)


Critical F-value (Lower): 0.06975177682217751
Critical F-value (Upper): 4.474184809637748


In [3]:
#Question.3 Write a Python program that generates random samples from two normal distributions with known
#variances and uses an F-test to determine if the variances are equal. The program should output the F-
#value, degrees of freedom, and p-value for the test.
#Answer.3 : 
import numpy as np
import scipy.stats as stats

def f_test_equal_variances(sample1, sample2):
    """
    Perform an F-test to determine if two samples have equal variances.
    
    Parameters:
    sample1 (array-like): First sample data.
    sample2 (array-like): Second sample data.
    
    Returns:
    f_value (float): Calculated F-value.
    df1 (int): Degrees of freedom for the numerator.
    df2 (int): Degrees of freedom for the denominator.
    p_value (float): Corresponding p-value.
    """
    # Calculate sample variances
    var1 = np.var(sample1, ddof=1)
    var2 = np.var(sample2, ddof=1)
    
    # Ensure larger variance is in the numerator
    if var1 < var2:
        var1, var2 = var2, var1
        sample1, sample2 = sample2, sample1
    
    # Calculate F-value
    f_value = var1 / var2
    
    # Calculate degrees of freedom
    df1 = len(sample1) - 1
    df2 = len(sample2) - 1
    
    # Calculate p-value
    p_value = 2 * min(stats.f.cdf(f_value, df1, df2), 1 - stats.f.cdf(f_value, df1, df2))
    
    return f_value, df1, df2, p_value

# Example usage
np.random.seed(42)  # Set seed for reproducibility

# Generate random samples from normal distributions with known variances
sample_size = 30
variance1 = 25
variance2 = 36
sample1 = np.random.normal(loc=0, scale=np.sqrt(variance1), size=sample_size)
sample2 = np.random.normal(loc=0, scale=np.sqrt(variance2), size=sample_size)

# Perform F-test for equal variances
f_value, df1, df2, p_value = f_test_equal_variances(sample1, sample2)

print("F-value:", f_value)
print("Degrees of Freedom (Numerator):", df1)
print("Degrees of Freedom (Denominator):", df2)
print("p-value:", p_value)


F-value: 1.5412247469617144
Degrees of Freedom (Numerator): 29
Degrees of Freedom (Denominator): 29
p-value: 0.2500044667545791


In [4]:
#Question.4 The variances of two populations are known to be 10 and 15. A sample of 12 observations is taken from
#each population. Conduct an F-test at the 5% significance level to determine if the variances are
#significantly different.
#Answer.4 : 
import scipy.stats as stats

# Given variances and sample sizes
variance1 = 10
variance2 = 15
sample_size = 12

# Calculate the F-statistic
f_statistic = variance1 / variance2

# Degrees of freedom for the two samples
df1 = sample_size - 1
df2 = sample_size - 1

# Calculate the critical F-value at the 5% significance level
alpha = 0.05
critical_f_value = stats.f.ppf(1 - alpha, df1, df2)

# Conduct the F-test
p_value = 1 - stats.f.cdf(f_statistic, df1, df2)

# Determine if the variances are significantly different
if f_statistic > critical_f_value:
    result = "Variances are significantly different"
else:
    result = "Variances are not significantly different"

# Print the results
print("F-Statistic:", f_statistic)
print("Critical F-Value:", critical_f_value)
print("p-value:", p_value)
print(result)


F-Statistic: 0.6666666666666666
Critical F-Value: 2.8179304699530863
p-value: 0.7438051006321003
Variances are not significantly different


In [5]:
#Question.5 A manufacturer claims that the variance of the diameter of a certain product is 0.005. A sample of 25
#products is taken, and the sample variance is found to be 0.006. Conduct an F-test at the 1% significance
#level to determine if the claim is justified.
#Answer.5 : 
import scipy.stats as stats

# Given values
claimed_variance = 0.005
sample_variance = 0.006
sample_size = 25

# Degrees of freedom for the sample and claimed population
df_sample = sample_size - 1
df_claimed = 1

# Calculate the F-statistic
f_statistic = sample_variance / claimed_variance

# Calculate the critical F-value at the 1% significance level
alpha = 0.01
critical_f_value = stats.f.ppf(1 - alpha, df_sample, df_claimed)

# Conduct the F-test
p_value = 1 - stats.f.cdf(f_statistic, df_sample, df_claimed)

# Determine if the claim is justified
if f_statistic < critical_f_value:
    result = "Claim is justified"
else:
    result = "Claim is not justified"

# Print the results
print("F-Statistic:", f_statistic)
print("Critical F-Value:", critical_f_value)
print("p-value:", p_value)
print(result)


F-Statistic: 1.2
Critical F-Value: 6234.6308935330835
p-value: 0.6296099619959358
Claim is justified


In [6]:
#Question.6 Write a Python function that takes in the degrees of freedom for the numerator and denominator of an
#F-distribution and calculates the mean and variance of the distribution. The function should return the
#mean and variance as a tuple.
#Answer.6 : 
def f_distribution_mean_variance(dfn, dfd):
    """
    Calculate the mean and variance of an F-distribution.
    
    Parameters:
    dfn (int): Degrees of freedom for the numerator.
    dfd (int): Degrees of freedom for the denominator.
    
    Returns:
    mean (float): Mean of the F-distribution.
    variance (float): Variance of the F-distribution.
    """
    # Ensure degrees of freedom are positive integers
    if dfn <= 0 or dfd <= 0:
        raise ValueError("Degrees of freedom must be positive integers.")
    
    # Calculate mean and variance
    if dfd > 2:
        mean = dfd / (dfd - 2)
        variance = (2 * (dfd ** 2) * (dfn + dfd - 2)) / (dfn * (dfd - 2) ** 2 * (dfd - 4))
    else:
        mean = float('nan')
        variance = float('nan')
    
    return mean, variance

# Example usage
dfn = 3
dfd = 20
mean, variance = f_distribution_mean_variance(dfn, dfd)

print("Mean:", mean)
print("Variance:", variance)


Mean: 1.1111111111111112
Variance: 1.0802469135802468


In [7]:
#Question.7 A random sample of 10 measurements is taken from a normal population with unknown variance. The
#sample variance is found to be 25. Another random sample of 15 measurements is taken from another
#normal population with unknown variance, and the sample variance is found to be 20. Conduct an F-test
#at the 10% significance level to determine if the variances are significantly different.
#Answer.7 : 
import scipy.stats as stats

# Given sample variances and sample sizes
sample_variance1 = 25
sample_variance2 = 20
sample_size1 = 10
sample_size2 = 15

# Degrees of freedom for the two samples
df1 = sample_size1 - 1
df2 = sample_size2 - 1

# Calculate the F-statistic
f_statistic = sample_variance1 / sample_variance2

# Calculate the critical F-value at the 10% significance level
alpha = 0.10
critical_f_value = stats.f.ppf(1 - alpha, df1, df2)

# Conduct the F-test
p_value = 1 - stats.f.cdf(f_statistic, df1, df2)

# Determine if the variances are significantly different
if f_statistic > critical_f_value:
    result = "Variances are significantly different"
else:
    result = "Variances are not significantly different"

# Print the results
print("F-Statistic:", f_statistic)
print("Critical F-Value:", critical_f_value)
print("p-value:", p_value)
print(result)


F-Statistic: 1.25
Critical F-Value: 2.121954566976902
p-value: 0.3416097191292977
Variances are not significantly different


In [8]:
#Question.8 The following data represent the waiting times in minutes at two different restaurants on a Saturday
#night: Restaurant A: 24, 25, 28, 23, 22, 20, 27; Restaurant B: 31, 33, 35, 30, 32, 36. Conduct an F-test at the 5%
#significance level to determine if the variances are significantly different.
#Answer.8 : 
import numpy as np
import scipy.stats as stats

# Data for the two restaurants
data_restaurant_A = np.array([24, 25, 28, 23, 22, 20, 27])
data_restaurant_B = np.array([31, 33, 35, 30, 32, 36])

# Sample variances and sample sizes
sample_variance_A = np.var(data_restaurant_A, ddof=1)
sample_variance_B = np.var(data_restaurant_B, ddof=1)
sample_size_A = len(data_restaurant_A)
sample_size_B = len(data_restaurant_B)

# Degrees of freedom for the two samples
df1 = sample_size_A - 1
df2 = sample_size_B - 1

# Calculate the F-statistic
f_statistic = sample_variance_A / sample_variance_B

# Calculate the critical F-value at the 5% significance level
alpha = 0.05
critical_f_value = stats.f.ppf(1 - alpha, df1, df2)

# Conduct the F-test
p_value = 1 - stats.f.cdf(f_statistic, df1, df2)

# Determine if the variances are significantly different
if f_statistic > critical_f_value:
    result = "Variances are significantly different"
else:
    result = "Variances are not significantly different"

# Print the results
print("F-Statistic:", f_statistic)
print("Critical F-Value:", critical_f_value)
print("p-value:", p_value)
print(result)


F-Statistic: 1.4551907719609583
Critical F-Value: 4.950288068694318
p-value: 0.3487407873968742
Variances are not significantly different


In [9]:
#Question.9 The following data represent the test scores of two groups of students: Group A: 80, 85, 90, 92, 87, 83;
#Group B: 75, 78, 82, 79, 81, 84. Conduct an F-test at the 1% significance level to determine if the variances
#are significantly different.
#Answer.9 : 
import numpy as np
import scipy.stats as stats

# Data for the two groups
data_group_A = np.array([80, 85, 90, 92, 87, 83])
data_group_B = np.array([75, 78, 82, 79, 81, 84])

# Sample variances and sample sizes
sample_variance_A = np.var(data_group_A, ddof=1)
sample_variance_B = np.var(data_group_B, ddof=1)
sample_size_A = len(data_group_A)
sample_size_B = len(data_group_B)

# Degrees of freedom for the two samples
df1 = sample_size_A - 1
df2 = sample_size_B - 1

# Calculate the F-statistic
f_statistic = sample_variance_A / sample_variance_B

# Calculate the critical F-value at the 1% significance level
alpha = 0.01
critical_f_value = stats.f.ppf(1 - alpha, df1, df2)

# Conduct the F-test
p_value = 1 - stats.f.cdf(f_statistic, df1, df2)

# Determine if the variances are significantly different
if f_statistic > critical_f_value:
    result = "Variances are significantly different"
else:
    result = "Variances are not significantly different"

# Print the results
print("F-Statistic:", f_statistic)
print("Critical F-Value:", critical_f_value)
print("p-value:", p_value)
print(result)


F-Statistic: 1.9442622950819677
Critical F-Value: 10.967020650907992
p-value: 0.2415521774535344
Variances are not significantly different
