In [None]:
Q1. Calculate the 95% confidence interval for a sample of data with a mean of 50 and a standard deviation
of 5 using Python. Interpret the results.

In [2]:
import scipy.stats as stats

# Q1: 95% confidence interval for a mean, built from summary statistics.
#   CI              = point estimate +/- margin of error
#   margin of error = critical value * s / sqrt(n)
# The critical value comes from Student's t with n-1 degrees of freedom
# (it is stored in `z`, despite the name).
alpha = 0.05        # significance level -> 95% confidence
n = 100             # sample size (the question does not state one)
sample_mean = 50    # point estimate of the population mean
sample_stddev = 5

z = stats.t.ppf(1 - alpha/2, n-1)
root_n = n ** 0.5
margin_error = z * sample_stddev / root_n

lower_ci, upper_ci = sample_mean - margin_error, sample_mean + margin_error

print("95% Confidence Interval: ({:.2f}, {:.2f})".format(lower_ci, upper_ci))


95% Confidence Interval: (49.01, 50.99)


In [None]:
Q2. Conduct a chi-square goodness of fit test to determine if the distribution of colors of M&Ms in a bag
matches the expected distribution of 20% blue, 20% orange, 20% green, 10% yellow, 10% red, and 20%
brown. Use Python to perform the test with a significance level of 0.05.

In [None]:
import numpy as np
from scipy.stats import chisquare

# Q2: chi-square goodness-of-fit test for M&M colours.
#
# Observed counts per colour. The question supplies no counts, so these are
# illustrative values, listed in the same order as the expected proportions:
# blue, orange, green, yellow, red, brown.
observed_freq = [13, 18, 9, 9, 12, 19]

# Expected proportions stated in the question (they sum to 1).
expected_prop = np.array([0.20, 0.20, 0.20, 0.10, 0.10, 0.20])

# chisquare compares counts with counts, so the expected frequencies must be
# scaled to the same total as the observed ones (proportion * sample size).
expected_freq = expected_prop * sum(observed_freq)

# Calculate the chi-square test statistic and p-value
chi2, p = chisquare(observed_freq, f_exp=expected_freq)

# Print the results
print("Chi-square test statistic:", chi2)
print("p-value:", p)

# Compare the p-value to the significance level and make a conclusion
alpha = 0.05
if p < alpha:
    print("Reject the null hypothesis that the distribution of colors matches the expected distribution.")
else:
    print("Fail to reject the null hypothesis that the distribution of colors matches the expected distribution.")


In [None]:
Q3. Use Python to calculate the chi-square statistic and p-value for a contingency table with the following
data:

                   Group A     Group B
Outcome 1          20             15
Outcome 2          10             25
Outcome 3          15             20
Interpret the results of the test.

In [None]:
import numpy as np
from scipy.stats import chi2_contingency

# Q3: chi-square test of independence on a 3x2 contingency table.
# Each list holds one group's counts for Outcome 1, 2 and 3; stacking them as
# columns gives rows = outcomes, columns = groups.
group_a = [20, 10, 15]
group_b = [15, 25, 20]
observed = np.column_stack((group_a, group_b))

# Returns the statistic, its p-value, the degrees of freedom and the
# expected counts under independence.
chi2, p, dof, expected = chi2_contingency(observed)

print(f"Chi-square statistic: {chi2:.2f}")
print(f"P-value: {p:.4f}")


In [None]:
Q4. A study of the prevalence of smoking in a population of 500 individuals found that 60 individuals
smoked. Use Python to calculate the 95% confidence interval for the true proportion of individuals in the
population who smoke.

In [None]:
import math

# Q4: 95% confidence interval for a population proportion
# (normal approximation: p_hat +/- z * sqrt(p_hat * (1 - p_hat) / n)).
sample_size = 500
smokers = 60
sample_proportion = smokers / sample_size
confidence_level = 0.95

# Two-sided standard-normal critical value for 95% confidence.
# It is hard-coded here, not derived from confidence_level.
z = 1.96

# Standard error of the sample proportion, then the half-width of the interval
standard_error = math.sqrt(sample_proportion * (1 - sample_proportion) / sample_size)
margin_of_error = z * standard_error
confidence_interval = (sample_proportion - margin_of_error, sample_proportion + margin_of_error)

# Report the point estimate and the interval
print("Sample proportion:", sample_proportion)
print("Confidence interval: [{:.4f}, {:.4f}]".format(*confidence_interval))


In [None]:
Q5. Calculate the 90% confidence interval for a sample of data with a mean of 75 and a standard deviation
of 12 using Python. Interpret the results.

In [None]:
import math

# Q5: 90% confidence interval for a mean, from summary statistics
# (mean +/- z * s / sqrt(n)).
confidence_level = 0.90
sample_size = 100   # the question does not state a sample size
sample_std = 12
sample_mean = 75

# Two-sided standard-normal critical value for 90% confidence.
# It is hard-coded here, not derived from confidence_level.
z = 1.645

# Half-width of the interval, then its two endpoints
root_n = math.sqrt(sample_size)
margin_of_error = z * sample_std / root_n
lower_bound = sample_mean - margin_of_error
upper_bound = sample_mean + margin_of_error
confidence_interval = (lower_bound, upper_bound)

# Report the point estimate and the interval
print("Sample mean:", sample_mean)
print("Confidence interval: [{:.4f}, {:.4f}]".format(*confidence_interval))


In [None]:
Q6. Use Python to plot the chi-square distribution with 10 degrees of freedom. Label the axes and shade the
area corresponding to a chi-square statistic of 15.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import chi2

# Q6: chi-square density with 10 degrees of freedom, with the region to the
# right of a statistic of 15 shaded.
df = 10

# Grid for the full curve, and a second grid covering only the shaded region
x = np.linspace(0, 30, 500)
x_shade = np.linspace(15, 30, 500)

fig, ax = plt.subplots()

# Density curve (a label is set, but no legend is drawn)
ax.plot(x, chi2.pdf(x, df), 'b-', lw=2, label='chi2 pdf')

# Shade between the x-axis and the density for x in [15, 30]
ax.fill_between(x_shade, chi2.pdf(x_shade, df), color='gray', alpha=0.5)

# Axis labels and title
ax.set_xlabel('Chi-square statistic')
ax.set_ylabel('Probability density')
ax.set_title('Chi-square distribution with 10 degrees of freedom')

plt.show()


In [None]:
Q7. A random sample of 1000 people was asked if they preferred Coke or Pepsi. Of the sample, 520
preferred Coke. Calculate a 99% confidence interval for the true proportion of people in the population who
prefer Coke.

In [None]:
import math

# Q7: 99% confidence interval for a population proportion
# (normal approximation: p_hat +/- z * sqrt(p_hat * (1 - p_hat) / n)).
sample_size = 1000
sample_proportion = 520/1000   # 520 of the 1000 respondents preferred Coke
confidence_level = 0.99

# Two-sided standard-normal critical value for 99% confidence.
# It is hard-coded here, not derived from confidence_level.
z = 2.576

# Standard error of the sample proportion, then the half-width of the interval
standard_error = math.sqrt((sample_proportion * (1 - sample_proportion)) / sample_size)
margin_of_error = z * standard_error
confidence_interval = (sample_proportion - margin_of_error, sample_proportion + margin_of_error)

# Report the point estimate and the interval
print("Sample proportion:", sample_proportion)
print("Confidence interval: [{:.4f}, {:.4f}]".format(*confidence_interval))


In [None]:
Q8. A researcher hypothesizes that a coin is biased towards tails. They flip the coin 100 times and observe
45 tails. Conduct a chi-square goodness of fit test to determine if the observed frequencies match the
expected frequencies of a fair coin. Use a significance level of 0.05.

In [None]:
import scipy.stats as stats

# Q8: chi-square goodness-of-fit test of the observed flips against a fair coin.
# The question reports 45 tails in 100 flips, so the remaining 55 are heads.
n_flips = 100
n_tails = 45
observed_freq = [n_tails, n_flips - n_tails]  # [tails, heads]
expected_freq = [n_flips / 2, n_flips / 2]    # a fair coin splits the flips evenly

# Calculate the chi-square test statistic and p-value
chi2, p_value = stats.chisquare(observed_freq, expected_freq)

# Print the results
print("Chi-square test statistic:", chi2)
print("p-value:", p_value)

# The goodness-of-fit test is non-directional: rejecting the null only says the
# observed frequencies differ from a fair coin, not which side is favoured.
# Failing to reject does not prove fairness, only that there is insufficient
# evidence against it.
alpha = 0.05
if p_value < alpha:
    print("Reject null hypothesis: the observed frequencies differ from those of a fair coin.")
else:
    print("Fail to reject null hypothesis: no significant evidence that the coin is unfair.")


In [None]:
Q9. A study was conducted to determine if there is an association between smoking status (smoker or
non-smoker) and lung cancer diagnosis (yes or no). The results are shown in the contingency table below.
Conduct a chi-square test for independence to determine if there is a significant association between
smoking status and lung cancer diagnosis.

In [None]:
from scipy.stats import chi2_contingency

# Q9: chi-square test of independence on a 2x2 table.
# NOTE(review): the table referenced in the question is not reproduced in this
# notebook; confirm these counts and which rows/columns correspond to smoking
# status and diagnosis.
first_row = [60, 140]
second_row = [30, 170]
obs = [first_row, second_row]

# Unpack the statistic, p-value, degrees of freedom and expected counts
result = chi2_contingency(obs)
stat, p, dof, expected = result

print("Chi-square statistic: ", stat)
print("p-value: ", p)


In [None]:
Q10. A study was conducted to determine if the proportion of people who prefer milk chocolate, dark
chocolate, or white chocolate is different in the U.S. versus the U.K. A random sample of 500 people from
the U.S. and a random sample of 500 people from the U.K. were surveyed. The results are shown in the
contingency table below. Conduct a chi-square test for independence to determine if there is a significant
association between chocolate preference and country of origin.

In [None]:
from scipy.stats import chi2_contingency

# Q10: chi-square test of independence on a 2x3 table (two samples of 500).
# NOTE(review): the table referenced in the question is not reproduced in this
# notebook; confirm these counts and the row/column order (country vs.
# chocolate type).
first_sample = [200, 150, 150]
second_sample = [225, 175, 100]
obs = [first_sample, second_sample]

# Unpack the statistic, p-value, degrees of freedom and expected counts
result = chi2_contingency(obs)
stat, p, dof, expected = result

print("Chi-square statistic: ", stat)
print("p-value: ", p)


In [None]:
Q11. A random sample of 30 people was selected from a population with an unknown mean and standard
deviation. The sample mean was found to be 72 and the sample standard deviation was found to be 10.
Conduct a hypothesis test to determine if the population mean is significantly different from 70. Use a
significance level of 0.05.

In [None]:
from scipy.stats import ttest_1samp

# Q11: one-sample, two-sided t-test computed from summary statistics.
# Only the sample mean, standard deviation and size are known, so the test
# statistic is computed directly; ttest_1samp needs the raw observations,
# which are not available here.
from scipy.stats import t as t_dist

sample_mean = 72
pop_mean = 70        # hypothesised population mean (H0: mu = 70)
sample_std = 10
n = 30
significance_level = 0.05

# t = (x_bar - mu0) / (s / sqrt(n)), with n - 1 degrees of freedom
standard_error = sample_std / n ** 0.5
t_stat = (sample_mean - pop_mean) / standard_error
dof = n - 1

# The question asks whether the mean is "different from" 70, so the test is
# two-sided: double the upper-tail probability of |t|.
p_val = 2 * t_dist.sf(abs(t_stat), dof)

print("t-statistic:", t_stat)
print("p-value:", p_val)

if p_val < significance_level:
    print("Reject the null hypothesis: the population mean differs significantly from", pop_mean)
else:
    print("Fail to reject the null hypothesis: no significant evidence that the population mean differs from", pop_mean)
