In [13]:
import pandas as pd
import numpy as np
from scipy.stats import gamma

# Load the Excel file
df = pd.read_excel("newgame.xlsx")

# Extract Global_Sales and filter positive values
global_sales = df['Global_Sales']
global_sales = global_sales[global_sales > 0].dropna()

# Method of Moments Estimation
sample_mean = global_sales.mean()
sample_var = global_sales.var()

# MoM formulas for Gamma(a, b)
b_mom = sample_var / sample_mean
a_mom = sample_mean / b_mom

print("Method of Moments Estimates:")
print(f"a (shape) ≈ {a_mom:.4f}")
print(f"b (scale) ≈ {b_mom:.4f}")

# Maximum Likelihood Estimation (fit Gamma with loc=0)
a_mle, loc_mle, b_mle = gamma.fit(global_sales, floc=0)

print("\nMaximum Likelihood Estimates:")
print(f"a (shape) ≈ {a_mle:.4f}")
print(f"b (scale) ≈ {b_mle:.4f}")


Method of Moments Estimates:
a (shape) ≈ 2.1766
b (scale) ≈ 6.7504

Maximum Likelihood Estimates:
a (shape) ≈ 3.9330
b (scale) ≈ 3.7359


In [14]:
import pandas as pd
from scipy.stats import chi2

# Sample statistics
n = len(global_sales)
sample_var = global_sales.var(ddof=1)  # unbiased estimator

# Confidence level
alpha = 0.05

# Chi-squared critical values (corrected)
chi2_lower = chi2.ppf(alpha / 2, df=n - 1)       # lower tail
chi2_upper = chi2.ppf(1 - alpha / 2, df=n - 1)   # upper tail

# Confidence interval for the variance
lower_bound = (n - 1) * sample_var / chi2_upper  # note: upper goes with lower_bound
upper_bound = (n - 1) * sample_var / chi2_lower  # note: lower goes with upper_bound

print(f"95% Confidence Interval for the Variance: ({lower_bound:.4f}, {upper_bound:.4f})")



95% Confidence Interval for the Variance: (76.3680, 134.0711)


In [6]:
import pandas as pd
import numpy as np
from scipy import stats

# Load Excel file
df = pd.read_excel('newgame.xlsx')  # Ensure this file is in your working directory

# -------------------------------
# Part 1: Confidence Interval for Difference in Means
# -------------------------------

# Choose two non-overlapping genres
genre1 = 'Sports'
genre2 = 'Platform'

# Filter data for each genre
sales1 = df[df['Genre'] == genre1]['Global_Sales'].dropna()
sales2 = df[df['Genre'] == genre2]['Global_Sales'].dropna()

# Calculate means, standard deviations, and sample sizes
n1, n2 = len(sales1), len(sales2)
mean1, std1 = sales1.mean(), sales1.std(ddof=1)
mean2, std2 = sales2.mean(), sales2.std(ddof=1)

# Compute standard error and margin of error
se_diff = np.sqrt(std1**2 / n1 + std2**2 / n2)
z_critical = stats.norm.ppf(0.975)  # 95% CI
margin_of_error = z_critical * se_diff

# Confidence Interval
ci_lower = (mean1 - mean2) - margin_of_error
ci_upper = (mean1 - mean2) + margin_of_error

# Output for Part 1
print("=== 95% Confidence Interval for Difference in Means ===")
print(f"Genre 1: {genre1} | Mean: {mean1:.3f}")
print(f"Genre 2: {genre2} | Mean: {mean2:.3f}")
print(f"95% CI for (Mean of {genre1} - Mean of {genre2}): ({ci_lower:.3f}, {ci_upper:.3f})\n")




=== 95% Confidence Interval for Difference in Means ===
Genre 1: Sports | Mean: 29.502
Genre 2: Platform | Mean: 15.745
95% CI for (Mean of Sports - Mean of Platform): (-8.942, 36.456)



In [9]:
# -------------------------------
# Part 2: Hypothesis Test for Binary Response (Bernoulli)
# -------------------------------

# Define binary response variable: success if Global_Sales > 10 million
df['Success'] = (df['Global_Sales'] > 10).astype(int)

# Sample proportion and size
p_hat = df['Success'].mean()
n = len(df['Success'])
p0 = 0.5  # Null hypothesis value

# Z-test statistic and p-value
z_stat = (p_hat - p0) / np.sqrt(p0 * (1 - p0) / n)
p_value = 1 - stats.norm.cdf(z_stat)

# Output for Part 2
print("=== Hypothesis Test for Proportion (Bernoulli) ===")
print("H0: p <= 0.5 vs H1: p > 0.5")
print(f"Sample proportion (p̂): {p_hat:.3f}")
print(f"Z-statistic: {z_stat:.3f}")
print(f"P-value: {p_value:.4f}")
if p_value < 0.05:
    print("Conclusion: Reject H0 at 5% significance level. Evidence suggests p > 0.5.")
else:
    print("Conclusion: Fail to reject H0. No strong evidence that p > 0.5.")

=== Hypothesis Test for Proportion (Bernoulli) ===
H0: p <= 0.5 vs H1: p > 0.5
Sample proportion (p̂): 0.626
Z-statistic: 2.513
P-value: 0.0060
Conclusion: Reject H0 at 5% significance level. Evidence suggests p > 0.5.
