<a href="https://colab.research.google.com/github/changsksu/IMSE_Data_Science/blob/main/T2_Two_Sample_Test_Drug_Example.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# This code demonstrates how to use Hotelling T2 statistics and F test for two multivariate samples.
Ref. Chat-GPT 3.5 & Drug and Placebo data from Real Statistics
https://real-statistics.com/free-download/real-statistics-examples-workbook/

In [18]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import statistics
from scipy.stats import f

def hotelling_t2_test(sample1, sample2, alpha):
    n1, p = sample1.shape
    n2 = sample2.shape[0]
    n = n1 + n2

    xbar1 = np.mean(sample1, axis=0)
    xbar2 = np.mean(sample2, axis=0)

    s1 = np.cov(sample1, rowvar=False, bias=True)
    s2 = np.cov(sample2, rowvar=False, bias=True)

    pooled_cov = ((n1 - 1) * s1 + (n2 - 1) * s2) / (n - 2)

    t_squared = (n1 * n2 * np.dot((xbar1 - xbar2), np.dot(np.linalg.inv(pooled_cov), (xbar1 - xbar2)))) / (n * p)

    dof1 = p
    dof2 = n - p - 1
    critical_value = f.ppf(1 - alpha, dof1, dof2)

    p_value = 1 - f.cdf(t_squared, dof1, dof2)

    return t_squared, p_value, critical_value



In [20]:
# Example usage using simulated data:
sample1 = np.random.normal(loc=0, scale=1, size=(30, 2))
sample2 = np.random.normal(loc=0.5, scale=1, size=(30, 2))
alpha = 0.05

t_squared, p_value, critical_value = hotelling_t2_test(sample1, sample2, alpha)

print("Hotelling's T-squared statistic:", t_squared)
print("P-value:", p_value)
print("Critical value:", critical_value)
if t_squared > critical_value:
    print("Reject null hypothesis: The means are significantly different.")
else:
    print("Fail to reject null hypothesis: There is no significant difference in means.")

Hotelling's T-squared statistic: 0.11463418122286932
P-value: 0.8918973301222026
Critical value: 3.1588427192606465
Fail to reject null hypothesis: There is no significant difference in means.


In [None]:
# Example from Drug vs Placebo data
# retrive data from Github 641 folder
data = pd.read_csv('https://raw.githubusercontent.com/changsksu/K-State-IMSE641/master/Hotelling_T2.csv', sep=',', na_values=".")
#generate the Phase I data using filter
sample3 = data[data['ID'] == 'Drug']
sample4 = data[data['ID'] == 'Placebo']
sample3

In [30]:
#x3=sample3['Fever', 'Pressure', 'Aches']
x3=sample3.iloc[:, :3]
x4=sample4.iloc[:, :3]
sample1=x3.values
sample2=x4.values

In [31]:
# Example usage for the drug testing example:

alpha = 0.05

t_squared, p_value, critical_value = hotelling_t2_test(sample1, sample2, alpha)

print("Hotelling's T-squared statistic:", t_squared)
print("P-value:", p_value)
print("Critical value:", critical_value)
if t_squared > critical_value:
    print("Reject null hypothesis: The means are significantly different.")
else:
    print("Fail to reject null hypothesis: There is no significant difference in means.")

Hotelling's T-squared statistic: 1.4481573992783792
P-value: 0.2459922486126942
Critical value: 2.882604204261227
Fail to reject null hypothesis: There is no significant difference in means.
