# TTest

# Import Libraries

In [None]:
import numpy as np
import pandas as pd
from pandas.api.types import is_numeric_dtype

from scipy import stats
import matplotlib.pyplot as plt

import ipywidgets as widgets
from IPython.display import display

from google.colab import drive
drive.mount('/content/drive')

# Define Methods

In [None]:
#@title #Calculate Quantiles

def calculateQuantiles(df, colA, colB):
  """Tabulate quantiles of ``colB`` within each group of ``colA``.

  Args:
    df: DataFrame holding both columns.
    colA: Name of the grouping column.
    colB: Name of the numeric column whose quantiles are computed.

  Returns:
    DataFrame with one row per distinct ``colA`` value and the columns
    'Minimum', '10%', '25%', 'Median', '75%', '90%', 'Maximum'.
  """
  # Probability level -> display label, in output column order.
  level_labels = {
    0: 'Minimum',
    0.1: '10%',
    0.25: '25%',
    0.5: 'Median',
    0.75: '75%',
    0.9: '90%',
    1: 'Maximum',
  }

  per_group = df.groupby(colA)[colB].quantile(list(level_labels))

  # Pivot the quantile level out of the index so each level becomes a column.
  table = per_group.unstack(level=1)
  table.columns = list(level_labels.values())

  return table

In [None]:
#@title #Calculate Mean and Standard Deviation

def calculateMeanStd(df, colA, colB, crit_val):
  """Summarize ``colB`` per ``colA`` group: n, mean, std, SEM and an interval.

  Args:
    df: DataFrame holding both columns.
    colA: Name of the grouping column.
    colB: Name of the numeric column to summarize.
    crit_val: Critical value multiplied by the standard error of the mean to
      get the interval half-width. The row labels are always 'lower_95' and
      'upper_95', whatever confidence level ``crit_val`` corresponds to.

  Returns:
    Transposed DataFrame: one column per group and the rows 'size', 'mean',
    'std', 'std_error_mean', 'lower_95', 'upper_95'.
  """
  # Use 'count' (non-missing observations) rather than 'size' (all rows):
  # mean and std skip missing values, so the standard error must be based on
  # the same number of observations. The 'size' label is kept for callers.
  summary = df.groupby(colA)[colB].agg(['count', 'mean', 'std'])
  summary = summary.rename(columns={'count': 'size'})

  summary['std_error_mean'] = summary['std'] / np.sqrt(summary['size'])

  half_width = crit_val * summary['std_error_mean']
  summary['lower_95'] = summary['mean'] - half_width
  summary['upper_95'] = summary['mean'] + half_width

  return summary.transpose()

In [None]:
#@title #Perform T-Test

def ttest(df, colA, colB, groupA, groupB, confidence_level, crit_val):
  """Run a pooled two-sample t-test of ``colB`` between two ``colA`` groups.

  Prints the result table, draws the t-distribution with the tail areas
  shaded, and returns the results.

  Args:
    df: DataFrame holding both columns.
    colA: Name of the grouping column.
    colB: Name of the numeric column being compared.
    groupA: Value of ``colA`` identifying the first group.
    groupB: Value of ``colA`` identifying the second group.
    confidence_level: Confidence level, reported as-is in the output.
    crit_val: Critical value (supplied by the caller) used for the
      confidence limits of the difference.

  Returns:
    Single-row DataFrame with the columns 'Difference', 'Std Err Dif',
    'Upper CL Dif', 'Lower CL Dif', 'Confidence', 't Ratio', 'DF',
    'Prob > |t|', 'Prob > t' and 'Prob < t'.
  """
  # Drop missing values so a single NaN does not turn every statistic into
  # NaN and so the group sizes below count real observations only.
  group1 = df.loc[df[colA] == groupA, colB].dropna()
  group2 = df.loc[df[colA] == groupB, colB].dropna()
  n1, n2 = len(group1), len(group2)

  # Default ttest_ind is the equal-variance (pooled) test.
  t_statistic, p_value = stats.ttest_ind(group1, group2)
  difference = np.mean(group1) - np.mean(group2)
  degrees_of_freedom = n1 + n2 - 2

  # Pooled standard error, consistent with the pooled test above so that
  # Difference / Std Err Dif equals the reported t ratio.
  pooled_var = (
    (n1 - 1) * group1.var(ddof=1) + (n2 - 1) * group2.var(ddof=1)
  ) / degrees_of_freedom
  std_error_diff = np.sqrt(pooled_var * (1 / n1 + 1 / n2))
  ci_upper = difference + crit_val * std_error_diff
  ci_lower = difference - crit_val * std_error_diff

  t_ratio = t_statistic
  two_tailed_p_value = p_value
  # One-sided probabilities taken directly from the t distribution; halving
  # the two-sided p-value is only right for one sign of t.
  right_tailed_p_value = stats.t.sf(t_ratio, degrees_of_freedom)   # P(T > t)
  left_tailed_p_value = stats.t.cdf(t_ratio, degrees_of_freedom)   # P(T < t)

  t_test_results = pd.DataFrame({
    'Difference': difference,
    'Std Err Dif': std_error_diff,
    'Upper CL Dif': ci_upper,
    'Lower CL Dif': ci_lower,
    'Confidence': confidence_level,
    't Ratio': t_ratio,
    'DF': degrees_of_freedom,
    'Prob > |t|': two_tailed_p_value,
    'Prob > t': right_tailed_p_value,
    'Prob < t': left_tailed_p_value
  }, index=[0])

  print("\nT-Test Results:")
  print(t_test_results.iloc[0,:])
  print()

  # Create an array of x-values
  x = np.linspace(-4, 4, 500)

  # Calculate the probability density function (pdf) for the t-distribution
  pdf = stats.t.pdf(x, degrees_of_freedom)

  # Plotting the probability distribution
  plt.figure(figsize=(10, 6))

  # Plot the pdf
  plt.plot(x, pdf, label='PDF')

  # Fill the area where prob > |t|
  plt.fill_between(x, pdf, where=np.abs(x) > np.abs(t_ratio), color='green', label='prob > |t|')

  # Fill the area where prob < t
  plt.fill_between(x, pdf, where=x < t_ratio, color='red', label='prob < t')

  plt.legend()
  plt.show()

  return t_test_results


# Menu

In [None]:
#@title #Download File and Select Data

#@markdown Make sure the file containing the data you wish to analyze is in your google drive
file_type = "csv" #@param ["csv", "xlsx"]
file_name = 'Hotfas.csv' #@param ["estradiol.csv", "Hot Dogs.xlsx"] {type:"string", allow-input: true}
# Hot Dogs.xlsx

file_path = f"/content/drive/MyDrive/{file_name}"

match file_type:
  case "csv":
    df = pd.read_csv(file_path)
  case "xlsx":
    df = pd.read_excel(file_path)

#@markdown ---

In [None]:
#@title #Display Data

# Placeholder option shown in a dropdown when no usable column exists.
INVALID = 'Invalid'

# Scan the columns once: a column with exactly two distinct values can serve
# as the grouping column; any numeric column can serve as the measured one.
# NOTE(review): unique() also counts a missing value as a distinct value --
# confirm that is intended for columns containing NaN.
classes_col = []
continuous_col = []
for col in df.columns:
  if len(df[col].unique()) == 2:
    classes_col.append(col)
  if is_numeric_dtype(df[col]):
    continuous_col.append(col)

# Fall back to the placeholder so the dropdowns can still be built.
if not classes_col:
  print('ERROR - Data MUST contain strictly 2 classes\n')
  classes_col = [INVALID]
if not continuous_col:
  print('ERROR - No continuous data found\n')
  continuous_col = [INVALID]

# Grouping-column selector.
columnA_dropdown = widgets.Dropdown(
    options=classes_col,
    value=classes_col[0],
    description='Classes:',
    disabled=False,
)

# Measured-column selector.
columnB_dropdown = widgets.Dropdown(
    options=continuous_col,
    value=continuous_col[0],
    description='Continous:',
    disabled=False,
)

# Confidence-level selector.
confidence_dropdown = widgets.Dropdown(
    options=[0.90, 0.95, 0.99],
    value=0.95,
    description='CL:',
    disabled=False,
)

@widgets.interact(colA = columnA_dropdown, colB = columnB_dropdown, confidence_level = confidence_dropdown)
def user_selection(colA, colB, confidence_level):
  if INVALID == colA or INVALID == colB:
    return

  groupA, groupB, *_ = df[colA].unique()

  data_buttons = widgets.ToggleButtons(
      options=['Data', 'Quantiles', 'Mean & STD', 'T-Test'],
      description=' ',
      disabled=False,
      button_style='success', # 'success', 'info', 'warning', 'danger' or ''
      # tooltips=['Description of slow', 'Description of regular', 'Description of fast'],
  #     icons=['check'] * 3
  )

  alpha = 1-confidence_level
  alpha /= 2
  deg_free = df.shape[0]-1
  crit_val = np.abs(stats.t.ppf(q=alpha, df=deg_free-1))
  print(f"\nCritical Value: {crit_val}\n")

  @widgets.interact(data = data_buttons)
  def display_data(data):
    try:
      if colA != colB:
        match data:

          case 'Data':
            grouped_data = df.groupby(colA)[colB].apply(list)
            # Create a list of data values for each group
            data_values = [grouped_data[group] for group in grouped_data.index]

            # Plotting the box plot
            plt.boxplot(data_values)

            # Adding labels and title
            plt.xlabel(colA)
            plt.ylabel(colB)
            plt.xticks(range(1, len(grouped_data.index) + 1), grouped_data.index)

            print()
            print(grouped_data,'\n')
            plt.show()

          case 'Quantiles':
            quantiles = calculateQuantiles(df, colA, colB)
            print("\nQuantiles Table:\n")
            print(quantiles)

          case 'Mean & STD':
            mean_std = calculateMeanStd(df, colA, colB, crit_val)
            print("\nMean and Std. Dev. Table:\n")
            print(mean_std)

          case 'T-Test':
            print(f'\nGroup 1: {groupA}\nGroup 2: {groupB}')
            t_test_results = ttest(df, colA, colB, groupA, groupB, confidence_level, crit_val)

    except Exception:
      print("\nInvalid Input")
      plt.close()
