# 🧪 Hypothesis Testing & Linear Regression: Chess Playing vs Cognitive Performance

## 📌 Objective
Analyze the impact of chess playing on multiple cognitive skills:
- Reaction Time
- Stroop Test
- Digit Span Memory Test
- Verbal Memory Test

using t-tests and linear regression models.

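**Null Hypothesis (H₀):** Chess playing has no effect on cognitive outcomes: the mean before-to-after improvement is the same on chess and non-chess days.

**Alternative Hypothesis (H₁):** Chess playing improves cognitive outcomes: the mean improvement is larger on chess days.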

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import ttest_ind
import statsmodels.api as sm

# Source CSV for each cognitive test (one file per test).
reaction_csv = '/mnt/data/reaction_time_datas - reaction_time_datas - Sheet1 (1) (2).csv'
digit_csv = '/mnt/data/processed_digit_span_data (1) (2).csv'
stroop_csv = '/mnt/data/Stroop_score_datas - Sheet1 (2).csv'
verbal_csv = '/mnt/data/verbal_test_data (1).csv'

# Read every dataset into its own DataFrame; cleaning happens in the next cell.
reaction_df = pd.read_csv(reaction_csv)
digit_df = pd.read_csv(digit_csv)
stroop_df = pd.read_csv(stroop_csv)
verbal_df = pd.read_csv(verbal_csv)


## 📚 Data Preparation

In [None]:
def _to_float(values):
    """Return `values` as a float Series, accepting decimal commas.

    Columns that pandas already parsed as numbers are cast directly, because
    the `.str` accessor is only available on text columns. In text columns
    every string cell has ',' replaced by '.' before the cast; non-string
    cells (e.g. NaN for a missing value) are passed through unchanged.
    """
    if pd.api.types.is_numeric_dtype(values):
        return values.astype(float)
    return values.map(
        lambda cell: cell.replace(',', '.') if isinstance(cell, str) else cell
    ).astype(float)


# Reaction Time Dataset
# Header names are stripped of surrounding whitespace before columns are
# looked up by name.
reaction_df.columns = reaction_df.columns.str.strip()
reaction_df['Date'] = pd.to_datetime(reaction_df['Date'], dayfirst=True)
for col in ('Reaction Time (Before)', 'Reaction Time (After)'):
    reaction_df[col] = _to_float(reaction_df[col])
# Before - After: positive when the 'After' time is lower than the 'Before' time.
reaction_df['Reaction Improvement'] = reaction_df['Reaction Time (Before)'] - reaction_df['Reaction Time (After)']

# Digit Span Dataset
# NOTE(review): unlike the other datasets this date is parsed without
# dayfirst=True -- confirm the file's date format really differs.
digit_df['Date'] = pd.to_datetime(digit_df['Date'])
# After - Before: positive when the 'After' span is higher.
digit_df['Digit Span Improvement'] = digit_df['Digit Span (After)'] - digit_df['Digit Span (Before)']

# Stroop Dataset
# The first two rows are skipped -- presumably extra header rows from the
# sheet export (TODO confirm) -- and the four columns are renamed explicitly.
stroop_df_clean = stroop_df.iloc[2:].copy()
stroop_df_clean.columns = ['Date', 'Chess Played?', 'Stroop Score (Before)', 'Stroop Score (After)']
stroop_df_clean['Date'] = pd.to_datetime(stroop_df_clean['Date'], dayfirst=True)
for col in ('Stroop Score (Before)', 'Stroop Score (After)'):
    # Same conversion as the reaction times, so decimal commas are accepted here too.
    stroop_df_clean[col] = _to_float(stroop_df_clean[col])
# After - Before: positive when the 'After' score is higher.
stroop_df_clean['Stroop Improvement'] = stroop_df_clean['Stroop Score (After)'] - stroop_df_clean['Stroop Score (Before)']

# Verbal Memory Dataset
verbal_df['Date'] = pd.to_datetime(verbal_df['Date'], dayfirst=True)
# Long -> wide: one row per (Date, Chess Played?) with one column per
# 'Test Phase' value; the 'Before' and 'After' columns are used below.
# pivot() raises ValueError if an index/phase combination occurs twice.
verbal_pivot = verbal_df.pivot(index=['Date', 'Chess Played?'], columns='Test Phase', values='Verbal Memory Score').reset_index()
verbal_pivot['Verbal Improvement'] = verbal_pivot['After'] - verbal_pivot['Before']


## 🧪 T-tests: Comparing Chess vs Non-Chess Days

In [None]:
def _split_by_chess(frame, column):
    """Return `column` of `frame` as a (chess days, non-chess days) pair.

    Rows are selected by the 'Chess Played?' label: 'Yes' for the first Series,
    'No' for the second. Rows with any other label end up in neither group.
    """
    played = frame['Chess Played?']
    return frame.loc[played == 'Yes', column], frame.loc[played == 'No', column]


# Every comparison below is Welch's t-test (equal_var=False) on the
# improvement scores, with NaN observations ignored (nan_policy='omit').
# NOTE(review): ttest_ind is two-sided by default, whereas H1 in the
# Objective section is directional -- confirm this is the intended test.

# Reaction Time
reaction_chess, reaction_nochess = _split_by_chess(reaction_df, 'Reaction Improvement')
reaction_t, reaction_p = ttest_ind(
    reaction_chess, reaction_nochess, equal_var=False, nan_policy='omit'
)

# Digit Span
digit_chess, digit_nochess = _split_by_chess(digit_df, 'Digit Span Improvement')
digit_t, digit_p = ttest_ind(
    digit_chess, digit_nochess, equal_var=False, nan_policy='omit'
)

# Stroop
stroop_chess, stroop_nochess = _split_by_chess(stroop_df_clean, 'Stroop Improvement')
stroop_t, stroop_p = ttest_ind(
    stroop_chess, stroop_nochess, equal_var=False, nan_policy='omit'
)

# Verbal Memory
verbal_chess, verbal_nochess = _split_by_chess(verbal_pivot, 'Verbal Improvement')
verbal_t, verbal_p = ttest_ind(
    verbal_chess, verbal_nochess, equal_var=False, nan_policy='omit'
)

# Report the p-value of each comparison, rounded to four decimals.
print(f"Reaction Time Improvement t-test p-value: {reaction_p:.4f}")
print(f"Digit Span Improvement t-test p-value: {digit_p:.4f}")
print(f"Stroop Improvement t-test p-value: {stroop_p:.4f}")
print(f"Verbal Memory Improvement t-test p-value: {verbal_p:.4f}")

## 📈 Linear Regression: Chess Played vs Improvements

In [None]:
# Prepare Regression Data
# Dummy-code the chess indicator: 'Yes' -> 1, 'No' -> 0. Any other value
# (a missing or unexpected label) maps to NaN rather than 0, so such a row is
# excluded from the fit instead of silently counting as a non-chess day. This
# keeps the regression sample equal to the t-test sample, which only compares
# rows labelled 'Yes' against rows labelled 'No'.
CHESS_CODES = {'Yes': 1, 'No': 0}
for frame in (stroop_df_clean, reaction_df, digit_df, verbal_pivot):
    frame['Chess_Yes'] = frame['Chess Played?'].map(CHESS_CODES)

# Each model is improvement ~ const + Chess_Yes, so the Chess_Yes coefficient
# is the difference in mean improvement between chess and non-chess days.
# missing='drop' removes observations with NaN in the outcome or the regressor,
# mirroring nan_policy='omit' in the t-tests; with complete data the fit is
# the same as with the statsmodels default.

# Stroop Regression
X_stroop = sm.add_constant(stroop_df_clean['Chess_Yes'])
model_stroop = sm.OLS(stroop_df_clean['Stroop Improvement'], X_stroop, missing='drop').fit()

# Reaction Regression
X_reaction = sm.add_constant(reaction_df['Chess_Yes'])
model_reaction = sm.OLS(reaction_df['Reaction Improvement'], X_reaction, missing='drop').fit()

# Digit Span Regression
X_digit = sm.add_constant(digit_df['Chess_Yes'])
model_digit = sm.OLS(digit_df['Digit Span Improvement'], X_digit, missing='drop').fit()

# Verbal Regression
X_verbal = sm.add_constant(verbal_pivot['Chess_Yes'])
model_verbal = sm.OLS(verbal_pivot['Verbal Improvement'], X_verbal, missing='drop').fit()

# Print the full summary table of each fitted model, in the original order.
for fitted in (model_stroop, model_reaction, model_digit, model_verbal):
    print(fitted.summary())