# Chi-Squared Test of Independence

This notebook demonstrates how to use the chi-squared test of independence to assess whether two categorical variables are independent of each other.


## Setup

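Ignore this part — it is boilerplate that loads the notebook extensions, imports the Python libraries, and configures plotting and warnings.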

In [None]:
# Load the rpy2 IPython extension, which provides the %%R cell magic used by
# the R cells in this notebook.
%load_ext rpy2.ipython
# Load autoreload and set mode 2: reload all imported Python modules before
# executing each cell.
%load_ext autoreload
%autoreload 2

# Render matplotlib figures inline in the cell output.
%matplotlib inline  
from matplotlib import rcParams
# Default matplotlib figure size as (width, height) in inches.
# NOTE(review): a height of 100 inches is unusually tall -- confirm it is intended.
rcParams['figure.figsize'] = (16, 100)

import warnings
from rpy2.rinterface import RRuntimeWarning
# Blanket filter: suppresses every Python-level warning for the rest of the session.
warnings.filterwarnings("ignore") # Ignore all warnings
# Narrower alternative (disabled): suppress only the RRuntimeWarning category.
# warnings.filterwarnings("ignore", category=RRuntimeWarning) # Show some warnings

# Standard data / plotting / display imports.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from IPython.display import display, HTML

In [None]:
%%javascript
// Disable auto-scrolling of long cell outputs.
// `IPython` is a global of the classic Notebook front end. In front ends that
// do not define it (e.g. JupyterLab) an unguarded reference would throw a
// ReferenceError, so check for it first and do nothing when it is absent.
if (typeof IPython !== 'undefined' && IPython.OutputArea) {
    // Always answer "no" when the output area asks whether it should scroll.
    IPython.OutputArea.prototype._should_scroll = function(lines) {
        return false;
    };
}

## Load R libraries and data

In [None]:
%%R -o mtcars

# Commonly used R packages.
# library() stops with an error when the package is not installed, whereas
# require() only warns and returns FALSE, which would defer the failure to the
# first later cell that calls ggplot() or %>%.
library('tidyverse')

In [None]:
# Display the mtcars object that the previous `%%R -o mtcars` cell exported
# from R into the Python namespace.
mtcars

In [None]:
%%R

# Recode the 0/1 indicator columns `am` and `vs` as labelled factors so that
# tables and plots show words instead of numbers.
#
# Start from the pristine built-in dataset on every run: once the columns have
# been recoded, calling factor(..., levels = c(0, 1)) on them again would match
# none of the existing labels and turn every value into NA. Re-assigning from
# datasets::mtcars keeps this cell safe to re-run.
mtcars <- datasets::mtcars
mtcars$am <- factor(mtcars$am, levels = c(0, 1), labels = c('automatic', 'manual'))
mtcars$vs <- factor(mtcars$vs, levels = c(0, 1), labels = c('V-Shaped', 'Straight'))

# Preview the first five rows of the recoded data.
head(mtcars, 5)


## `table` in R 

In [None]:
%%R 

# Cross-tabulate the `vs` column (rows) against the `am` column (columns).
cross_table <- table(mtcars[['vs']], mtcars[['am']])

# Print the resulting contingency table.
cross_table

In [None]:
%%R 

# Bar chart of row counts per `am` level, drawn in one facet panel per `vs` level.
ggplot(data = mtcars, mapping = aes(x = am, fill = am)) +
  geom_bar(position = 'dodge2') +
  facet_wrap(~vs) +
  scale_y_continuous(breaks = seq(from = 0, to = 20, by = 2)) +
  labs(
    title = 'Transmission type by engine shape',
    x = '',
    y = 'Count'
  ) +
  theme_minimal()



## Chi-Squared Test

In [None]:
%%R 

# Pearson chi-squared test on the contingency table built above.
# correct = FALSE switches off the continuity correction that chisq.test()
# applies by default to 2-by-2 tables.
chisq.test(x = cross_table, correct = FALSE)