### Basic checks for the assumptions of the chi-square test

In [2]:
import pandas as pd
import numpy as np

def _is_categorical_like(series):
    """Return True if *series* has an object, string or pandas categorical dtype."""
    dtype = series.dtype
    # CategoricalDtype is tested first: it is a pandas extension dtype that
    # NumPy's dtype machinery (np.issubdtype) cannot interpret at all.
    return (
        isinstance(dtype, pd.CategoricalDtype)
        or pd.api.types.is_object_dtype(dtype)
        or pd.api.types.is_string_dtype(dtype)
    )


def _cramers_v(observed):
    """Return Cramér's V (0 = no association, 1 = perfect) for a table of counts."""
    observed = np.asarray(observed, dtype=float)
    total = observed.sum()
    # V is undefined for an empty table or when either variable has a single
    # level; report "no measurable association" in those cases.
    smallest_dim = min(observed.shape) - 1
    if total == 0 or smallest_dim < 1:
        return 0.0
    row_totals = observed.sum(axis=1, keepdims=True)
    col_totals = observed.sum(axis=0, keepdims=True)
    expected = row_totals @ col_totals / total
    # Skip cells with a zero expected count so they contribute nothing
    # instead of producing a division by zero.
    nonzero = expected > 0
    chi2 = (((observed - expected) ** 2)[nonzero] / expected[nonzero]).sum()
    return float(np.sqrt(chi2 / (total * smallest_dim)))


def check_chi_square_assumptions(df, column1, column2):
    """Run basic sanity checks before a chi-square test of independence.

    The checks are performed in order and the message of the first one that
    fails is returned:

    1. Both columns must be categorical (object, string or ``category`` dtype).
    2. Every cell of the ``column1`` x ``column2`` contingency table must hold
       at least 5 observations. An empty table fails this check.
    3. The association between the two columns, measured with Cramér's V,
       must be below 0.8. Pearson correlation is not defined for non-numeric
       columns, so ``DataFrame.corr`` cannot be used here.

    NOTE: check 3 is a heuristic on the strength of association between the
    variables. Independence of the individual observations depends on how the
    data was collected and cannot be verified from the table.

    Args:
        df: DataFrame holding the data.
        column1: Label of the first column.
        column2: Label of the second column.

    Returns:
        A human-readable string describing the first failed check, or a
        message saying that the assumptions appear to be met.
    """
    # Check for categorical variables
    if not (_is_categorical_like(df[column1]) and _is_categorical_like(df[column2])):
        return "Both variables should be categorical."

    # Check for large sample size
    contingency_table = pd.crosstab(df[column1], df[column2])
    if contingency_table.size == 0 or (contingency_table.values < 5).any():
        return "Sample size is too small, there should be at least 5 observations in each cell of the contingency table."

    # Check the strength of association between the variables
    if _cramers_v(contingency_table.values) >= 0.8:
        return "The variables are not independent, the correlation between them is too high."

    # Assumptions appear to be met
    return "Assumptions for the chi-square test of independence appear to be met."


### Chi-square test of independence

In [1]:
import pandas as pd
import numpy as np
from scipy import stats

def chi_square_test_of_independence(df, alpha=0.05):
    """Run a chi-square test of independence on every pair of columns.

    For each unordered pair of columns (in column order) a contingency table
    is built with ``pd.crosstab`` and passed to
    ``scipy.stats.chi2_contingency``.

    Args:
        df: DataFrame whose columns are tested pairwise.
        alpha: Significance level; the null hypothesis is marked
            ``'Rejected'`` when the p-value is strictly below it.

    Returns:
        A DataFrame with one row per column pair and the columns
        ``'Column 1'``, ``'Column 2'``, ``'Chi-Square Statistic'``,
        ``'p-Value'`` and ``'Null Hypothesis'``. In the last column
        ``'Accepted'`` means the null hypothesis could not be rejected.
    """
    result_columns = ['Column 1', 'Column 2', 'Chi-Square Statistic', 'p-Value', 'Null Hypothesis']
    columns = df.columns
    # Rows are collected in a list and the frame is built once at the end:
    # DataFrame.append was removed in pandas 2.0 and copied the whole frame
    # on every call.
    rows = []
    for i in range(len(columns)):
        for j in range(i + 1, len(columns)):
            column1 = columns[i]
            column2 = columns[j]
            contingency_table = pd.crosstab(df[column1], df[column2])
            chi2, p, _dof, _expected = stats.chi2_contingency(contingency_table)
            rows.append({
                'Column 1': column1,
                'Column 2': column2,
                'Chi-Square Statistic': chi2,
                'p-Value': p,
                'Null Hypothesis': 'Rejected' if p < alpha else 'Accepted',
            })
    # Passing the column list keeps the expected header when there are no pairs.
    return pd.DataFrame(rows, columns=result_columns)


In [3]:
import pandas as pd
import numpy as np

def chi_square(df, column1, column2):
    """Chi-square test of independence between two columns of *df*.

    Cross-tabulates ``df[column1]`` against ``df[column2]`` and runs
    ``stats.chi2_contingency`` on the resulting table.

    Returns:
        A ``(statistic, p_value)`` tuple; the remaining outputs of
        ``chi2_contingency`` are discarded.
    """
    observed_counts = pd.crosstab(df[column1], df[column2])
    statistic, p_value, *_ = stats.chi2_contingency(observed_counts)
    return (statistic, p_value)


In [4]:
import pandas as pd
import numpy as np
from scipy import stats

def chi_square_test_of_independence(df):
    """Chi-square test of independence for every pair of columns in *df*.

    Pairs are visited in column order (first column against each later one,
    then the second, and so on). Each pair is cross-tabulated and passed to
    ``stats.chi2_contingency``.

    Returns:
        A list of ``(column1, column2, statistic, p_value)`` tuples, one per
        pair.
    """
    labels = df.columns
    n_labels = len(labels)
    # Enumerate every unordered pair up front, keeping the original order.
    pairs = [
        (labels[first], labels[second])
        for first in range(n_labels)
        for second in range(first + 1, n_labels)
    ]

    outcomes = []
    for left, right in pairs:
        table = pd.crosstab(df[left], df[right])
        statistic, p_value, *_ = stats.chi2_contingency(table)
        outcomes.append((left, right, statistic, p_value))
    return outcomes
