In [2]:
import math
import matplotlib.pyplot as plt

def plot_histograms(df, n_cols=2, bins=15):
    """Draw a grid of histograms, one per ``float64``/``int64`` column of ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose numerical columns are plotted. It is only read.
    n_cols : int, default 2
        Number of subplot columns in the grid. The figure width is fixed, so
        more columns means narrower individual plots.
    bins : int, default 15
        Number of bins used for every histogram.

    Returns
    -------
    None
        The figure is rendered with ``plt.show()``.

    Raises
    ------
    ValueError
        If ``n_cols`` is smaller than 1.

    Notes
    -----
    Only columns whose dtype is exactly ``float64`` or ``int64`` are selected;
    columns with any other dtype are skipped.
    """
    if n_cols < 1:
        raise ValueError("n_cols must be at least 1")

    num_columns = df.select_dtypes(include=['float64', 'int64']).columns
    num_cols = len(num_columns)

    # Nothing to draw: plt.subplots() rejects a zero-row grid, so bail out early.
    if num_cols == 0:
        print("No numerical columns to plot.")
        return

    # Enough rows to hold every column; the last row may be partially filled.
    n_rows = math.ceil(num_cols / n_cols)

    # squeeze=False keeps `axes` a 2D array even for a 1x1 grid, so flatten()
    # below is always valid.
    fig, axes = plt.subplots(
        n_rows, n_cols, figsize=(10, n_rows * 3), squeeze=False
    )
    axes = axes.flatten()

    for ax, col in zip(axes, num_columns):
        df[col].hist(bins=bins, ax=ax, edgecolor='black')
        ax.set_title(f'Histogram of {col}')

    # Remove the unused axes left over in the final row.
    for ax in axes[num_cols:]:
        fig.delaxes(ax)

    fig.tight_layout()
    # Title is added after tight_layout and nudged above the grid (y > 1).
    fig.suptitle("Histograms for Numerical Columns", y=1.02)
    plt.show()


In [8]:
import pandas as pd
from scipy.stats import chi2_contingency

# Function to perform chi-square test between two columns
def chi_square_test(df, col1, col2, alpha=0.05):
    """Run a chi-square test of independence between two columns and report it.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing both columns.
    col1, col2 : str
        Names of the columns to cross-tabulate.
    alpha : float, default 0.05
        Significance threshold; the result is labelled significant when the
        p-value is strictly below it.

    Returns
    -------
    float
        The p-value returned by ``scipy.stats.chi2_contingency``.

    Side effects
    ------------
    Prints the test statistic, the p-value and the significance label.
    """
    # Observed frequency of every (col1 value, col2 value) pair.
    contingency_table = pd.crosstab(df[col1], df[col2])

    # Degrees of freedom and expected frequencies are not reported, so drop them.
    chi2, p, _, _ = chi2_contingency(contingency_table)

    if p < alpha:
        result = 'significant: there is an association'
    else:
        result = 'insignificant'

    print(f"Chi-Square Test between {col1} and {col2}:")
    print(f"Chi2 Statistic: {chi2}")
    print(f"P-value: {p} - ({result}) \n")
    return p
