In [2]:
import math
import matplotlib.pyplot as plt

def plot_histograms(df):
    num_columns = df.select_dtypes(include=['float64', 'int64']).columns
    num_cols = len(num_columns)

    # Calculate the number of rows and columns for the layout dynamically
    n_cols = 2  # Set the number of columns for the grid layout (reduce to make graphs smaller)
    n_rows = math.ceil(num_cols / n_cols)  # Calculate rows based on number of columns

    # Adjust the figure size to make the plots smaller (smaller figsize reduces individual plot size)
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(10, n_rows * 3))  # Reduced figure size
    axes = axes.flatten()  # Flatten the 2D array of axes to 1D for easy iteration

    for i, col in enumerate(num_columns):
        df[col].hist(bins=15, ax=axes[i], edgecolor='black')
        axes[i].set_title(f'Histogram of {col}')
    
    # Hide any unused subplots
    for i in range(num_cols, len(axes)):
        fig.delaxes(axes[i])
    
    plt.tight_layout()
    plt.suptitle("Histograms for Numerical Columns", y=1.02)
    plt.show()


In [8]:
import pandas as pd
from scipy.stats import chi2_contingency

# Function to perform chi-square test between two columns
def chi_square_test(df, col1, col2):
    # Create a contingency table
    contingency_table = pd.crosstab(df[col1], df[col2])

    # Perform Chi-Square test
    chi2, p, dof, expected = chi2_contingency(contingency_table)
    
    if p < 0.05:
        result = 'significant' + 'there is an association'
    else:
        result = 'insignificant'

    print(f"Chi-Square Test between {col1} and {col2}:")
    print(f"Chi2 Statistic: {chi2}")
    print(f"P-value: {p} - ({result}) \n")
    return p


In [11]:
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix

def plot_confusion_matrix_with_legend(y_test, y_pred, title='Confusion Matrix with Legend'):
    """
    Plots a confusion matrix with class labels and a legend.
    
    Parameters:
    y_test : array-like
        True labels of the test data.
    y_pred : array-like
        Predicted labels from the model.
    title : str, optional
        Title for the confusion matrix plot.
    """
    # Confusion matrix
    conf_matrix = confusion_matrix(y_test, y_pred)

    # Create the labels for the length of stay categories (adjust according to your class mapping)
    class_labels = sorted(y_test.unique())  # Assuming y_test contains numeric or categorical labels for length of stay

    # Plot the confusion matrix
    plt.figure(figsize=(10, 7))
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', cbar=False,
                xticklabels=class_labels, yticklabels=class_labels)

    plt.title(title)
    plt.xlabel('Predicted Label')
    plt.ylabel('True Label')

    # Show the plot
    plt.show()


In [12]:
import importlib
import pkg_resources

def check_requirements(requirements_file='requirements.txt'):
    with open(requirements_file, 'r') as file:
        for line in file:
            package = line.strip().split('==')[0]
            try:
                importlib.import_module(package)
                print(f"{package} is installed.")
            except ImportError:
                print(f"{package} is NOT installed.")

# Check for installed packages and correct versions



pandas is installed.
numpy is installed.
scikit-learn is NOT installed.
seaborn is installed.
matplotlib is installed.
plotly is installed.
scipy is installed.
xgboost is installed.
pandas==2.0.3 version is correct.
numpy==1.24.2 version conflict: numpy 1.24.3 is installed but numpy==1.24.2 is required
scikit-learn==1.3.0 version is correct.
seaborn==0.12.2 version is correct.
matplotlib==3.8.0 version conflict: matplotlib 3.7.2 is installed but matplotlib==3.8.0 is required
plotly==5.15.0 version conflict: plotly 5.9.0 is installed but plotly==5.15.0 is required
scipy==1.11.2 version conflict: scipy 1.11.1 is installed but scipy==1.11.2 is required
xgboost==1.7.6 version conflict: xgboost 2.1.1 is installed but xgboost==1.7.6 is required
