In [2]:
import pandas as pd
from scipy.stats import ks_2samp

def missing_data_count(df, column):
    """Return how many entries of ``df[column]`` are null."""
    null_mask = df[column].isna()
    return null_mask.sum()

def missing_data_percentage(df, column):
    """Return the share of null entries in ``df[column]`` as a percentage."""
    null_fraction = df[column].isna().mean()
    return null_fraction * 100

def missing_department_info(df):
    """Return the number of rows whose ``'Department'`` value is null."""
    department_missing = df['Department'].isna()
    return department_missing.sum()

def duplicate_entries_count(df):
    """Return the number of rows that repeat an earlier row of ``df``."""
    repeated_rows = df.duplicated()
    return repeated_rows.sum()

def duplicate_supplier_names(df):
    """Return how many ``'Supplier Name'`` values repeat an earlier one."""
    repeated_names = df['Supplier Name'].duplicated()
    return repeated_names.sum()

def duplicate_product_ids(df):
    """Return how many ``'Product ID'`` values repeat an earlier one."""
    repeated_ids = df['Product ID'].duplicated()
    return repeated_ids.sum()

def inconsistent_date_format(df, column):
    """Report whether every value in ``df[column]`` parses as a datetime.

    The check is read-only: parsing is attempted on the column's values and
    the parsed result is discarded, so the caller's DataFrame is not modified.

    Args:
        df: DataFrame holding the column to check.
        column: Label of the column to validate.

    Returns:
        ``"No inconsistencies detected"`` when ``pd.to_datetime`` accepts the
        whole column; otherwise ``"Date format issue: <error text>"``. Any
        exception raised while looking up or parsing the column is reported
        this way rather than propagated.
    """
    try:
        # Parse without assigning back: a validation helper should not
        # rewrite the caller's data as a side effect.
        pd.to_datetime(df[column], errors='raise')
    except Exception as e:
        return f"Date format issue: {e}"
    return "No inconsistencies detected"

def inconsistent_phone_number_format(df, column):
    """Return the rows whose ``column`` value is not a well-formed phone number.

    A value is accepted when it consists of an optional leading ``+``
    followed by 10 to 15 characters drawn from digits, whitespace, ``-``,
    ``(`` and ``)``.

    Args:
        df: DataFrame holding the phone-number column.
        column: Label of a string column to validate.

    Returns:
        The subset of ``df`` whose ``column`` value does not match the
        pattern. Rows with a missing value are included in that subset.
    """
    # na=False makes missing entries count as "does not match". Without it
    # the mask contains NaN and cannot be negated with ``~``.
    matches_format = df[column].str.contains(
        r'^\+?[0-9\s\-()]{10,15}$', regex=True, na=False
    )
    return df[~matches_format]

def inconsistent_state_abbreviations(df, column):
    """Expand known state abbreviations in place and return the null rows.

    ``df[column]`` is overwritten: ``CA``, ``NY``, ``TX`` and ``FL`` are
    replaced by their full state names and every other value is kept as is.
    The returned frame holds the rows whose ``column`` is null afterwards.
    """
    full_names = {
        'CA': 'California',
        'NY': 'New York',
        'TX': 'Texas',
        'FL': 'Florida',
    }

    def expand(value):
        # Values without a mapping fall through unchanged.
        return full_names.get(value, value)

    df[column] = df[column].apply(expand)
    null_rows = df[column].isna()
    return df[null_rows]

def compare_monthly_revenues(df, column, months):
    """Compare the distribution of ``column`` between two months.

    Runs a two-sample Kolmogorov-Smirnov test on the values of ``column``
    taken from the rows whose ``'Month'`` equals each of the two given months.

    Args:
        df: DataFrame with a ``'Month'`` column and the column to compare.
        column: Label of the column whose values are compared.
        months: Exactly two ``'Month'`` values, in the order their samples
            are passed to the test.

    Returns:
        The p-value of the two-sample KS test.

    Raises:
        ValueError: If ``months`` does not contain exactly two entries.
    """
    # ks_2samp takes exactly two samples. Unpacking any other number would
    # fail opaquely or push a sample into one of its option parameters.
    if len(months) != 2:
        raise ValueError(
            f"Expected exactly two months to compare, got {len(months)}"
        )
    first_month, second_month = months
    first_sample = df.loc[df['Month'] == first_month, column]
    second_sample = df.loc[df['Month'] == second_month, column]
    _, p_value = ks_2samp(first_sample, second_sample)
    return p_value

def compare_user_engagement(df, column, quarters):
    """Compare the distribution of ``column`` between two quarters.

    Runs a two-sample Kolmogorov-Smirnov test on the values of ``column``
    taken from the rows whose ``'Quarter'`` equals each of the two given
    quarters.

    Args:
        df: DataFrame with a ``'Quarter'`` column and the column to compare.
        column: Label of the column whose values are compared.
        quarters: Exactly two ``'Quarter'`` values, in the order their
            samples are passed to the test.

    Returns:
        The p-value of the two-sample KS test.

    Raises:
        ValueError: If ``quarters`` does not contain exactly two entries.
    """
    # ks_2samp takes exactly two samples. Unpacking any other number would
    # fail opaquely or push a sample into one of its option parameters.
    if len(quarters) != 2:
        raise ValueError(
            f"Expected exactly two quarters to compare, got {len(quarters)}"
        )
    first_quarter, second_quarter = quarters
    first_sample = df.loc[df['Quarter'] == first_quarter, column]
    second_sample = df.loc[df['Quarter'] == second_quarter, column]
    _, p_value = ks_2samp(first_sample, second_sample)
    return p_value

def compare_stock_prices(df, column):
    """Compare month-end averages of ``column`` against its raw values.

    Resamples ``df[column]`` to month-end bins, averages each bin, and runs a
    two-sample Kolmogorov-Smirnov test between those monthly means and the
    original values, ignoring nulls on both sides.

    Args:
        df: DataFrame indexed in a way ``resample`` accepts (for example a
            ``DatetimeIndex``).
        column: Label of the numeric column to compare.

    Returns:
        The p-value of the two-sample KS test.
    """
    values = df[column]
    # Resample only the requested column so unrelated non-numeric columns
    # cannot break the mean. The explicit MonthEnd offset avoids depending
    # on the spelling of the month-end frequency alias.
    monthly_means = values.resample(pd.offsets.MonthEnd()).mean()
    _, p_value = ks_2samp(monthly_means.dropna(), values.dropna())
    return p_value
