In [2]:
import numpy as np
import pandas as pd
from pathlib import Path
from scipy.stats import rankdata, norm

def rank_difference_test(series, alpha=0.05):
    """
    Performs the Rank Difference test for randomness.

    The observations are ranked (ties receive the average rank) and the
    statistic U = sum(|R_i - R_{i-1}|) over consecutive observations is
    compared with its mean and variance under the null hypothesis of a
    random series, using a two-sided normal approximation.

    Parameters
    ----------
    series : array-like
        Observations in their original (e.g. time) order. Missing values
        (NaN or None) are dropped before ranking.
    alpha : float, default 0.05
        Two-sided significance level; must lie strictly between 0 and 1.

    Returns
    -------
    dict
        Keys "n", "U", "mu", "sigma^2", "Z", "Zcrit" and "Decision".
        When fewer than two valid observations remain, all statistics are
        NaN and the decision is "Not Enough Data". When the variance is not
        positive, "Z"/"Zcrit" are NaN and the decision is
        "Calc Error (Sigma=0)". Otherwise the statistics are rounded to two
        decimals and the decision states whether randomness was rejected.

    Raises
    ------
    ValueError
        If ``alpha`` is not strictly between 0 and 1.
    """
    if not 0 < alpha < 1:
        # Outside (0, 1) norm.ppf yields NaN/inf and the decision below would
        # be meaningless, so fail loudly instead.
        raise ValueError(f"alpha must be strictly between 0 and 1, got {alpha!r}")

    # Coerce to float first: np.isnan raises TypeError on object-dtype arrays
    # (e.g. a list or column containing None); the cast turns None into NaN.
    x = np.asarray(series, dtype=float)
    x = x[~np.isnan(x)]      # remove missing values
    n = len(x)

    if n < 2:
        return {
            "n": n,
            "U": np.nan,
            "mu": np.nan,
            "sigma^2": np.nan,
            "Z": np.nan,
            "Zcrit": np.nan,
            "Decision": "Not Enough Data"
        }

    # assign ranks (ties share the average of the ranks they span)
    ranks = rankdata(x, method='average')

    # compute U = sum |Ri - Ri-1|
    U = np.sum(np.abs(np.diff(ranks)))

    # mean and variance of U under the null hypothesis
    mu = ((n + 1) * (n - 1)) / 3
    sigma2 = ((n - 2) * (n + 1) * (4*n - 7)) / 90

    if sigma2 <= 0:
        # With n >= 2 this only happens for n == 2, where the (n - 2) factor
        # makes the variance zero and Z would be a division by zero.
        return {
            "n": n,
            "U": U,
            "mu": mu,
            "sigma^2": sigma2,
            "Z": np.nan,
            "Zcrit": np.nan,
            "Decision": "Calc Error (Sigma=0)"
        }

    sigma = np.sqrt(sigma2)

    # z value (absolute value, because the test is two-sided)
    Z = abs(U - mu) / sigma

    # two-sided critical value of the standard normal distribution
    Zcrit = norm.ppf(1 - alpha/2)

    # NOTE(review): the leading characters of the two labels below look like a
    # mis-decoded emoji; they are kept byte-for-byte because the labels are
    # written to the results file and may be matched on downstream - confirm.
    # The comparison uses the unrounded Z and Zcrit.
    if Z < Zcrit:
        result = "‚úÖ Random"
    else:
        result = "‚ùå Not Random"

    return {
        "n": n,
        "U": round(U, 2),
        "mu": round(mu, 2),
        "sigma^2": round(sigma2, 2),
        "Z": round(Z, 2),
        "Zcrit": round(Zcrit, 2),
        "Decision": result
    }

# ---- EDIT FILE PATHS ----
# file_path: Excel workbook to analyse; output_path: CSV that receives the
# combined results of every sheet.
file_path = Path(r"D:\climate change\monthly_averages.xlsx")
output_path = Path(r"D:\climate change\Randomness_RankDiff_AllSheets.csv")

# Open the workbook once; the sheet names are read from this handle below.
# Only the open itself is guarded, so an unrelated failure is never reported
# as a load error.
try:
    xls = pd.ExcelFile(file_path)
except FileNotFoundError:
    print(f"‚ùå ERROR: File not found at {file_path}")
    # exit() is an interactive-shell helper injected by the site module and
    # would end the run with status 0; SystemExit(1) is always available and
    # signals the failure to the caller.
    raise SystemExit(1) from None
except Exception as e:
    print(f"‚ùå ERROR: Could not load Excel file. {e}")
    raise SystemExit(1) from None
else:
    print(f"Successfully loaded {file_path}")

# Banner printed once before the per-sheet output: states the null hypothesis
# and how the Z / Zcrit comparison maps onto the reported decision.
print(
    "Running Rank Difference Test for Randomness",
    "=" * 80,
    "(Null Hypothesis: The data series is random)",
    "Z < Zcrit: Cannot reject null hypothesis (‚úÖ Random)",
    "Z >= Zcrit: Reject null hypothesis (‚ùå Not Random)",
    "=" * 80,
    sep="\n",
)

# One result dict per (sheet, numeric column); written to CSV further below.
results = []

# Reuse the already-open workbook handle for every sheet instead of re-opening
# the file from disk each time, and always release it afterwards so the file
# is not left open (and, on Windows, locked) once processing is finished.
try:
    for sheet in xls.sheet_names:
        print(f"\nüìÑ Sheet: {sheet}")
        try:
            df = xls.parse(sheet)
        except Exception as e:
            # A single unreadable sheet must not abort the whole run.
            print(f"  ...Could not read sheet '{sheet}'. Error: {e}")
            continue

        # Select only numeric columns
        num_df = df.select_dtypes(include=[np.number])

        # Drop Lat/Lon if they exist, as they are not time series
        # (the match is exact and case-sensitive).
        num_df = num_df.drop(columns=["Lon", "Lat"], errors="ignore")

        if num_df.empty:
            print("  ...No numeric data columns found in this sheet.")
            continue

        # Every remaining numeric column is tested as one series; the sheet
        # and column names are appended so each row can be traced back.
        results.extend(
            {**rank_difference_test(num_df[col]), "Sheet": sheet, "Station": col}
            for col in num_df.columns
        )

        print(f"  ...processed {len(num_df.columns)} stations.")
finally:
    xls.close()

# Write every collected result to a single CSV file
try:
    # Identification and decision first, raw statistics afterwards
    cols = ["Sheet", "Station", "Decision", "Z", "Zcrit", "U", "mu", "sigma^2", "n"]

    # reindex puts the columns in that order and creates any that are
    # missing, so the header is written even when no results were collected
    df_results = pd.DataFrame(results).reindex(columns=cols, fill_value=np.nan)

    df_results.to_csv(output_path, index=False)
    print(f"\n‚úÖ All results saved to:\n{output_path}")
except Exception as e:
    # Report the failure instead of raising so the closing message still prints
    print(f"\n‚ùå ERROR: Could not save CSV file. {e}")

print("\n...Done.")



Successfully loaded D:\climate change\monthly_averages.xlsx
Running Rank Difference Test for Randomness
(Null Hypothesis: The data series is random)
Z < Zcrit: Cannot reject null hypothesis (‚úÖ Random)
Z >= Zcrit: Reject null hypothesis (‚ùå Not Random)

üìÑ Sheet: Rainfall_monthly
  ...processed 23 stations.

üìÑ Sheet: Tmax_monthly
  ...processed 23 stations.

üìÑ Sheet: Tmin_monthly
  ...processed 23 stations.

‚úÖ All results saved to:
D:\climate change\Randomness_RankDiff_AllSheets.csv

...Done.
