In [1]:
import numpy as np
import pandas as pd
from scipy.stats import rankdata, norm

def rank_difference_test(series, alpha=0.05):
    """Run the rank-difference test of randomness on a series.

    Missing values (NaN) are removed, the remaining observations are ranked
    (ties receive their average rank), and the statistic
    ``U = sum(|R_i - R_{i-1}|)`` is compared with its expectation under
    randomness through a two-sided normal approximation.

    Parameters
    ----------
    series : array-like
        Observations in their original order. Values are coerced to float.
    alpha : float, default 0.05
        Two-sided significance level used for the critical value.

    Returns
    -------
    dict
        Keys ``"n"``, ``"U"``, ``"mu"``, ``"sigma^2"``, ``"Z"``, ``"Zcrit"``
        and ``"Result"``. ``"Result"`` is ``"Series is RANDOM"``,
        ``"Series is NOT random"``, or ``"Insufficient data"`` when fewer
        than three non-missing values remain (the statistics are NaN then).
    """
    # Coerce to a flat float array so np.isnan also works on object input.
    x = np.asarray(series, dtype=float).ravel()
    x = x[~np.isnan(x)]             # remove missing values
    n = int(x.size)

    # critical value (depends only on alpha)
    Zcrit = norm.ppf(1 - alpha/2)

    # The variance below contains the factor (n - 2): it is zero for n == 2,
    # and for n < 2 there are no consecutive pairs at all. Previously these
    # cases silently produced a NaN/meaningless Z and a bogus verdict.
    if n < 3:
        return {
            "n": n,
            "U": np.nan,
            "mu": np.nan,
            "sigma^2": np.nan,
            "Z": np.nan,
            "Zcrit": Zcrit,
            "Result": "Insufficient data"
        }

    # assign ranks (average rank for tied values)
    ranks = rankdata(x, method='average')

    # compute U = sum |Ri - Ri-1|
    U = np.sum(np.abs(np.diff(ranks)))

    # mean and variance of U under the randomness hypothesis
    mu = ((n + 1) * (n - 1)) / 3
    sigma2 = ((n - 2) * (n + 1) * (4*n - 7)) / 90
    sigma = np.sqrt(sigma2)

    # standardised two-sided statistic
    Z = abs(U - mu) / sigma

    if Z < Zcrit:
        result = "Series is RANDOM"
    else:
        result = "Series is NOT random"

    return {
        "n": n,
        "U": U,
        "mu": mu,
        "sigma^2": sigma2,
        "Z": Z,
        "Zcrit": Zcrit,
        "Result": result
    }
# Load the "Rainfall_monthly" sheet, using its first column as the index.
file = r"D:\climate change\monthly_averages.xlsx"
df = pd.read_excel(file, sheet_name="Rainfall_monthly", index_col=0)
# Remove the "Lon" and "Lat" index labels when present; missing labels are
# ignored rather than raising.
df = df.drop(["Lon", "Lat"], errors="ignore")

# Run the test once per column and tag each result with the column name.
results = [
    {**rank_difference_test(df[col]), "Station": col}
    for col in df.columns
]

# One row per column, written without the positional index.
pd.DataFrame(results).to_csv(
    r"D:\climate change\Randomness_RankDiff.csv",
    index=False,
)
print("✅ Saved: Randomness_RankDiff.csv")


✅ Saved: Randomness_RankDiff.csv
