In [1]:
%pip install discopula

Note: you may need to restart the kernel to use updated packages.


> Make sure to have discopula's latest version installed using `pip`. More information about the latest version can be found at https://pypi.org/project/discopula/

Run the following upgrade commands on your terminal if you are facing issues related to `pip` or `scipy`

```
# pip install --upgrade pip
# pip install --upgrade scipy
```

In [2]:
import numpy as np
from discopula import GenericCheckerboardCopula

# 2-Dimensional Case 

### Create Sample Contingency Table and Initialize the GenericCheckerboardCopula

In the case of initialization of a `GenericCheckerboardCopula` object with a contingency table (`np.array(np.array(...))`), the axis indexing defaults to the outermost starting with 0. In this case of a 2D contingency table, `axis = 0` has 5 categories and `axis = 1` has 3 categories. According to the JMA2021 paper, we have $X_1$ at `axis = 0` and $X_2$ at `axis = 1`.

In [3]:
contingency_table = np.array([
    [0, 0, 20],
    [0, 10, 0],
    [20, 0, 0],
    [0, 10, 0],
    [0, 0, 20]
])
copula = GenericCheckerboardCopula.from_contingency_table(contingency_table)
print(f"Shape of the inferred joint probability matrix P: {copula.P.shape}")
print(f"Probability matrix P:\n{copula.P}")

Shape of the inferred joint probability matrix P: (5, 3)
Probability matrix P:
[[0.    0.    0.25 ]
 [0.    0.125 0.   ]
 [0.25  0.    0.   ]
 [0.    0.125 0.   ]
 [0.    0.    0.25 ]]


### Calculating CCRAM & SCCRAM (non-vectorized)

In [4]:
ccram_0_to_1 = copula.calculate_CCRAM(from_axes=[0], to_axis=1)
ccram_1_to_0 = copula.calculate_CCRAM(from_axes=[1], to_axis=0)
print(f"CCRAM 0->1: {ccram_0_to_1:.4f}")
print(f"CCRAM 1->0: {ccram_1_to_0:.4f}")

sccram_0_to_1 = copula.calculate_CCRAM(from_axes=[0], to_axis=1, scaled=True)
sccram_1_to_0 = copula.calculate_CCRAM(from_axes=[1], to_axis=0, scaled=True)
print(f"SCCRAM 0->1: {sccram_0_to_1:.4f}")
print(f"SCCRAM 1->0: {sccram_1_to_0:.4f}")

CCRAM 0->1: 0.8438
CCRAM 1->0: 0.0000
SCCRAM 0->1: 1.0000
SCCRAM 1->0: 0.0000


### Calculating CCRAM & SCCRAM (vectorized)

In [5]:
ccram_0_to_1_vec = copula.calculate_CCRAM_vectorized(from_axes=[0], to_axis=1)
ccram_1_to_0_vec = copula.calculate_CCRAM_vectorized(from_axes=[1], to_axis=0)
print(f"CCRAM 0->1: {ccram_0_to_1_vec:.4f}")
print(f"CCRAM 1->0: {ccram_1_to_0_vec:.4f}")

sccram_0_to_1_vec = copula.calculate_CCRAM_vectorized(from_axes=[0], to_axis=1, scaled=True)
sccram_1_to_0_vec = copula.calculate_CCRAM_vectorized(from_axes=[1], to_axis=0, scaled=True)
print(f"SCCRAM 0->1: {sccram_0_to_1_vec:.4f}")
print(f"SCCRAM 1->0: {sccram_1_to_0_vec:.4f}")

CCRAM 0->1: 0.8438
CCRAM 1->0: 0.0000
SCCRAM 0->1: 1.0000
SCCRAM 1->0: 0.0000


### Getting Category Predictions

In [6]:
predictions_0_to_1 = copula.get_category_predictions_multi(from_axes=[0], to_axis=1)
print("\nPredictions from axis 0 to axis 1:")
print(predictions_0_to_1)

axis_to_name_dict = {0: "Y", 1: "X"}
predictions_1_to_0 = copula.get_category_predictions_multi(from_axes=[1], to_axis=0, axis_names=axis_to_name_dict)
print("\nPredictions from axis 1 to axis 0:")
print(predictions_1_to_0)


Predictions from axis 0 to axis 1:
   X0 Category  Predicted X1 Category
0            1                      3
1            2                      2
2            3                      1
3            4                      2
4            5                      3

Predictions from axis 1 to axis 0:
   X Category  Predicted Y Category
0           1                     3
1           2                     3
2           3                     3


### Calculating Scores and their Variances

In [8]:
# Calculate and display scores for both axes
scores_axis0 = copula.calculate_scores(axis=0)
scores_axis1 = copula.calculate_scores(axis=1)

print("Scores for axis 0:")
print(scores_axis0)
# Expected: [0.125, 0.3125, 0.5, 0.6875, 0.875]

print("\nScores for axis 1:")
print(scores_axis1)
# Expected: [0.125, 0.375, 0.75]

# Calculate and display variance of scores
variance_axis0 = copula.calculate_variance_S(axis=0)
variance_axis1 = copula.calculate_variance_S(axis=1)

print("\nVariance of scores for axis 0:", variance_axis0)
# Expected: 81/1024 = 0.0791015625
print("Variance of scores for axis 1:", variance_axis1)
# Expected: 9/128 = 0.0703125 

Scores for axis 0:
[np.float64(0.125), np.float64(0.3125), np.float64(0.5), np.float64(0.6875), np.float64(0.875)]

Scores for axis 1:
[np.float64(0.125), np.float64(0.375), np.float64(0.75)]

Variance of scores for axis 0: 0.0791015625
Variance of scores for axis 1: 0.0703125


# 4-Dimensional Case (Real Data Analysis from JMA2021)

### Create Sample Data in Cases Form and Initialize the GenericCheckerboardCopula

In the case of initialization of a `GenericCheckerboardCopula` object with cases data (`np.array(np.array())`), the axis indexing defaults to the outermost starting with 0. In this case of 4D cases, as mentioned by the `shape` parameter: `axis = 0` has 2 categories, `axis = 1` has 3 categories, `axis = 2` has 2 categories, and `axis = 3` has 6 categories. According to the JMA2021 paper, we have $X_1$ at `axis = 0`, $X_2$ at `axis = 1`, $X_3$ at `axis = 2`, and $X_4$ at `axis = 3`.

In [9]:
real_cases_data = np.array([
    # RDA Row 1
    [0,2,0,1],[0,2,0,4],[0,2,0,4],
    [0,2,0,5], [0,2,0,5],[0,2,0,5],[0,2,0,5],
    # RDA Row 2
    [0,2,1,3],[0,2,1,4],[0,2,1,4],[0,2,1,4],
    # RDA Row 3
    [0,1,0,1],[0,1,0,1],[0,1,0,2],[0,1,0,2],[0,1,0,2],
    [0,1,0,4],[0,1,0,4],[0,1,0,4],[0,1,0,4],[0,1,0,4],[0,1,0,4],
    [0,1,0,5],[0,1,0,5],[0,1,0,5],[0,1,0,5],[0,1,0,5],[0,1,0,5],
    # RDA Row 4
    [0,1,1,1],[0,1,1,3],[0,1,1,3],[0,1,1,5],
    # RDA Row 5
    [0,0,0,4],[0,0,0,4],[0,0,0,5],[0,0,0,5],
    # RDA Row 6
    [0,0,1,2],[0,0,1,3],[0,0,1,4],[0,0,1,4],[0,0,1,4],
    # RDA Row 7
    [1,2,0,2],[1,2,0,2],[1,2,0,2],[1,2,0,4],[1,2,0,5],[1,2,0,5],
    # RDA Row 8
    [1,2,1,1],[1,2,1,4],[1,2,1,4],[1,2,1,4],
    # RDA Row 9
    [1,1,0,1],[1,1,0,1],[1,1,0,1],[1,1,0,2],[1,1,0,2],[1,1,0,2],[1,1,0,2],
    [1,1,0,3],[1,1,0,3],[1,1,0,3],[1,1,0,3],[1,1,0,3],
    [1,1,0,4],[1,1,0,4],[1,1,0,4],[1,1,0,4],[1,1,0,4],[1,1,0,4],
    [1,1,0,5],[1,1,0,5],
    # RDA Row 10
    [1,1,1,0],[1,1,1,1],[1,1,1,1],[1,1,1,1],[1,1,1,1],
    [1,1,1,2],[1,1,1,2],[1,1,1,2],[1,1,1,2],
    [1,1,1,3],[1,1,1,3],[1,1,1,3],[1,1,1,5],
    # RDA Row 11
    [1,0,0,0],[1,0,0,0],[1,0,0,1],[1,0,0,1],[1,0,0,2],
    [1,0,0,3],[1,0,0,3],[1,0,0,3],[1,0,0,3],[1,0,0,3],
    [1,0,0,4],[1,0,0,4],
    # RDA Row 12
    [1,0,1,0],[1,0,1,0],[1,0,1,2],[1,0,1,2],
    [1,0,1,3],[1,0,1,3],[1,0,1,3]
])
rda_copula = GenericCheckerboardCopula.from_cases(cases=real_cases_data, shape=(2,3,2,6))
print(f"Shape of the inferred joint probability matrix P: {copula.P.shape}")
print(f"Probability matrix P:\n{rda_copula.P}\n")
print(f"Marginal pdfs:\n{rda_copula.marginal_pdfs}\n")
print(f"Marginal cdfs:\n{rda_copula.marginal_cdfs}")

Shape of the inferred joint probability matrix P: (5, 3)
Probability matrix P:
[[[[0.         0.         0.         0.         0.01941748 0.01941748]
   [0.         0.         0.00970874 0.00970874 0.02912621 0.        ]]

  [[0.         0.01941748 0.02912621 0.         0.05825243 0.05825243]
   [0.         0.00970874 0.         0.01941748 0.         0.00970874]]

  [[0.         0.00970874 0.         0.         0.01941748 0.03883495]
   [0.         0.         0.         0.00970874 0.02912621 0.        ]]]


 [[[0.01941748 0.01941748 0.00970874 0.04854369 0.01941748 0.        ]
   [0.01941748 0.         0.01941748 0.02912621 0.         0.        ]]

  [[0.         0.02912621 0.03883495 0.04854369 0.05825243 0.01941748]
   [0.00970874 0.03883495 0.03883495 0.02912621 0.         0.00970874]]

  [[0.         0.         0.02912621 0.         0.00970874 0.01941748]
   [0.         0.00970874 0.         0.         0.02912621 0.        ]]]]

Marginal pdfs:
{0: array([0.39805825, 0.60194175]), 1

### Calculating CCRAM & SCCRAM (non-vectorized)

In [10]:
rda_ccram_012_to_3 = rda_copula.calculate_CCRAM(from_axes=[0, 1, 2], to_axis=3)
print(f"CCRAM 012->3: {rda_ccram_012_to_3:.4f}")

rda_sccram_012_to_3 = rda_copula.calculate_CCRAM(from_axes=[0, 1, 2], to_axis=3, scaled=True)
print(f"SCCRAM 012->3: {rda_sccram_012_to_3:.4f}")

CCRAM 012->3: 0.2663
SCCRAM 012->3: 0.2776


### Calculating CCRAM & SCCRAM (vectorized)

In [11]:
rda_ccram_012_to_3 = rda_copula.calculate_CCRAM_vectorized(from_axes=[0, 1, 2], to_axis=3)
print(f"CCRAM 012->3: {rda_ccram_012_to_3:.4f}")

rda_sccram_012_to_3 = rda_copula.calculate_CCRAM_vectorized(from_axes=[0, 1, 2], to_axis=3, scaled=True)
print(f"SCCRAM 012->3: {rda_sccram_012_to_3:.4f}")

CCRAM 012->3: 0.2663
SCCRAM 012->3: 0.2776


### Getting Category Predictions

In [12]:
rda_predictions_012_to_3 = rda_copula.get_category_predictions_multi(from_axes=[0, 1, 2], to_axis=3)
print("\nPredictions from axis 012 to axis 3:")
print(rda_predictions_012_to_3)

rda_axis_to_name_dict = {0: "X1", 1: "X2", 2: "X3", 3: "Y = X4"}
rda_predictions_012_to_3_named = rda_copula.get_category_predictions_multi(from_axes=[0, 1, 2], to_axis=3, axis_names=rda_axis_to_name_dict)
print("\nPredictions from X1, X2, X3 to Y = X4:")
print(rda_predictions_012_to_3_named)


Predictions from axis 012 to axis 3:
    X0 Category  X1 Category  X2 Category  Predicted X3 Category
0             1            1            1                      5
1             1            1            2                      5
2             1            2            1                      5
3             1            2            2                      4
4             1            3            1                      5
5             1            3            2                      5
6             2            1            1                      3
7             2            1            2                      3
8             2            2            1                      4
9             2            2            2                      3
10            2            3            1                      5
11            2            3            2                      4

Predictions from X1, X2, X3 to Y = X4:
    X1 Category  X2 Category  X3 Category  Predicted Y = X4 Category
0       