## Ordinal Regression Demonstration

Using the `regression-inference` package

In [1]:
from regression_inference import OrdinalLogisticRegression, summary

In [2]:
import numpy as np
import pandas as pd

In [3]:
# © Copyright 2007 - 2025, scikit-learn developers (BSD License).
from sklearn.datasets import load_wine
# Wine dataset as a single DataFrame; the cells below read its feature columns and 'target'
data = load_wine(as_frame = True).frame

In [4]:
# True: Use CUDA to train models on GPU (forwarded to fit(cuda=...) in the model-fitting cell)
# NOTE(review): presumably False trains on the CPU instead — confirm against the package docs
CUDA = True

### Model Fitting

- Fit the Ordinal Logistic Regression on the full dataset (this demo makes no train/test split)

In [5]:
# Keep the response plus the five predictors used in this demo and drop any
# rows with missing values. No constant column is added to `data`: it was
# never passed to the model, whose output reports the cutpoints separately.
features = data[[
    'target',
    'alcohol', 'malic_acid', 'ash',
    'alcalinity_of_ash', 'total_phenols',
]].dropna()

# Predictors (all selected columns except the response) and the response itself
X = features.drop(columns=['target'])
y = features['target']

# alpha=0.05 is the level that labels the ci_low_0.05 / ci_high_0.05 columns
# in the inference tables produced from this model.
model = OrdinalLogisticRegression().fit(
    X=X,
    y=y,
    cuda=CUDA,
    cov_type=None,
    adj_cutpoints=True,
    alpha=0.05,
)

CUDA Acceleration is Experimental
Device: NVIDIA GeForce RTX 3060

  model = OrdinalLogisticRegression().fit(X=X, y=y, cuda = CUDA, cov_type=None, adj_cutpoints=True, alpha=0.05)


In [6]:
# Text summary of the fit: each coefficient with its standard error in
# parentheses, followed by the cutpoints ('0:1', '1:2') and overall fit statistics
print(model)

Ordinal Regression Results
---------------------------------------------
Dependent:                             target
---------------------------------------------
 
alcohol                               -0.3714
                                     (0.2715)
 
malic_acid                          0.6732***
                                     (0.1725)
 
ash                                 -2.4390**
                                     (1.0083)
 
alcalinity_of_ash                   0.4221***
                                     (0.1072)
 
total_phenols                      -3.1317***
                                     (0.4676)
 
0:1                                -9.7100***
                                     (3.6327)
 
1:2                                 1.4735***
                                     (0.1481)

---------------------------------------------
Accuracy                                0.736
Pseudo R-squared                        0.473
LR Statistic                          

### Model Predictions

- Predict a set of data

- Predict new values

- Predict with inference table

- Predict at sample mean

- Predict over range of specified values

In [7]:
# Column order of the fitted model; prediction inputs must follow this order
model.feature_names

Index(['alcohol', 'malic_acid', 'ash', 'alcalinity_of_ash', 'total_phenols'], dtype='object')

In [8]:
# Values in each input row must follow the order of model.feature_names

single_observation = [[12.85, 1.6, 2.52, 17.8, 2.48]]

# Displays one probability per outcome class for the row
model.predict(X=single_observation)

array([[0.59487471, 0.39653677, 0.00858851]])

In [9]:
# Predict a new observation and request the inference table alongside it

new_observation = [[12.85, 1.6, 2.52, 17.8, 2.48]]
prediction = model.predict(X=new_observation, return_table=True)

# Show the returned table as a DataFrame
pd.DataFrame(prediction)

Unnamed: 0,features,prediction_class,cumulative_probabilities,prediction_prob,std_error,z_statistic,P>|z|,ci_low_0.05,ci_high_0.05
0,"[{'alcohol': '12.85', 'malic_acid': '1.60', 'a...",0,"[0.5949, 0.9914, 1.0]","[0.5949, 0.3965, 0.0086]","[0.0922, 0.0901, 0.0059]","[6.449, 4.3993, 1.4633]","[0.000, 0.000, 0.143]","[0.4141, 0.2199, 0.0]","[0.7757, 0.5732, 0.0201]"


In [10]:
# Each entry is a one-row batch, with values in the order of model.feature_names
prediction_set = [
    [[12.85, 1.6, 2.52, 17.8, 2.48]],
    [[13.73, 1.5, 2.7, 22.5, 3]],
]

# Build one inference table per batch, then stack them into a single frame
tables = []
for pred in prediction_set:
    tables.append(pd.DataFrame(model.predict(X=pred, return_table=True)))

predictions = pd.concat(tables)

predictions

Unnamed: 0,features,prediction_class,cumulative_probabilities,prediction_prob,std_error,z_statistic,P>|z|,ci_low_0.05,ci_high_0.05
0,"[{'alcohol': '12.85', 'malic_acid': '1.60', 'a...",0,"[0.5949, 0.9914, 1.0]","[0.5949, 0.3965, 0.0086]","[0.0922, 0.0901, 0.0059]","[6.449, 4.3993, 1.4633]","[0.000, 0.000, 0.143]","[0.4141, 0.2199, 0.0]","[0.7757, 0.5732, 0.0201]"
0,"[{'alcohol': '13.73', 'malic_acid': '1.50', 'a...",0,"[0.703, 0.9947, 1.0]","[0.703, 0.2916, 0.0053]","[0.0957, 0.0945, 0.0035]","[7.3491, 3.087, 1.5463]","[0.000, 0.002, 0.122]","[0.5155, 0.1065, 0.0]","[0.8905, 0.4768, 0.0121]"


In [11]:
# Evaluate the model at the sample mean of every feature

# Walk model.feature_names so the means line up with the fitted column order
sample_mean = [X[name].mean() for name in list(model.feature_names)]

# A single batch holding the one mean row
prediction_set = [[sample_mean]]

tables = []
for pred in prediction_set:
    tables.append(pd.DataFrame(model.predict(X=pred, return_table=True)))

predictions = pd.concat(tables)

predictions

Unnamed: 0,features,prediction_class,cumulative_probabilities,prediction_prob,std_error,z_statistic,P>|z|,ci_low_0.05,ci_high_0.05
0,"[{'alcohol': '13.00', 'malic_acid': '2.34', 'a...",1,"[0.1513, 0.9334, 1.0]","[0.1513, 0.7821, 0.0666]","[0.0475, 0.0624, 0.0238]","[3.1849, 12.5314, 2.8012]","[0.001, 0.000, 0.005]","[0.0582, 0.6598, 0.02]","[0.2444, 0.9044, 0.1132]"


In [12]:
# Predict over increments of 'ash', holding every other feature at its sample mean.
#
# Rows are assembled in the order of model.feature_names, i.e.
# ['alcohol', 'malic_acid', 'ash', 'alcalinity_of_ash', 'total_phenols'],
# so the ordering cannot drift from the fitted model if the feature set changes.

varied_feature = 'ash'
feature_order = list(model.feature_names)

# Sample mean of each feature; the varied feature's entry is overridden per row below
feature_means = {name: X[name].mean() for name in feature_order}

prediction_range = np.linspace(
    X[varied_feature].min(),
    X[varied_feature].max(),
    20                          # Number of predictions
)

# One single-row batch per grid value of the varied feature
prediction_set = [
    [[value if name == varied_feature else feature_means[name] for name in feature_order]]
    for value in prediction_range
]

predictions = pd.concat(
    [pd.DataFrame(model.predict(X = pred, return_table=True)) for pred in prediction_set]
)

# Show only the last few rows (the upper end of the 'ash' range)
predictions.tail()

Unnamed: 0,features,prediction_class,cumulative_probabilities,prediction_prob,std_error,z_statistic,P>|z|,ci_low_0.05,ci_high_0.05
0,"[{'alcohol': '13.00', 'malic_acid': '2.34', 'a...",1,"[0.3592, 0.9778, 1.0]","[0.3592, 0.6186, 0.0222]","[0.1214, 0.1145, 0.0155]","[2.9601, 5.4003, 1.4322]","[0.003, 0.000, 0.152]","[0.1214, 0.3941, 0.0]","[0.5971, 0.8431, 0.0525]"
0,"[{'alcohol': '13.00', 'malic_acid': '2.34', 'a...",1,"[0.4162, 0.9825, 1.0]","[0.4162, 0.5663, 0.0175]","[0.1467, 0.1387, 0.0138]","[2.8373, 4.0833, 1.2694]","[0.005, 0.000, 0.204]","[0.1287, 0.2945, 0.0]","[0.7036, 0.8381, 0.0446]"
0,"[{'alcohol': '13.00', 'malic_acid': '2.34', 'a...",1,"[0.4754, 0.9862, 1.0]","[0.4754, 0.5108, 0.0138]","[0.1711, 0.1629, 0.0122]","[2.7789, 3.1362, 1.1368]","[0.005, 0.002, 0.256]","[0.1401, 0.1916, 0.0]","[0.8107, 0.83, 0.0377]"
0,"[{'alcohol': '13.00', 'malic_acid': '2.34', 'a...",0,"[0.5353, 0.9891, 1.0]","[0.5353, 0.4538, 0.0109]","[0.1921, 0.1842, 0.0106]","[2.7867, 2.4633, 1.0276]","[0.005, 0.014, 0.304]","[0.1588, 0.0927, 0.0]","[0.9118, 0.8148, 0.0318]"
0,"[{'alcohol': '13.00', 'malic_acid': '2.34', 'a...",0,"[0.5942, 0.9914, 1.0]","[0.5942, 0.3971, 0.0086]","[0.2077, 0.2004, 0.0092]","[2.8614, 1.9818, 0.9365]","[0.004, 0.048, 0.349]","[0.1872, 0.0044, 0.0]","[1.0, 0.7899, 0.0266]"


### Coefficient Inference Table

- Comprehensive regression inference

In [13]:
# Coefficient-level inference as a DataFrame: estimate, standard error,
# z-statistic, p-value and confidence bounds for every feature and cutpoint
pd.DataFrame(model.inference_table())

Unnamed: 0,feature,coefficient,std_error,z_statistic,P>|t|,ci_low_0.05,ci_high_0.05
0,alcohol,-0.3714,0.2715,-1.368,0.171,-0.9035,0.1607
1,malic_acid,0.6732,0.1725,3.9032,0.0,0.3352,1.0113
2,ash,-2.439,1.0083,-2.4188,0.016,-4.4153,-0.4627
3,alcalinity_of_ash,0.4221,0.1072,3.9373,0.0,0.212,0.6323
4,total_phenols,-3.1317,0.4676,-6.6974,0.0,-4.0481,-2.2152
5,0:1,-9.71,3.6327,-2.6729,0.008,-16.83,-2.59
6,1:2,1.4735,0.1481,9.9522,0.0,1.1833,1.7637


### Variance Inflation Factor

- Generate a VIF table for the model's features

In [14]:
# One VIF value per model feature, shown as a DataFrame
pd.DataFrame(model.variance_inflation_factor())

Unnamed: 0,feature,VIF
0,alcohol,1.4244
1,malic_acid,1.2727
2,ash,1.6087
3,alcalinity_of_ash,1.8769
4,total_phenols,1.3988
