## Ordinal Regression Demonstration

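This notebook demonstrates fitting, prediction, and inference for an ordinal logistic regression using the `regression-inference` package.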

In [1]:
from regression_inference import OrdinalLogisticRegression, summary

In [2]:
import numpy as np
import pandas as pd

In [3]:
# © Copyright 2007 - 2025, scikit-learn developers (BSD License).
from sklearn.datasets import load_wine
# Load the wine dataset as one DataFrame; it holds the predictor columns and 'target'
data = load_wine(as_frame = True).frame

In [4]:
# Set to True to use CUDA and train models on the GPU; this flag is passed to fit(cuda=...)
CUDA = False

### Model Fitting

- Fit the ordinal logistic regression on the full wine dataset (no train/test split is made in this demonstration)

In [5]:
# Response column first, followed by the five predictors used in the model
selected_columns = [
    'target',
    'alcohol',
    'malic_acid',
    'ash',
    'alcalinity_of_ash',
    'total_phenols',
]

# Keep only complete rows for the selected columns
features = data[selected_columns].dropna()

# Separate the response from the predictors
y = features['target']
X = features.drop(columns=['target'])

# Fit settings gathered in one place so each option is easy to read and tune
fit_options = dict(
    cov_type=None,
    alpha=0.05,
    max_iter=100,
    tol=1e-4,
    adj_cutpoints=False,
    cuda=CUDA,
)

model = OrdinalLogisticRegression().fit(X=X, y=y, **fit_options)

In [6]:
# Print the fitted model's text summary: one estimate per row with its standard error
# in parentheses underneath, followed by the 0:1 and 1:2 rows and the fit statistics
print(model)

Ordinal Regression Results
---------------------------------------------
Dependent:                             target
---------------------------------------------
 
alcohol                               -0.3714
                                     (0.2611)
 
malic_acid                          0.6732***
                                     (0.1879)
 
ash                                -2.4390***
                                     (0.8521)
 
alcalinity_of_ash                   0.4221***
                                     (0.0862)
 
total_phenols                      -3.1317***
                                     (0.4616)
 
0:1                                -9.7101***
                                     (3.6148)
 
1:2                                   -5.3455
                                     (3.5245)

---------------------------------------------
Accuracy                                0.736
Pseudo R-squared                        0.473
LR Statistic                          

### Model Predictions


In [7]:
# Show the feature names in the order the fitted model holds them;
# prediction inputs below are listed in this same order
model.feature_names

Index(['alcohol', 'malic_acid', 'ash', 'alcalinity_of_ash', 'total_phenols'], dtype='object')

In [8]:
# Input values must be listed in the order of model.feature_names.
# Without return_table, predict returns the predicted probability of each class.

model.predict( X = [[12.85, 1.6, 2.52, 17.8, 2.48]] )

array([[0.59487409, 0.39653737, 0.00858854]])

In [9]:
# Predict a new observation and request the accompanying inference table

# A single observation as a one-row 2D input, ordered as in model.feature_names
new_observation = [[12.85, 1.6, 2.52, 17.8, 2.48]]

prediction = model.predict(X=new_observation, return_table=True)

pd.DataFrame(prediction)

Unnamed: 0,features,prediction_class,cumulative_probabilities,prediction_prob,std_error,z_statistic,P>|z|,ci_low_0.05,ci_high_0.05
0,"[{'alcohol': '12.85', 'malic_acid': '1.60', 'a...",0,"[0.5949, 0.9914, 1.0]","[0.5949, 0.3965, 0.0086]","[0.0797, 0.0778, 0.0051]","[7.4633, 5.0973, 1.6867]","[0.000, 0.000, 0.092]","[0.4387, 0.2441, 0.0]","[0.7511, 0.549, 0.0186]"


In [10]:
# Two observations, each wrapped as its own one-row 2D input
prediction_set = [
    [[12.85, 1.6, 2.52, 17.8, 2.48]],
    [[13.73, 1.5, 2.7, 22.5, 3]],
]

# Predict each observation separately and turn every result into a table
prediction_tables = [
    pd.DataFrame(model.predict(X=observation, return_table=True))
    for observation in prediction_set
]

# Stack the tables; each keeps its own index, so the row label 0 repeats
predictions = pd.concat(prediction_tables)

predictions

Unnamed: 0,features,prediction_class,cumulative_probabilities,prediction_prob,std_error,z_statistic,P>|z|,ci_low_0.05,ci_high_0.05
0,"[{'alcohol': '12.85', 'malic_acid': '1.60', 'a...",0,"[0.5949, 0.9914, 1.0]","[0.5949, 0.3965, 0.0086]","[0.0797, 0.0778, 0.0051]","[7.4633, 5.0973, 1.6867]","[0.000, 0.000, 0.092]","[0.4387, 0.2441, 0.0]","[0.7511, 0.549, 0.0186]"
0,"[{'alcohol': '13.73', 'malic_acid': '1.50', 'a...",0,"[0.703, 0.9947, 1.0]","[0.703, 0.2916, 0.0053]","[0.0957, 0.0934, 0.0038]","[7.3456, 3.1224, 1.4166]","[0.000, 0.002, 0.157]","[0.5154, 0.1086, 0.0]","[0.8906, 0.4747, 0.0127]"


In [11]:
# Predict at the sample mean

# Mean of every predictor, collected in the order of model.feature_names
sample_mean = [X[name].mean() for name in model.feature_names]

# One observation, wrapped as a one-row 2D input
prediction_set = [[sample_mean]]

prediction_tables = [
    pd.DataFrame(model.predict(X=observation, return_table=True))
    for observation in prediction_set
]
predictions = pd.concat(prediction_tables)

predictions

Unnamed: 0,features,prediction_class,cumulative_probabilities,prediction_prob,std_error,z_statistic,P>|z|,ci_low_0.05,ci_high_0.05
0,"[{'alcohol': '13.00', 'malic_acid': '2.34', 'a...",1,"[0.1513, 0.9334, 1.0]","[0.1513, 0.7821, 0.0666]","[0.0397, 0.0523, 0.0224]","[3.8144, 14.9595, 2.9769]","[0.000, 0.000, 0.003]","[0.0736, 0.6796, 0.0228]","[0.229, 0.8846, 0.1105]"


In [12]:
# Predict over increments of 'ash', holding every other feature at its sample mean.
#
# Each input row is assembled in the order of model.feature_names, i.e.
# ['alcohol', 'malic_acid', 'ash', 'alcalinity_of_ash', 'total_phenols'],
# so the values stay aligned with the model even if the feature list is edited.

feature_order = list(model.feature_names)

# Baseline for every feature: its sample mean
feature_means = {name: X[name].mean() for name in feature_order}

prediction_range = np.linspace(
    X['ash'].min(),
    X['ash'].max(),
    20                          # Number of predictions
)

# One single-row 2D input per 'ash' value; only the 'ash' slot departs from the mean
prediction_set = [
    [[ash_value if name == 'ash' else feature_means[name] for name in feature_order]]
    for ash_value in prediction_range
]

# Each per-row table keeps its own index, so the stacked frame repeats the label 0
predictions = pd.concat(
    [pd.DataFrame(model.predict(X = pred, return_table=True)) for pred in prediction_set]
)

predictions.tail()

Unnamed: 0,features,prediction_class,cumulative_probabilities,prediction_prob,std_error,z_statistic,P>|z|,ci_low_0.05,ci_high_0.05
0,"[{'alcohol': '13.00', 'malic_acid': '2.34', 'a...",1,"[0.3592, 0.9778, 1.0]","[0.3592, 0.6186, 0.0222]","[0.1051, 0.0995, 0.0131]","[3.4171, 6.2167, 1.6951]","[0.001, 0.000, 0.090]","[0.1532, 0.4235, 0.0]","[0.5653, 0.8136, 0.0478]"
0,"[{'alcohol': '13.00', 'malic_acid': '2.34', 'a...",1,"[0.4162, 0.9825, 1.0]","[0.4162, 0.5663, 0.0175]","[0.1269, 0.1204, 0.0116]","[3.2786, 4.7044, 1.5111]","[0.001, 0.000, 0.131]","[0.1674, 0.3304, 0.0]","[0.6649, 0.8023, 0.0403]"
0,"[{'alcohol': '13.00', 'malic_acid': '2.34', 'a...",1,"[0.4754, 0.9862, 1.0]","[0.4754, 0.5108, 0.0138]","[0.1478, 0.1411, 0.0102]","[3.2161, 3.6202, 1.3581]","[0.001, 0.000, 0.174]","[0.1857, 0.2342, 0.0]","[0.7651, 0.7873, 0.0338]"
0,"[{'alcohol': '13.00', 'malic_acid': '2.34', 'a...",0,"[0.5353, 0.9891, 1.0]","[0.5353, 0.4538, 0.0109]","[0.1657, 0.1592, 0.0089]","[3.2306, 2.8497, 1.2303]","[0.001, 0.004, 0.219]","[0.2105, 0.1417, 0.0]","[0.8601, 0.7659, 0.0283]"
0,"[{'alcohol': '13.00', 'malic_acid': '2.34', 'a...",0,"[0.5942, 0.9914, 1.0]","[0.5942, 0.3971, 0.0086]","[0.1789, 0.1729, 0.0077]","[3.3225, 2.2972, 1.1226]","[0.001, 0.022, 0.262]","[0.2437, 0.0583, 0.0]","[0.9448, 0.736, 0.0236]"


### Coefficient Inference Table

- Comprehensive regression inference

In [13]:
# Tabulate each estimate with its standard error, z statistic, p-value and confidence bounds
pd.DataFrame(model.inference_table())

Unnamed: 0,feature,coefficient,std_error,z_statistic,P>|t|,ci_low_0.05,ci_high_0.05
0,alcohol,-0.3714,0.2611,-1.4224,0.155,-0.8832,0.1404
1,malic_acid,0.6732,0.1879,3.5823,0.0,0.3049,1.0415
2,ash,-2.439,0.8521,-2.8624,0.004,-4.109,-0.7689
3,alcalinity_of_ash,0.4221,0.0862,4.8985,0.0,0.2532,0.591
4,total_phenols,-3.1317,0.4616,-6.7846,0.0,-4.0363,-2.227
5,0:1,-9.7101,3.6148,-2.6862,0.007,-16.795,-2.6251
6,1:2,-5.3455,3.5245,-1.5167,0.129,-12.2535,1.5624


### Variance Inflation Factor

- Generate a VIF table for the model's features

In [14]:
# Tabulate the variance inflation factor of each feature in the fitted model
pd.DataFrame(model.variance_inflation_factor())

Unnamed: 0,feature,VIF
0,alcohol,1.4244
1,malic_acid,1.2727
2,ash,1.6087
3,alcalinity_of_ash,1.8769
4,total_phenols,1.3988
