In [2]:
# McFadden Pseudo-R-squared for Logistic Regression (sometimes called likelihood ratio index)

import pandas as pd
from math import log, exp
from sklearn.linear_model import LogisticRegression

# Load the data set once. The same frame feeds both the model fit and the
# per-observation likelihood calculation, so the two can never disagree.
dataFrame = pd.read_csv("https://bit.ly/33ebs2R", delimiter=",")

# Logistic regression
# Every column except the last is used as an input feature; the last column is the label.
X = dataFrame.values[:, :-1]
Y = dataFrame.values[:, -1]

# penalty=None fits without a regularization term, so the coefficients are
# the plain maximum-likelihood estimates.
model = LogisticRegression(penalty=None)
model.fit(X,Y)

# Coefficients beta1(b1) and beta0(b0)
# Only the first feature's coefficient is read here, so the code that uses
# b0/b1 works with a single input column.
b1 = model.coef_[0][0]  # beta1
b0 = model.intercept_[0]  # beta0

print("Coefficients:")
print("beta1:", b1)
print("beta0:", b0)

# Calculating the r squared coefficient for Logistic Regression
# Build the row tuples from the frame loaded above instead of downloading the
# CSV a second time: this avoids a redundant network request and guarantees
# the likelihood is evaluated on exactly the rows the model was fitted on.
points = list(dataFrame.itertuples(index=False))

# Logistic function formula
def logistic_func(x):
    """Return the fitted probability for input ``x``.

    Evaluates the logistic curve ``1 / (1 + e^-(b0 + b1 * x))`` using the
    module-level coefficients ``b0`` (intercept) and ``b1`` (slope).
    """
    linear_term = b0 + b1 * x
    return 1.0 / (1.0 + exp(-linear_term))

def _bernoulli_log_likelihood(prob, outcome):
    """Log-likelihood contribution of one observation.

    ``prob`` is the predicted probability and ``outcome`` the observed y
    value; the result is ``log(prob) * outcome + log(1 - prob) * (1 - outcome)``.
    """
    return log(prob) * outcome + log(1.0 - prob) * (1.0 - outcome)


# Log-likelihood of the fitted model: each observation is scored with the
# probability the logistic curve assigns to its x value.
log_fit_likelihood = sum(
    _bernoulli_log_likelihood(logistic_func(pt.x), pt.y) for pt in points
)

# Baseline ("null") model that ignores x: every observation gets the same
# probability, namely the mean of y over all points.
likelihood = sum(pt.y for pt in points) / len(points)

log_likelihood = sum(
    _bernoulli_log_likelihood(likelihood, pt.y) for pt in points
)

# Calculating R-squared: the relative improvement of the fitted
# log-likelihood over the baseline log-likelihood.
r2 = (log_likelihood - log_fit_likelihood) / log_likelihood
print(f"McFadden Pseudo-R-squared: {r2}")

Coefficients:
beta1: 0.6926893863085579
beta0: -3.175805042563376
McFadden Pseudo-R-squared: 0.3064561053776835


In [4]:
# A McFadden pseudo-R-squared of 0.30645 suggests that the x variable only weakly explains the y variable.
# A pseudo-R-squared closer to 0.0 indicates a weak relationship between the variables under study,
# while a score closer to 1.0 indicates a strong relationship.