In [1]:
# ==============================
# Jaccard Coefficient Calculation
# Unit 5 e-Portfolio Activity
# ==============================

from sklearn.metrics import jaccard_score
import pandas as pd

# Step 1: Create the original data table
# Y & P = 1; N & A = 0 for asymmetric variables (tests and symptoms)
# Every column holds one entry per patient, in the same order as "Name".
data = {
    "Name": ["Jack", "Mary", "Jim"],
    # Symmetric attribute, kept as-is (not binary-encoded).
    # NOTE(review): the list previously had only two entries for three
    # patients; "M" for Jim is assumed -- confirm against the source table.
    "Gender": ["M", "F", "M"],
    "Fever": [1, 1, 1],
    "Cough": [0, 0, 1],
    "Test-1": [1, 1, 0],
    "Test-2": [0, 0, 0],
    "Test-3": [0, 1, 0],
    "Test-4": [0, 0, 0],
}

# Step 2: Create a DataFrame for easier visualization
# Built directly from `data` so the table is defined in exactly one place.
# Gender is dropped because only the asymmetric binary attributes are shown.
df = pd.DataFrame(data).drop(columns="Gender")

print("Binary Data Table (Y/P=1, N/A=0):")
print(df.to_string(index=False))

# Step 3: Prepare data vectors for Jaccard calculation
# Each patient's vector is read from `df` rather than retyped by hand, so the
# vectors cannot drift out of sync with the table. The binary columns are
# named explicitly so that any other column present in `df` is ignored.
binary_columns = ["Fever", "Cough", "Test-1", "Test-2", "Test-3", "Test-4"]
patient_vectors = df.set_index("Name")[binary_columns]
jack = patient_vectors.loc["Jack"].tolist()
mary = patient_vectors.loc["Mary"].tolist()
jim = patient_vectors.loc["Jim"].tolist()

# Step 4: Calculate Jaccard coefficient for each pair
# jaccard_score is called with its defaults on two equal-length 0/1 lists.
jac_jack_mary = jaccard_score(jack, mary)
jac_jack_jim = jaccard_score(jack, jim)
jac_jim_mary = jaccard_score(jim, mary)

# Step 5: Print the results
print("\nJaccard Coefficient Results:")
print(f"Jack & Mary: {jac_jack_mary:.2f}")
print(f"Jack & Jim:  {jac_jack_jim:.2f}")
print(f"Jim & Mary:  {jac_jim_mary:.2f}")

# Explanation of output:
# The Jaccard coefficient measures similarity between two binary vectors.
# Formula: J = (number of shared 1's) / (number of positions where at least one is 1)
# Higher values indicate higher similarity.


Binary Data Table (Y/P=1, N/A=0):
Name  Fever  Cough  Test-1  Test-2  Test-3  Test-4
Jack      1      0       1       0       0       0
Mary      1      0       1       0       1       0
 Jim      1      1       0       0       0       0

Jaccard Coefficient Results:
Jack & Mary: 0.67
Jack & Jim:  0.33
Jim & Mary:  0.25
