In [6]:
import pandas as pd
from collections import Counter

# === Load prediction CSVs ===
# One prediction file per model. The code below reads the "Id" and "Predicted"
# columns of each file.
csv1 = pd.read_csv("https://raw.githubusercontent.com/ZerXXX0/Lung-Disease-Classification/refs/heads/main/Model/yolox_aug_best.csv")
csv2 = pd.read_csv("https://raw.githubusercontent.com/ZerXXX0/Lung-Disease-Classification/refs/heads/main/Model/yolox_best.csv")
csv3 = pd.read_csv("https://raw.githubusercontent.com/ZerXXX0/Lung-Disease-Classification/refs/heads/main/Model/yolox_last.csv")
csv4 = pd.read_csv("https://raw.githubusercontent.com/ZerXXX0/Lung-Disease-Classification/refs/heads/main/Model/coatnet%202.csv")
csv5 = pd.read_csv("https://raw.githubusercontent.com/ZerXXX0/Lung-Disease-Classification/refs/heads/main/Model/swin.csv")

# Ensure all have the same Id order
# Votes are paired by row position, so every file must list the same Ids in
# the same order as csv1.
for other in (csv2, csv3, csv4, csv5):
    assert all(csv1["Id"] == other["Id"])

# Single source of truth for the output file, so the status message printed at
# the end cannot drift away from the path that is actually written.
output_path = "final_voted_result_5_models.csv"

# === Perform majority voting (5 models) ===
final_preds = []
tie_breaks = 0  # number of rows where the top vote count was shared

for p1, p2, p3, p4, p5 in zip(csv1["Predicted"], csv2["Predicted"], csv3["Predicted"], csv4["Predicted"], csv5["Predicted"]):
    votes = [p1, p2, p3, p4, p5]
    # most_common() with no argument returns (label, count) pairs sorted by
    # descending count, so a tie for first place shows up in the first two entries.
    ranked = Counter(votes).most_common()
    top_is_tied = len(ranked) > 1 and ranked[0][1] == ranked[1][1]

    if top_is_tied:
        # Fallback to csv2's prediction.
        # NOTE(review): csv2's vote is not guaranteed to be one of the tied
        # labels (e.g. a 2-2-1 split where csv2 cast the single vote) — confirm
        # this is the intended tie-break.
        vote = p2
        tie_breaks += 1
    else:
        vote = ranked[0][0]

    final_preds.append(vote)

# === Build final dataframe ===
final_df = pd.DataFrame({
    "Id": csv1["Id"],
    "Predicted": final_preds
})

# === Save to CSV ===
final_df.to_csv(output_path, index=False)

# Report the real output path (the message previously named a different file).
print(f"✅ Voting complete! Tie-breaks resolved using csv2 ({tie_breaks} times). File saved as {output_path}")


✅ Voting complete! Tie-breaks resolved using csv2 (4 times). File saved as final_voted_result.csv


In [7]:
import pandas as pd
from collections import Counter

# === Load prediction CSVs ===
# Three prediction files; the code below uses their "Id" and "Predicted" columns.
csv1 = pd.read_csv("https://raw.githubusercontent.com/ZerXXX0/Lung-Disease-Classification/refs/heads/main/Model/yolox_aug_best.csv")
csv2 = pd.read_csv("https://raw.githubusercontent.com/ZerXXX0/Lung-Disease-Classification/refs/heads/main/Model/yolox_best.csv")
csv3 = pd.read_csv("https://raw.githubusercontent.com/ZerXXX0/Lung-Disease-Classification/refs/heads/main/Model/yolox_last.csv")

# Rows are matched by position, so each file must carry the same Ids in the
# same order as csv1.
for other in (csv2, csv3):
    assert all(csv1["Id"] == other["Id"])


def _vote_of_three(a, b, c):
    """Return the most common label among a, b, c; if all three differ, return b."""
    tally = Counter([a, b, c])
    if len(tally) != 3:
        return tally.most_common(1)[0][0]
    # Three distinct labels: no majority exists, so fall back to the second vote.
    return b


# === Perform majority voting with tie-break ===
# The second argument is csv2's prediction, which doubles as the tie-breaker.
final_preds = [
    _vote_of_three(a, b, c)
    for a, b, c in zip(csv1["Predicted"], csv2["Predicted"], csv3["Predicted"])
]

# === Build final dataframe ===
final_df = pd.DataFrame({"Id": csv1["Id"], "Predicted": final_preds})

# === Save to CSV ===
final_df.to_csv("final_voted_result.csv", index=False)

print("✅ Voting complete! Tie-breaks resolved using csv2. File saved as final_voted_result.csv")


✅ Voting complete! Tie-breaks resolved using csv2. File saved as final_voted_result.csv


In [10]:
import pandas as pd
from collections import Counter

# === Load prediction CSVs ===
# NOTE(review): csv1 and csv3 have the same file names that other cells in this
# notebook write locally, but here they are fetched from the repository —
# confirm the uploaded copies are the current ones before relying on this cell.
csv1 = pd.read_csv("https://raw.githubusercontent.com/ZerXXX0/Lung-Disease-Classification/refs/heads/main/Model/final_voted_result.csv")
csv2 = pd.read_csv("https://raw.githubusercontent.com/ZerXXX0/Lung-Disease-Classification/refs/heads/main/Model/yolox_best.csv")
csv3 = pd.read_csv("https://raw.githubusercontent.com/ZerXXX0/Lung-Disease-Classification/refs/heads/main/Model/final_voted_result_5_models.csv")

# The vote is positional, so csv2 and csv3 must list the same Ids in the same
# order as csv1.
for other in (csv2, csv3):
    assert all(csv1["Id"] == other["Id"])

# === Perform majority voting with tie-break ===
final_preds = []
for first, second, third in zip(csv1["Predicted"], csv2["Predicted"], csv3["Predicted"]):
    tally = Counter((first, second, third))
    # Three distinct labels means no majority: use csv2's prediction.
    # Otherwise take the label with the highest count.
    final_preds.append(second if len(tally) == 3 else tally.most_common(1)[0][0])

# === Build final dataframe ===
final_df = pd.DataFrame({"Id": csv1["Id"], "Predicted": final_preds})

# === Save to CSV ===
final_df.to_csv("final_voted_result_voted.csv", index=False)

print("✅ Voting complete! Tie-breaks resolved using csv2. File saved as final_voted_result_voted.csv")


✅ Voting complete! Tie-breaks resolved using csv2. File saved as final_voted_result_voted.csv


In [11]:
import pandas as pd
from collections import Counter

# === Load prediction CSVs ===
# Five prediction files; the report below reads their "Id" and "Predicted" columns.
csv1 = pd.read_csv("https://raw.githubusercontent.com/ZerXXX0/Lung-Disease-Classification/refs/heads/main/Model/yolox_aug_best.csv")
csv2 = pd.read_csv("https://raw.githubusercontent.com/ZerXXX0/Lung-Disease-Classification/refs/heads/main/Model/yolox_best.csv")
csv3 = pd.read_csv("https://raw.githubusercontent.com/ZerXXX0/Lung-Disease-Classification/refs/heads/main/Model/yolox_last.csv")
csv4 = pd.read_csv("https://raw.githubusercontent.com/ZerXXX0/Lung-Disease-Classification/refs/heads/main/Model/coatnet%202.csv")
csv5 = pd.read_csv("https://raw.githubusercontent.com/ZerXXX0/Lung-Disease-Classification/refs/heads/main/Model/swin.csv")

# Rows are compared by position, so every file must list the same Ids in the
# same order as csv1.
for other in (csv2, csv3, csv4, csv5):
    assert all(csv1["Id"] == other["Id"])

# === Detect imbalance and conflict ===
# Each row yields the image Id followed by the five model predictions, in
# csv1..csv5 order.
rows = zip(
    csv1["Id"],
    csv1["Predicted"],
    csv2["Predicted"],
    csv3["Predicted"],
    csv4["Predicted"],
    csv5["Predicted"],
)
for img_id, *votes in rows:
    counts = Counter(votes)
    # Vote counts per distinct label, largest first (e.g. [3, 2] or [2, 2, 1]).
    distribution = sorted(counts.values(), reverse=True)

    if distribution == [1, 1, 1, 1, 1]:
        # Full conflict: every model predicted a different label.
        print(f"⚠️ Conflict (1-1-1-1-1) on {img_id} | Votes: {votes} | Counts: {dict(counts)}")
    elif distribution == [3, 2] or distribution == [2, 2, 1]:
        # Imbalance: a 3-2 or 2-2-1 split; any other split is not reported.
        print(f"🔎 Imbalance ({'-'.join(map(str,distribution))}) on {img_id} | Votes: {votes} | Counts: {dict(counts)}")


🔎 Imbalance (3-2) on test_0010.jpeg | Votes: [0, 0, 0, 1, 1] | Counts: {0: 3, 1: 2}
🔎 Imbalance (3-2) on test_0032.jpeg | Votes: [4, 4, 0, 0, 0] | Counts: {4: 2, 0: 3}
🔎 Imbalance (3-2) on test_0039.jpeg | Votes: [4, 4, 0, 4, 0] | Counts: {4: 3, 0: 2}
🔎 Imbalance (3-2) on test_0111.jpeg | Votes: [0, 4, 0, 0, 4] | Counts: {0: 3, 4: 2}
🔎 Imbalance (3-2) on test_0147.jpeg | Votes: [0, 4, 4, 0, 0] | Counts: {0: 3, 4: 2}
🔎 Imbalance (3-2) on test_0193.jpeg | Votes: [4, 4, 4, 0, 0] | Counts: {4: 3, 0: 2}
🔎 Imbalance (3-2) on test_0216.jpeg | Votes: [4, 2, 2, 2, 4] | Counts: {4: 2, 2: 3}
🔎 Imbalance (3-2) on test_0239.jpeg | Votes: [4, 0, 0, 0, 4] | Counts: {4: 2, 0: 3}
🔎 Imbalance (3-2) on test_0244.jpeg | Votes: [4, 4, 4, 0, 0] | Counts: {4: 3, 0: 2}
🔎 Imbalance (3-2) on test_0274.jpeg | Votes: [4, 2, 2, 4, 4] | Counts: {4: 3, 2: 2}
🔎 Imbalance (3-2) on test_0275.jpeg | Votes: [0, 4, 4, 0, 0] | Counts: {0: 3, 4: 2}
🔎 Imbalance (3-2) on test_0331.jpeg | Votes: [2, 0, 2, 0, 0] | Counts: {2: 2