In [1]:

import warnings

import joblib
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split


In [2]:

# The path is relative, so the CSV has to sit in the notebook's working directory.
DATASET_PATH = "cancer_symptoms_dataset.csv"
df = pd.read_csv(DATASET_PATH)

# Preview the first five rows (last expression of the cell, so it is rendered as a table)
df.head(5)


Unnamed: 0,Cancer Type,breast pain,swollen lymph nodes,chest pain,skin dimpling,fever,breast lump,weight changes,abnormal bleeding,neck swelling,...,pelvic pain,pain during intercourse,nipple discharge,shortness of breath,hoarseness,frequent infections,swelling,fatigue,chronic cough,difficulty swallowing
0,Thyroid Cancer,0,0,0,0,0,0,1,0,0,...,0,0,0,0,1,0,0,0,0,0
1,Lung Cancer,0,0,1,0,0,0,0,0,0,...,0,0,0,1,0,0,0,1,1,0
2,Breast Cancer,1,0,0,0,0,0,0,0,0,...,0,0,1,0,0,0,1,0,0,0
3,Breast Cancer,1,0,0,0,0,1,0,0,0,...,0,0,1,0,0,0,1,0,0,0
4,Lung Cancer,0,0,1,0,0,0,0,0,0,...,0,0,0,1,0,0,0,1,0,0


In [3]:
# Separate the label column (y) from every remaining column (X)
TARGET_COLUMN = "Cancer Type"
y = df[TARGET_COLUMN]
X = df.drop(TARGET_COLUMN, axis=1)

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [4]:
# Fit a random forest on the training split.
# Both hyper-parameters are named so they are easy to find and tune.
N_TREES = 100
FOREST_SEED = 42  # fixed seed -> the same forest on every run
model = RandomForestClassifier(n_estimators=N_TREES, random_state=FOREST_SEED)
model.fit(X_train, y_train)


In [5]:

# Score the held-out split
y_pred = model.predict(X_test)

# Per-class precision/recall/F1 table and the overall accuracy
report = classification_report(y_test, y_pred)
accuracy = accuracy_score(y_test, y_pred)

# Show the metrics
print(f"✅ Model Training Complete! Accuracy: {accuracy * 100:.2f}%")
print("\n📊 Classification Report:\n", report)

# Persist the fitted model to disk. This is the cell's last expression,
# so the notebook echoes the list of file names joblib wrote.
MODEL_PATH = "cancer_prediction_model.pkl"
joblib.dump(model, MODEL_PATH)

✅ Model Training Complete! Accuracy: 99.00%

📊 Classification Report:
                  precision    recall  f1-score   support

   Blood Cancer       1.00      1.00      1.00        18
  Breast Cancer       1.00      1.00      1.00        14
Cervical Cancer       0.97      1.00      0.98        31
    Lung Cancer       1.00      0.94      0.97        17
 Thyroid Cancer       1.00      1.00      1.00        20

       accuracy                           0.99       100
      macro avg       0.99      0.99      0.99       100
   weighted avg       0.99      0.99      0.99       100



['cancer_prediction_model.pkl']

In [8]:
def predict_cancer(symptoms_dict, model_path="cancer_prediction_model.pkl"):
    """
    Predicts cancer type based on given symptoms.

    Input:
      symptoms_dict (dict): Dictionary where keys are symptom names, and values are 0 (absent) or 1 (present).
        Symptoms the model knows but that are missing from the dictionary are
        treated as absent (0). Keys the model was not trained on cannot
        influence the prediction; they are ignored and reported with a warning.
      model_path (str): Path of the joblib file holding the trained model.
        Defaults to the file name used when the model was saved.

    Output:
      Predicted cancer type (the first element of the model's prediction array).
    """

    # Load trained model.
    # NOTE: joblib files are pickles - only load model files you created yourself.
    model = joblib.load(model_path)

    # Take the feature schema from the model itself rather than from a
    # notebook-global training frame, so the function also works in a fresh
    # kernel where only the saved model file exists.
    feature_names = getattr(model, "feature_names_in_", None)
    if feature_names is None:
        # Estimators that did not record feature names (e.g. fitted by an older
        # scikit-learn): fall back to the training frame `X` from this notebook.
        feature_names = X.columns
    feature_names = list(feature_names)

    # Surface misspelled / unsupported symptom names instead of dropping them silently
    known = set(feature_names)
    unknown = [name for name in symptoms_dict if name not in known]
    if unknown:
        warnings.warn(
            f"Ignoring symptoms the model was not trained on: {unknown}",
            stacklevel=2,
        )

    # Build a single-row frame with exactly the training columns, in training
    # order; symptoms that were not supplied default to 0 (absent).
    row = {name: symptoms_dict.get(name, 0) for name in feature_names}
    input_data = pd.DataFrame([row], columns=feature_names)

    # Predict cancer type
    prediction = model.predict(input_data)
    return prediction[0]


In [9]:
# Function to take user input for symptoms
def get_user_symptoms(symptoms=None):
    """
    Interactively asks the user whether each symptom is present.

    Every symptom is prompted for with input(); the question is repeated until
    the answer parses as the integer 0 or 1.

    Input:
      symptoms (iterable of str, optional): Symptom names to ask about, in
        prompt order. Defaults to the built-in questionnaire below.

    Output:
      dict mapping each symptom name to 0 (absent) or 1 (present).
    """
    if symptoms is None:
        # NOTE(review): "weight loss" and "bruising" are not among the columns
        # visible in the dataset preview (which shows "weight changes" and
        # "swelling", and elides some columns) - confirm these names match the
        # training features, otherwise the answers cannot affect the prediction.
        symptoms = [
            "chronic cough", "chest pain", "weight loss", "shortness of breath", "fatigue",
            "pelvic pain", "abnormal bleeding", "pain during intercourse", "breast lump",
            "nipple discharge", "breast pain", "skin dimpling", "neck swelling", "hoarseness",
            "difficulty swallowing", "fever", "frequent infections", "bruising", "swollen lymph nodes"
        ]

    user_symptoms = {}
    print("\n🔹 Enter 1 if you have the symptom, 0 if you do not.")

    for symptom in symptoms:
        while True:
            # Only the parsing step can raise ValueError, so keep the try narrow
            try:
                value = int(input(f"Do you have {symptom}? (1 for Yes, 0 for No): "))
            except ValueError:
                print("❌ Invalid input! Please enter a number (1 or 0).")
                continue

            if value in (0, 1):
                user_symptoms[symptom] = value
                break

            # Parsed as an integer, but not one of the two accepted answers
            print("❌ Invalid input! Please enter 1 or 0.")

    return user_symptoms

# Run the interactive questionnaire; blocks on input() until every symptom is answered
user_symptoms = get_user_symptoms()

# Feed the collected answers to the saved model and show the predicted label.
# Symptoms the questionnaire did not ask about are filled in as 0 by predict_cancer.
predicted_cancer = predict_cancer(user_symptoms)
print("\n🔍 Predicted Cancer Type:", predicted_cancer)



🔹 Enter 1 if you have the symptom, 0 if you do not.


Do you have chronic cough? (1 for Yes, 0 for No):  1
Do you have chest pain? (1 for Yes, 0 for No):  1
Do you have weight loss? (1 for Yes, 0 for No):  1
Do you have shortness of breath? (1 for Yes, 0 for No):  1
Do you have fatigue? (1 for Yes, 0 for No):  1
Do you have pelvic pain? (1 for Yes, 0 for No):  1
Do you have abnormal bleeding? (1 for Yes, 0 for No):  1
Do you have pain during intercourse? (1 for Yes, 0 for No):  0
Do you have breast lump? (1 for Yes, 0 for No):  0
Do you have nipple discharge? (1 for Yes, 0 for No):  0
Do you have breast pain? (1 for Yes, 0 for No):  0
Do you have skin dimpling? (1 for Yes, 0 for No):  0
Do you have neck swelling? (1 for Yes, 0 for No):  


❌ Invalid input! Please enter a number (1 or 0).


Do you have neck swelling? (1 for Yes, 0 for No):  00
Do you have hoarseness? (1 for Yes, 0 for No):  0
Do you have difficulty swallowing? (1 for Yes, 0 for No):  0
Do you have fever? (1 for Yes, 0 for No):  0
Do you have frequent infections? (1 for Yes, 0 for No):  0
Do you have bruising? (1 for Yes, 0 for No):  0
Do you have swollen lymph nodes? (1 for Yes, 0 for No):  0



🔍 Predicted Cancer Type: Lung Cancer
