# Title: Personalized Medical Recommendation System with Machine Learning

# Description:

Welcome to our cutting-edge Personalized Medical Recommendation System, a powerful platform designed to assist users in understanding and managing their health. Leveraging the capabilities of machine learning, our system analyzes user-input symptoms to predict potential diseases accurately.

# load dataset & tools

In [2]:
import  pandas as pd

In [3]:
dataset = pd.read_csv('dataset/Training.csv')


In [4]:
dataset;

In [5]:
# vals = dataset.values.flatten()

In [31]:
dataset.shape

(4920, 133)

# train test split

In [6]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

In [7]:
# Drop target column and separate the features (X) and target (y)
X = dataset.drop('prognoza', axis=1)
y = dataset['prognoza']

# Check for missing values
print(dataset.isnull().sum())  # Check if there are missing values in any column

# Encoding the target variable 'prognosis'
le = LabelEncoder()
le.fit(y)
Y = le.transform(y)

# Check the mapping of labels to encoded values
print(list(zip(le.classes_, le.transform(le.classes_))))

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3, random_state=20)


kruarje                          0
 Skuqja_e_lëkurës                0
 shpërthimet_e_lëkurës_nyjore    0
 teshtitje_e_vazhdueshme         0
 dridhje                         0
                                ..
 flluskë                         0
 plagë_kuqe_rreth_hundës         0
 kore_e_verdhë                   0
                                 0
prognoza                         0
Length: 133, dtype: int64
[('(vertigo) Vertigo Pozicionale Paroymsal', np.int64(0)), ('Aknet', np.int64(1)), ('Alergji', np.int64(2)), ('Artriti', np.int64(3)), ('Astma bronkiale', np.int64(4)), ('Dengue', np.int64(5)), ('Diabeti ', np.int64(6)), ('Ftohja e zakonshme', np.int64(7)), ('GERD', np.int64(8)), ('Gastroenteriti', np.int64(9)), ('Hemorroidet dimorfike (grumbullat)', np.int64(10)), ('Hepatiti A', np.int64(11)), ('Hepatiti B', np.int64(12)), ('Hepatiti C', np.int64(13)), ('Hepatiti D', np.int64(14)), ('Hepatiti E', np.int64(15)), ('Hepatiti alkoolik', np.int64(16)), ('Hipertensioni', np.int64(17)), (

# Training top models

In [8]:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, confusion_matrix
import numpy as np

# Generate a synthetic classification dataset
X, y = make_classification(n_samples=1000, n_features=20, random_state=42)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Apply absolute transformation to remove negative values (if using MultinomialNB)
X_train = np.abs(X_train)
X_test = np.abs(X_test)

# Create a dictionary to store models
models = {
    'SVC': SVC(kernel='linear'),
    'RandomForest': RandomForestClassifier(n_estimators=100, random_state=42),
    'GradientBoosting': GradientBoostingClassifier(n_estimators=100, random_state=42),
    'KNeighbors': KNeighborsClassifier(n_neighbors=5),
    'MultinomialNB': MultinomialNB()
}

# Loop through the models, train, test, and print results
for model_name, model in models.items():
    # Train the model
    model.fit(X_train, y_train)

    # Test the model
    predictions = model.predict(X_test)

    # Calculate accuracy
    accuracy = accuracy_score(y_test, predictions)
    print(f"{model_name} Accuracy: {accuracy:.4f}")

    # Calculate confusion matrix
    cm = confusion_matrix(y_test, predictions)
    print(f"{model_name} Confusion Matrix:")
    print(np.array2string(cm, separator=', '))

    print("\n" + "="*40 + "\n")


SVC Accuracy: 0.5333
SVC Confusion Matrix:
[[89, 56],
 [84, 71]]


RandomForest Accuracy: 0.6733
RandomForest Confusion Matrix:
[[110,  35],
 [ 63,  92]]


GradientBoosting Accuracy: 0.6933
GradientBoosting Confusion Matrix:
[[109,  36],
 [ 56,  99]]


KNeighbors Accuracy: 0.5267
KNeighbors Confusion Matrix:
[[81, 64],
 [78, 77]]


MultinomialNB Accuracy: 0.5167
MultinomialNB Confusion Matrix:
[[90, 55],
 [90, 65]]




# single prediction

In [9]:
# selecting svc
svc = SVC(kernel='linear')
svc.fit(X_train,y_train)
ypred = svc.predict(X_test)
accuracy_score(y_test,ypred)

0.5333333333333333

In [10]:
# save svc
import pickle
pickle.dump(svc,open('svc.pkl','wb'))

In [11]:
# load model
svc = pickle.load(open('svc.pkl','rb'))

In [12]:
# Assuming X_test is a NumPy array (which is common after train_test_split)
print("Predicted Disease:", svc.predict(X_test[0].reshape(1, -1)))  # Reshape if it's a NumPy array
print("Actual Disease:", y_test[0])


Predicted Disease: [0]
Actual Disease: 1


In [13]:
print("Predicted Disease:", svc.predict(X_test[100].reshape(1, -1)))  # Use array indexing, no .iloc
print("Actual Disease:", y_test[100])  # y_test is likely a NumPy array, so no .iloc needed


Predicted Disease: [0]
Actual Disease: 1


In [14]:
print("Predicted Disease:", svc.predict(X_test[100].reshape(1, -1)))  # Use array indexing, no .iloc
print("Actual Disease:", y_test[100])  # y_test is likely a NumPy array, so no .iloc needed


Predicted Disease: [0]
Actual Disease: 1


# Recommendation System and Prediction

# load database and use logic for recommendations

In [15]:
sym_des = pd.read_csv("/Users/bleonitshillova/Desktop/Medical-Diagnoses SHQIP/dataset/symtoms_df-sq.csv", on_bad_lines='skip')
precautions = pd.read_csv("/Users/bleonitshillova/Desktop/Medical-Diagnoses SHQIP/dataset/precautions_df-sq.csv",on_bad_lines='skip')
workout = pd.read_csv("/Users/bleonitshillova/Desktop/Medical-Diagnoses SHQIP/dataset/workout_df-sq.csv",on_bad_lines='skip')
description = pd.read_csv("/Users/bleonitshillova/Desktop/Medical-Diagnoses SHQIP/dataset/description-sq.csv",on_bad_lines='skip')
medications = pd.read_csv("/Users/bleonitshillova/Desktop/Medical-Diagnoses SHQIP/dataset/medications-sq.csv",on_bad_lines='skip')
diets = pd.read_csv("/Users/bleonitshillova/Desktop/Medical-Diagnoses SHQIP/dataset/diets-sq.csv",on_bad_lines='skip')


In [16]:
def helper(dis):
    # Fetch the description from the description DataFrame
    desc = description[description['Sëmundje'] == dis]['përshkrimi']
    desc = " ".join([w for w in desc]) if not desc.empty else "No description available"

    # Fetch precautions for the disease, if available
    pre = precautions[precautions['Sëmundje'] == dis][['Masaparaprake_1', 'Masaparaprake_2', 'Masaparaprake_3', 'Masaparaprake_4']]
    if pre.empty:
        pre = ["No precautions available"]
    else:
        pre = pre.values[0]  # Take the first row of the precaution values

    # Fetch medications for the disease, if available
    med = medications[medications['Sëmundje'] == dis]['mjekim']
    med = med.tolist() if not med.empty else ["No medications available"]

    # Fetch diet recommendations for the disease, if available
    die = diets[diets['Sëmundje'] == dis]['diet']
    die = die.tolist() if not die.empty else ["No diet recommendations available"]

    # Fetch workout suggestions for the disease, if available
    wrkout = workout[workout['Sëmundje'] == dis]['stërvitje']
    wrkout = wrkout.tolist() if not wrkout.empty else ["No workout recommendations available"]

    return desc, pre, med, die, wrkout

symptoms_dict = {
    'kruarje': 0, 'Skuqja_e_lëkurës': 1, 'shpërthimet_e_lëkurës_nyjore': 2, 
    'teshtitje_e_vazhdueshme': 3, 'dridhje': 4, 'të_dridhura': 5, 'dhimbje_nyjesh': 6, 
    'dhimbje_barku': 7, 'aciditeti': 8, 'ulçera_në_gjuhë': 9, 'humbje_muskujsh': 10, 
    'të_vjella': 11, 'djegie_miktruimi': 12, 'njolla_urinimi': 13, 'lodhje': 14, 
    'shtim_peshe': 15, 'ankthi': 16, 'duart_dhe_këmbët_e_ftohta': 17, 'ndryshimet_e_humorit': 18, 
    'humbje_peshe': 19, 'shqetësim': 20, 'letargji': 21, 'arna_në_fyt': 22, 
    'Niveli_i_parregullt_i_sheqerit': 23, 'kollë': 24, 'temperaturë_e_lartë': 25, 
    'sytë_mbytur': 26, 'gulçim': 27, 'djersitje': 28, 'dehidratim': 29, 
    'dispepsi': 30, 'dhimbje_koke': 31, 'lëkura_e_kuqe': 32, 'urina_e_errët': 33, 
    'nauze': 34, 'humbja_e_oreksit': 35, 'dhimbje_pas_syve': 36, 'Dhimbja_e_shpinës': 37, 
    'kapsllëk': 38, 'diarre': 39, 'ethe_të_lehta': 40, 'urina_e_verdhë': 41, 
    'zverdhja_e_syve': 42, 'dështimi_akut_i_mëlçisë': 43, 'lëngu_mbingarkues': 44, 
    'ënjtja_e_stomakut': 45, 'nyjet_limfatikët_e_fryrë': 46, 'Sëmundje': 47, 
    'vizion_turbullt_dhe_i_shtrembëruar': 48, 'gëlbazë': 49, 'acarim_fyti': 50, 
    'skuqje_e_syve': 51, 'presioni_i_sinusit': 52, 'rrjedhje_e_hundës': 53, 
    'mbingarkesë': 54, 'dhimbje_gjoksi': 55, 'dobësi_në_gjymtyrë': 56, 'rrahje_të_shpejta_të_zemrës': 57, 
    'dhimbje_gjatë_lëvizjeve_të_zorrëve': 58, 'dhimbje_në_rajonin_anal': 59, 'jashtëqitje_e_përgjakshme': 60, 
    'acarim_në_anus': 61, 'Dhimbja_e_qafës': 62, 'marramendje': 63, 'ngërçe': 64, 
    'mavijosje': 65, 'obeziteti': 66, 'këmbë_të_fryra': 67, 'enë_të_fryra_të_gjakut': 68, 
    'Fytyra_dhe_sy_të_fryrë': 69, 'tiroidë_e_zmadhuar': 70, 'thonjtë_e_brishtë': 71, 
    'ekstremitetet_e_fryra': 72, 'urinë_e_tepruar': 73, 'kontakte_extra_martesore': 74, 
    'tharje_dhe_dridhje_buzesh': 75, 'të_folurit_të_paqartë': 76, 'dhimbje_gjuri': 77, 
    'Dhimbja_e_nyjeve_të_kofshës': 78, 'dobësi_muskulare': 79, 'qafa_e_ngurtë': 80, 
    'ënjtje_nyjesh': 81, 'Lëvizja_ngurtësi': 82, 'rrotullime_lëvizjesh': 83, 'humbja_e_ekuilibrit': 84, 
    'paqëndrueshmëri': 85, 'dobësi_e_një_anës_trupore': 86, 'humbja_e_eres': 87, 
    'shqetësimi_i_fshikëzës': 88, 'Furinë_me_erë_të_keqe': 89, 'Ndjenja_e_vazhdueshme_e_urinës': 90, 
    'kalimi_i_gazeve': 91, 'kruarje_e_brendshme': 92, 'look_toksik_(tifos)': 93, 'depresioni': 94, 
    'nervozizëm': 95, 'dhimbje_muskulore': 96, 'altered_sensorium': 97, 'njollat_e_kuqe_mbi_trup': 98, 
    'dhimbje_barku': 99, 'menstruacione_jo_normale': 100, 'arna_diskromatike': 101, 
    'lotim_nga_sytë': 102, 'oreksi_i_shtuar': 103, 'poliuria': 104, 'historia_familjare': 105, 
    'mukoide_sputum': 106, 'pështymë_e_ndryshkur': 107, 'mungesë_përqendrimi': 108, 
    'shqetësimet_vizuale': 109, 'marrja_e_transfuzionit_të_gjakut': 110, 'marrja_e_injeksioneve_josterile': 111, 
    'koma': 112, 'gjakderdhje_në_stomak': 113, 'zgjerimi_i_barkut': 114, 'historia_e_konsumimit_të_alkoolit': 115, 
    'lëngu_mbingarkues': 116, 'gjak_në_sputum': 117, 'venat_e_shqara_në_viç': 118, 'palpitacione': 119, 
    'ecje_dhe_dhimbje': 120, 'puçrrat_e_mbushura_me_qelb': 121, 'pika_te_zeza': 122, 
    'lëkundje': 123, 'lëkurë_lëkurë': 124, 'pluhuri_si_argjendi': 125, 'dhëmbëzat_e_vogla_në_thonj': 126, 
    'thonjtë_inflamator': 127, 'flluskë': 128, 'plagë_kuqe_rreth_hundës': 129, 'kore_e_verdhë': 130, 
    'prognoza': 131
}
diseases_list = {
  15: "Infeksion mykotik",
  4: "Alergji",
  16: "GERD",
  9: "Kolestaza kronike",
  14: "Reagimiidrogës",
  33: "Sëmundja e ulçerës peptike",
  1: "SIDA",
  12: "Diabeti",
  17: "Gastroenteriti",
  6: "Astma bronkiale",
  23: "Hipertensioni",
  30: "Migrena",
  7: "Spondiloza e qafës së mitrës",
  32: "Paraliza (hemorragjia e trurit)",
  28: "Verdhëza",
  29: "Malaria",
  8: "Lija e dhenve",
  11: "Dengue",
  37: "Tifoja",
  40: "Hepatiti A",
  19: "Hepatiti B",
  20: "Hepatiti C",
  21: "Hepatiti D",
  22: "Hepatiti E",
  3: "Hepatiti alkoolik",
  36: "Tuberkulozi",
  10: "Ftohja e zakonshme",
  34: "Pneumonia",
  13: "Hemorroidetdimorfike(grumbullat)",
  18: "Sulmi në zemër",
  39: "Venat me variçe",
  26: "Hipotireoza",
  24: "Hipertiroidizmi",
  25: "Hipoglicemia",
  31: "Osteoartrozë",
  5: "Artriti",
  0: "(vertigo) Vertigo Pozicionale Paroymsal",
  2: "Aknet",
  38: "Infeksioni i traktit urinar",
  35: "Psoriasis",
  27: "Impetigo"
}


# Model Prediction function
def get_predicted_value(patient_symptoms):
    try:
        input_vector = np.zeros(len(symptoms_dict))  # Initialize with the correct number of features (132)
        
        # Loop over each symptom provided by the user
        for item in patient_symptoms:
            # Check if the symptom exists in the symptoms dictionary
            if item in symptoms_dict:
                input_vector[symptoms_dict[item]] = 1
            else:
                print(f"Warning: Symptom '{item}' not found in the symptoms dictionary.")
        
        # Make the prediction using the model
        predicted_disease = svc.predict([input_vector])[0]
        return diseases_list.get(predicted_disease, "Disease not found")
    except KeyError as e:
        return f"Error with symptoms: {e}"
    except Exception as e:
        return f"Prediction failed: {e}"


In [21]:
# Test 1
# Split the user's input into a list of symptoms (assuming they are comma-separated) # itching,skin_rash,nodal_skin_eruptions
symptoms = input("Enter your symptoms: ")
user_symptoms = [s.strip() for s in symptoms.split(',')]
user_symptoms = [symptom.strip("[]' ") for symptom in user_symptoms]


predicted_disease = get_predicted_value(user_symptoms)

try:
    dis_des, precautions, medications, rec_diet, workout = helper(predicted_disease)
except KeyError as e:
    print(f"Missing key in the dataset: {e}")
    dis_des = "Description not available"
    precautions = [["Precautions not available"]]
    medications = ["Medications not available"]
    rec_diet = ["Diet recommendations not available"]
    workout = ["Workout recommendations not available"]


print("=================predicted disease============")
print(predicted_disease)
print("=================description==================")
print(dis_des)
print("=================precautions==================")
for i, p in enumerate(precautions[0], 1):
    print(i, ": ", p)

print("=================medications==================")
for i, m in enumerate(medications, 1):
    print(i, ": ", m)

print("=================workout==================")
for i, w in enumerate(workout, 1):
    print(i, ": ", w)

print("=================diets==================")
for i, Diet in enumerate(rec_diet, 1):
    print(i, ": ", Diet)



TypeError: list indices must be integers or slices, not str

In [20]:
symptoms = input("Enter your symptoms (comma-separated): ")
user_symptoms = [s.strip() for s in symptoms.split(',') if s.strip()]
if not user_symptoms:
    print("Please enter at least one symptom.")
    exit()

try:
    predicted_disease = get_predicted_value(user_symptoms)
    desc, pre, med, die, wrkout = helper(predicted_disease)
except Exception as e:
    print(f"An error occurred: {e}")
    predicted_disease = "Unknown"
    desc, pre, med, die, wrkout = "No description", [["No precautions"]], ["No medications"], ["No diets"], ["No workouts"]

print("=================predicted disease============\n")
print(predicted_disease)

print("\n=================description==================\n")
print(desc)

print("\n=================precautions==================\n")
if pre and isinstance(pre[0], list):
    for i, p_i in enumerate(pre[0], 1):
        print(i, ": ", p_i)
else:
    print("No precautions available")

print("\n=================medications==================\n")
for i, m_i in enumerate(med, 1):
    print(i, ": ", m_i)

print("\n=================workout==================\n")
for i, w_i in enumerate(wrkout, 1):
    print(i, ": ", w_i)

print("\n=================diets==================\n")
for i, d_i in enumerate(die, 1):
    print(i, ": ", d_i)


An error occurred: list indices must be integers or slices, not str

Unknown


No description


1 :  No precautions


1 :  No medications


1 :  No workouts


1 :  No diets


In [19]:
# let's use pycharm flask app
# but install this version in pycharm
import sklearn
print(sklearn.__version__)

1.5.2
