In [3]:
import pandas as pd

df= pd.read_csv('dataset/Drug prescription to disease/final.csv')
# Remove duplicate entries
df = df.drop(columns=['Unnamed: 0'])

# Standardize text formatting
df['disease'] = df['disease'].str.lower().str.strip()
df['drug'] = df['drug'].str.lower().str.strip()

# Remove duplicates
df = df.drop_duplicates().reset_index(drop=True)

# Display the cleaned data
print(df.head())

                                         disease                          drug
0                      alkylating agent cystitis            sodium bicarbonate
1                      alkylating agent cystitis  citric acid / sodium citrate
2                           abdominal distension                   bethanechol
3                           abdominal distension                      pamabrom
4  abdominal distension prior to abdominal x-ray                   vasopressin


In [14]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Convert disease names to lowercase to avoid case mismatch
df['disease'] = df['disease'].str.lower()

# TF-IDF Vectorization for Disease Names
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(df['disease'])

# Compute cosine similarity
cosine_sim = cosine_similarity(tfidf_matrix, tfidf_matrix)

# Function to Recommend Medicines Based on Disease Similarity
def recommend_medicines(disease_name, df, similarity_matrix, top_n=5):
    disease_name = disease_name.lower()  # Convert input to lowercase for matching

    if disease_name not in df['disease'].values:
        return "Disease not found in the dataset."

    # Find all indices of the input disease (some diseases may appear multiple times)
    disease_indices = df[df['disease'] == disease_name].index.tolist()

    # Get similarity scores for all diseases
    sim_scores = []
    for idx in disease_indices:
        sim_scores.extend(list(enumerate(similarity_matrix[idx])))

    # Sort by highest similarity (excluding itself)
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)[1 : top_n + 1]

    # Get top similar disease indices
    similar_disease_indices = [i[0] for i in sim_scores]

    # Recommend medicines based on similar diseases
    recommended_meds = df.iloc[similar_disease_indices]['drug'].unique()

    return recommended_meds if len(recommended_meds) > 0 else ["No recommendations found."]

# Example Usage
disease_name = "abdominal distension"
print(recommend_medicines(disease_name, df, cosine_sim))


['pamabrom' 'bethanechol' 'vasopressin']


In [18]:
from collections import Counter

def recommend_medicines(disease_name, df, similarity_matrix, top_n=5):
    disease_name = disease_name.lower()  # Convert input to lowercase for matching

    if disease_name not in df['disease'].values:
        return "Disease not found in the dataset."

    # Find all indices of the input disease
    disease_indices = df[df['disease'] == disease_name].index.tolist()

    # Get similarity scores for all diseases
    sim_scores = []
    for idx in disease_indices:
        sim_scores.extend(list(enumerate(similarity_matrix[idx])))

    # Sort by highest similarity (excluding the input disease itself)
    sim_scores = sorted(sim_scores, key=lambda x: x[1], reverse=True)[1 : top_n + 1]

    # Get top similar disease indices
    similar_disease_indices = [i[0] for i in sim_scores]

    # Extract medicines and corresponding diseases
    recommended_pairs = df.iloc[similar_disease_indices][['disease', 'drug']].values.tolist()

    # Rank medicines by frequency
    med_counts = Counter([pair[1] for pair in recommended_pairs])
    ranked_meds = [med for med, count in med_counts.most_common()]

    # Organize output: Show which disease the medicine was taken from
    recommendations = [
        {"medicine": med, "source_disease": next(pair[0] for pair in recommended_pairs if pair[1] == med)}
        for med in ranked_meds
    ]

    return recommendations if recommendations else ["No recommendations found."]

# Example Usage
disease_name = "abdominal distension"
print(recommend_medicines(disease_name, df, cosine_sim))


[{'medicine': 'pamabrom', 'source_disease': 'abdominal distension'}, {'medicine': 'vasopressin', 'source_disease': 'abdominal distension prior to abdominal x-ray'}, {'medicine': 'bethanechol', 'source_disease': 'abdominal distension'}]


In [19]:
pip install Flask

Collecting Flask
  Downloading flask-3.1.0-py3-none-any.whl.metadata (2.7 kB)
Collecting itsdangerous>=2.2 (from Flask)
  Downloading itsdangerous-2.2.0-py3-none-any.whl.metadata (1.9 kB)
Collecting blinker>=1.9 (from Flask)
  Downloading blinker-1.9.0-py3-none-any.whl.metadata (1.6 kB)
Downloading flask-3.1.0-py3-none-any.whl (102 kB)
Downloading blinker-1.9.0-py3-none-any.whl (8.5 kB)
Downloading itsdangerous-2.2.0-py3-none-any.whl (16 kB)
Installing collected packages: itsdangerous, blinker, Flask
Successfully installed Flask-3.1.0 blinker-1.9.0 itsdangerous-2.2.0
Note: you may need to restart the kernel to use updated packages.


In [22]:
from flask import Flask
import threading

app = Flask(__name__)

@app.route("/")
def home():
    return "Flask app is running inside Jupyter!"

# Function to run Flask app
def run_flask():
    app.run(debug=False, use_reloader=False)

# Run Flask in a separate thread
flask_thread = threading.Thread(target=run_flask)
flask_thread.start()


 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
Press CTRL+C to quit
127.0.0.1 - - [02/Apr/2025 15:46:09] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [02/Apr/2025 15:46:09] "GET /favicon.ico HTTP/1.1" 404 -
127.0.0.1 - - [02/Apr/2025 15:48:48] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [02/Apr/2025 15:50:02] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [02/Apr/2025 15:50:56] "GET / HTTP/1.1" 200 -
