In [1]:
import string
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords

def clean_text(text):
    """Normalise a piece of free text for vectorisation.

    The text is lower-cased, every character listed in ``string.punctuation``
    is removed, the remainder is tokenised with ``word_tokenize`` and tokens
    found in the English stopword list are dropped.

    Args:
        text: The raw string to clean.

    Returns:
        The surviving tokens joined by single spaces.
    """
    text = text.lower()
    text = text.translate(str.maketrans('', '', string.punctuation))
    # Build the stopword set once per call. The previous version called
    # stopwords.words('english') for every token and searched the returned
    # list linearly; a set gives the same membership answers.
    stop_words = set(stopwords.words('english'))
    return ' '.join(word for word in word_tokenize(text) if word not in stop_words)

# Example usage: run clean_text on one sample sentence and print the cleaned string.
diagnostic = "Patient shows symptoms of acute bronchitis."
cleaned_diagnostic = clean_text(diagnostic)
print(cleaned_diagnostic)


patient shows symptoms acute bronchitis


In [2]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Example diagnostics. NOTE(review): these look like placeholder strings rather
# than real diagnostic sentences — confirm before relying on the resulting vocabulary.
diagnostics = ["diagnosis1", "diagnosis2", "diagnosis3"]
cleaned_diagnostics = [clean_text(d) for d in diagnostics]

# Fit a TF-IDF vectorizer on the cleaned texts and transform them in one step.
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(cleaned_diagnostics)


In [8]:
from gensim.models import Word2Vec

# Tokenize sentences for Word2Vec: each cleaned diagnostic becomes a list of
# whitespace-separated tokens.
# NOTE(review): the recorded run of this cell stopped at the gensim import with
# "cannot import name 'triu' from 'scipy.linalg'" — this looks like a
# gensim/SciPy version mismatch in the environment; confirm and align versions.
tokenized_diagnostics = [d.split() for d in cleaned_diagnostics]
model = Word2Vec(sentences=tokenized_diagnostics, vector_size=100, window=5, min_count=1, workers=4)

# Example: get the vector for a word. Only words that occur in
# `tokenized_diagnostics` have a vector, and indexing `model.wv` with any other
# word raises KeyError, so check the vocabulary before looking it up.
query_word = 'bronchitis'
vector = model.wv[query_word] if query_word in model.wv.key_to_index else None
if vector is None:
    print(f"'{query_word}' is not in the Word2Vec vocabulary")


ImportError: cannot import name 'triu' from 'scipy.linalg' (d:\STUDY\Genie Logiciel ISIMM\DevPFE\backend\healthcare-pfe\MachineLearningDev\.venv\lib\site-packages\scipy\linalg\__init__.py)

In [10]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import string

# Seed dataset: three diagnostic sentences, each paired by position with the
# list of prescriptions stored in the same row.
data = pd.DataFrame({
    'diagnostic': [
        "Patient shows symptoms of acute bronchitis.",
        "Patient has a high fever and sore throat.",
        "Diagnosis of mild asthma.",
    ],
    'prescription': [
        ["Cough syrup", "Rest"],
        ["Antibiotics", "Rest"],
        ["Inhaler", "Avoid allergens"],
    ],
})

# Preprocessing
def clean_text(text):
    """Lower-case, de-punctuate, tokenise and stopword-filter a string.

    Args:
        text: The raw string to clean.

    Returns:
        A single string made of the remaining tokens separated by spaces.
    """
    text = text.lower()
    text = text.translate(str.maketrans('', '', string.punctuation))
    # Fetch the English stopword list a single time per call and keep it in a
    # set; calling stopwords.words('english') inside the filter repeated that
    # call for every token.
    stop_words = set(stopwords.words('english'))
    return ' '.join(word for word in word_tokenize(text) if word not in stop_words)

# Store the cleaned form of every diagnostic next to the raw text
data['clean_diagnostic'] = data['diagnostic'].map(clean_text)

# TF-IDF features built from the cleaned diagnostics
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(data['clean_diagnostic'])

# Cosine-distance nearest-neighbour index over the TF-IDF rows; never ask for
# more neighbours than there are rows. `fit` returns the estimator itself.
knn = NearestNeighbors(metric='cosine', n_neighbors=min(5, len(data))).fit(X)

# Recommendation function
def recommend_prescriptions(diagnostic_text):
    """Return the prescriptions attached to the stored diagnostics nearest to a new one.

    The text is cleaned with ``clean_text``, vectorised with the fitted
    ``vectorizer`` and passed to ``knn.kneighbors``; at most five neighbours
    are requested, fewer when ``data`` has fewer rows.

    Args:
        diagnostic_text: Raw diagnostic text to find matches for.

    Returns:
        A list with one ``data['prescription']`` entry per neighbour, in the
        order the neighbour indices are returned by ``knn.kneighbors``.
    """
    query_vector = vectorizer.transform([clean_text(diagnostic_text)])
    neighbor_count = min(5, len(data))
    _, neighbor_rows = knn.kneighbors(query_vector, n_neighbors=neighbor_count)
    prescriptions = data['prescription']
    return [prescriptions.iloc[row] for row in neighbor_rows.flatten()]

# Example recommendation: query with a sentence that is not one of the stored
# diagnostics and print the prescriptions of the neighbours that come back.
new_diagnostic = "Patient complains of shortness of breath."
recommended_prescriptions = recommend_prescriptions(new_diagnostic)
print(recommended_prescriptions)


[['Cough syrup', 'Rest'], ['Antibiotics', 'Rest'], ['Inhaler', 'Avoid allergens']]


In [11]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.neighbors import NearestNeighbors
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import string
import nltk

# Download the NLTK resources named here ('punkt' and 'stopwords'); clean_text
# below calls word_tokenize and stopwords.words('english').
nltk.download('punkt')
nltk.download('stopwords')

# Define the clean_text function
def clean_text(text):
    """Prepare a diagnostic string for TF-IDF vectorisation.

    Steps: lower-case the text, delete every ``string.punctuation`` character,
    tokenise with ``word_tokenize`` and discard English stopwords.

    Args:
        text: The raw string to clean.

    Returns:
        The kept tokens joined with single spaces.
    """
    text = text.lower()
    text = text.translate(str.maketrans('', '', string.punctuation))
    # One stopword lookup per call, held in a set. Previously
    # stopwords.words('english') was evaluated again for each token.
    stop_words = set(stopwords.words('english'))
    return ' '.join(word for word in word_tokenize(text) if word not in stop_words)

# Hand-written sample corpus: five diagnostic sentences, each paired by
# position with the prescription list in the same row.
synthetic_data = pd.DataFrame(dict(
    diagnostic=[
        "Patient shows symptoms of acute bronchitis.",
        "Patient has a high fever and sore throat.",
        "Diagnosis of mild asthma.",
        "Patient complains of severe headache and nausea.",
        "Patient reports joint pain and swelling.",
    ],
    prescription=[
        ["Cough syrup", "Rest"],
        ["Antibiotics", "Rest"],
        ["Inhaler", "Avoid allergens"],
        ["Pain relievers", "Hydration"],
        ["Anti-inflammatory drugs", "Rest"],
    ],
))

# Add a cleaned copy of each diagnostic alongside the raw text
synthetic_data['clean_diagnostic'] = synthetic_data['diagnostic'].map(clean_text)

# Turn the cleaned diagnostics into TF-IDF vectors
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(synthetic_data['clean_diagnostic'])

# Fit a cosine-distance nearest-neighbour index on those vectors, capped at the
# number of available rows. `fit` returns the estimator itself.
knn = NearestNeighbors(metric='cosine', n_neighbors=min(5, len(synthetic_data))).fit(X)

# Define the recommendation function
def recommend_prescriptions(diagnostic_text):
    """Look up prescriptions for the stored diagnostics closest to the given text.

    The input goes through ``clean_text`` and the fitted ``vectorizer`` and
    is then handed to ``knn.kneighbors``, asking for up to five neighbours
    (fewer when ``synthetic_data`` has fewer rows).

    Args:
        diagnostic_text: Raw diagnostic text to find matches for.

    Returns:
        A list of ``synthetic_data['prescription']`` entries, one per
        neighbour, in the order the indices come back from ``knn.kneighbors``.
    """
    query_vector = vectorizer.transform([clean_text(diagnostic_text)])
    neighbor_count = min(5, len(synthetic_data))
    _, neighbor_rows = knn.kneighbors(query_vector, n_neighbors=neighbor_count)
    prescriptions = synthetic_data['prescription']
    return [prescriptions.iloc[row] for row in neighbor_rows.flatten()]

# Try the recommendation function on a sentence that is not in synthetic_data
# and print the prescription lists it returns.
new_diagnostic = "Patient complains of shortness of breath."
recommended_prescriptions = recommend_prescriptions(new_diagnostic)
print(recommended_prescriptions)


[['Pain relievers', 'Hydration'], ['Cough syrup', 'Rest'], ['Antibiotics', 'Rest'], ['Anti-inflammatory drugs', 'Rest'], ['Inhaler', 'Avoid allergens']]


[nltk_data] Downloading package punkt to C:\Users\Aziz
[nltk_data]     Hlila\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to C:\Users\Aziz
[nltk_data]     Hlila\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
