In [None]:
import atexit
import os

import pandas as pd
from neo4j import GraphDatabase

class Neo4JDataLoader:
    """Load disease/symptom CSV files into a Neo4j graph.

    Rows are sent to the server in batches (one ``UNWIND`` statement per
    batch) instead of one auto-commit statement per CSV row, and missing CSV
    cells are never turned into graph nodes.

    The loader can be used as a context manager so the driver is closed even
    when a load fails::

        with Neo4JDataLoader(uri, user, password) as loader:
            loader.load_symptom_disease_data("pairs.csv")
    """

    # Creates the Disease and Symptom nodes of each row and links them.
    _SYMPTOM_DISEASE_QUERY = """
    UNWIND $rows AS row
    MERGE (d:Disease {name: row.disease})
    MERGE (s:Symptom {name: row.symptom})
    MERGE (s)-[:INDICATES]->(d)
    """

    # FOREACH over a zero- or one-element list is used as a conditional: the
    # inner MERGEs run only when the corresponding value is not null.
    _DISEASE_METADATA_QUERY = """
    UNWIND $rows AS row
    MERGE (d:Disease {name: row.disease})
    FOREACH (ignored IN CASE WHEN row.body_part IS NULL THEN [] ELSE [1] END |
        MERGE (bp:BodyPart {name: row.body_part})
        MERGE (d)-[:AFFECTS]->(bp))
    FOREACH (ignored IN CASE WHEN row.specialist IS NULL THEN [] ELSE [1] END |
        MERGE (sp:Specialist {name: row.specialist})
        MERGE (d)-[:TREATED_BY]->(sp))
    FOREACH (ignored IN CASE WHEN row.age_group IS NULL THEN [] ELSE [1] END |
        MERGE (ag:AgeGroup {name: row.age_group})
        MERGE (d)-[:COMMON_IN]->(ag))
    """

    def __init__(self, uri, user, password):
        """Create a driver for the Neo4j instance at ``uri``."""
        self.driver = GraphDatabase.driver(uri, auth=(user, password))

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.close()
        return False  # never suppress the caller's exception

    def close(self):
        """Close the underlying driver."""
        self.driver.close()

    def _run_batches(self, query, rows, batch_size):
        """Run ``query`` once per slice of ``rows``, passing the slice as ``$rows``."""
        if batch_size < 1:
            raise ValueError("batch_size must be a positive integer")
        with self.driver.session() as session:
            for start in range(0, len(rows), batch_size):
                batch = rows[start:start + batch_size]
                # consume() waits for the statement to finish, so a failure is
                # raised for the batch that caused it.
                session.run(query, rows=batch).consume()

    def load_symptom_disease_data(self, file_path, batch_size=1000):
        """Merge (Symptom)-[:INDICATES]->(Disease) pairs from a CSV file.

        Args:
            file_path: CSV with ``Disease`` and ``Symptoms`` columns.
            batch_size: Number of rows sent per statement.

        Raises:
            KeyError: If a required column is missing from the CSV.
            ValueError: If ``batch_size`` is smaller than 1.
        """
        df = pd.read_csv(file_path)
        # Rows missing either endpoint cannot form a relationship; duplicates
        # are dropped because MERGE would be a no-op for them anyway.
        pairs = df[['Disease', 'Symptoms']].dropna().drop_duplicates()
        rows = [
            {'disease': disease, 'symptom': symptom}
            for disease, symptom in pairs.itertuples(index=False, name=None)
        ]
        self._run_batches(self._SYMPTOM_DISEASE_QUERY, rows, batch_size)

    def load_disease_metadata(self, file_path, batch_size=1000):
        """Merge body part, specialist and age group links for each disease.

        Rows without a ``Disease`` value are skipped. A missing value in any
        other column only skips the node and relationship for that column.

        Args:
            file_path: CSV with ``Disease``, ``Body Part``, ``Specialist`` and
                ``Age Group`` columns.
            batch_size: Number of rows sent per statement.

        Raises:
            KeyError: If a required column is missing from the CSV.
            ValueError: If ``batch_size`` is smaller than 1.
        """
        columns = ['Disease', 'Body Part', 'Specialist', 'Age Group']
        keys = ('disease', 'body_part', 'specialist', 'age_group')
        df = pd.read_csv(file_path)
        metadata = df[columns].dropna(subset=['Disease']).drop_duplicates()
        rows = [
            # Missing cells become None so that Cypher sees them as null.
            {key: (None if pd.isna(value) else value) for key, value in zip(keys, record)}
            for record in metadata.itertuples(index=False, name=None)
        ]
        self._run_batches(self._DISEASE_METADATA_QUERY, rows, batch_size)

# Usage
# Connection settings are read from the environment when present; the literals
# are the notebook's original local-development defaults.
loader = Neo4JDataLoader(
    os.environ.get("NEO4J_URI", "bolt://localhost:7687"),
    os.environ.get("NEO4J_USER", "neo4j"),
    os.environ.get("NEO4J_PASSWORD", "12345678"),
)
try:
    loader.load_symptom_disease_data("OGDS.csv")
    loader.load_disease_metadata("OGEDS.csv")
finally:
    # Release the driver even when a load fails (e.g. the server is unreachable).
    loader.close()

[#DC71]  _: <CONNECTION> error: Failed to read from defunct connection IPv4Address(('localhost', 7687)) (ResolvedIPv4Address(('127.0.0.1', 7687))): OSError('No data')


ServiceUnavailable: Failed to read from defunct connection IPv4Address(('localhost', 7687)) (ResolvedIPv4Address(('127.0.0.1', 7687)))

In [None]:
from neo4j import GraphDatabase

class KnowledgeGraphBuilder:
    """Create uniqueness constraints and print summary counts for the graph."""

    # (constraint name, Cypher variable, node label) for each constraint that
    # makes the ``name`` property unique per label.
    _UNIQUE_NAME_CONSTRAINTS = (
        ("disease_name", "d", "Disease"),
        ("symptom_name", "s", "Symptom"),
        ("body_part_name", "b", "BodyPart"),
        ("specialist_name", "s", "Specialist"),
        ("age_group_name", "a", "AgeGroup"),
    )

    def __init__(self, uri, user, password):
        """Create a driver for the Neo4j instance at ``uri``."""
        credentials = (user, password)
        self.driver = GraphDatabase.driver(uri, auth=credentials)

    def close(self):
        """Close the underlying driver."""
        self.driver.close()

    def create_schema(self):
        """Issue one ``CREATE CONSTRAINT ... IF NOT EXISTS`` statement per label."""
        statements = [
            f"CREATE CONSTRAINT {name} IF NOT EXISTS "
            f"FOR ({var}:{label}) REQUIRE {var}.name IS UNIQUE"
            for name, var, label in self._UNIQUE_NAME_CONSTRAINTS
        ]
        with self.driver.session() as session:
            for statement in statements:
                session.run(statement)

    def analyze_graph(self):
        """Print node counts per first label, then relationship counts per type."""

        def report(heading, records, key):
            # One "<key value>: <count>" line per record, under the heading.
            print(heading)
            for rec in records:
                print(f"{rec[key]}: {rec['count']}")

        with self.driver.session() as session:
            node_counts = session.run("""
            MATCH (n) 
            RETURN labels(n)[0] as label, count(*) as count
            ORDER BY count DESC
            """)
            report("\nNode Counts:", node_counts, 'label')

            relationship_counts = session.run("""
            MATCH ()-[r]->() 
            RETURN type(r) as relationship, count(*) as count
            ORDER BY count DESC
            """)
            report("\nRelationship Counts:", relationship_counts, 'relationship')

# Usage
# Connection settings are read from the environment when present; the literals
# are the notebook's original local-development defaults.
builder = KnowledgeGraphBuilder(
    os.environ.get("NEO4J_URI", "bolt://localhost:7687"),
    os.environ.get("NEO4J_USER", "neo4j"),
    os.environ.get("NEO4J_PASSWORD", "12345678"),
)
try:
    builder.create_schema()
    builder.analyze_graph()
finally:
    # Release the driver even when the server cannot be reached.
    builder.close()

ServiceUnavailable: Couldn't connect to localhost:7687 (resolved to ('[::1]:7687', '127.0.0.1:7687')):
Failed to establish connection to ResolvedIPv6Address(('::1', 7687, 0, 0)) (reason [WinError 10061] No connection could be made because the target machine actively refused it)
Failed to establish connection to ResolvedIPv4Address(('127.0.0.1', 7687)) (reason [WinError 10061] No connection could be made because the target machine actively refused it)

In [None]:
from neo4j import GraphDatabase
import numpy as np
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
import pandas as pd
import pickle

class GraphEmbedder:
    """Build low-dimensional disease embeddings from simple graph statistics."""

    # Columns returned by the feature query, in query order.
    _FEATURE_COLUMNS = [
        'disease', 'node_id', 'symptom_count',
        'specialist_count', 'bodypart_count', 'label',
    ]
    # Columns fed to the scaler and PCA.
    _NUMERICAL_COLUMNS = ['symptom_count', 'specialist_count', 'bodypart_count']

    def __init__(self, uri, user, password):
        """Create a driver for the Neo4j instance at ``uri``."""
        self.driver = GraphDatabase.driver(uri, auth=(user, password))

    def close(self):
        """Close the underlying driver."""
        self.driver.close()

    def extract_graph_features(self):
        """Extract features for each disease based on its connections.

        Returns:
            A DataFrame with one row per Disease node and the columns listed
            in ``_FEATURE_COLUMNS``. The columns are present even when the
            graph contains no Disease nodes.
        """
        with self.driver.session() as session:
            # NOTE(review): the three OPTIONAL MATCH clauses are chained, so
            # intermediate rows multiply per disease; count(DISTINCT ...)
            # keeps the counts correct regardless.
            result = session.run("""
            MATCH (d:Disease)
            OPTIONAL MATCH (d)<-[:INDICATES]-(s:Symptom)
            OPTIONAL MATCH (d)-[:TREATED_BY]->(sp:Specialist)
            OPTIONAL MATCH (d)-[:AFFECTS]->(bp:BodyPart)
            RETURN d.name as disease, 
                   id(d) as node_id,
                   count(DISTINCT s) as symptom_count,
                   count(DISTINCT sp) as specialist_count,
                   count(DISTINCT bp) as bodypart_count,
                   labels(d)[0] as label
            """)
            features = [dict(record) for record in result]
        # Passing the column list keeps the frame's shape stable for an empty result.
        return pd.DataFrame(features, columns=self._FEATURE_COLUMNS)

    def generate_embeddings(self, n_components=2):
        """Generate embeddings using PCA on the standardized count features.

        Args:
            n_components: Requested embedding size. It is reduced to
                ``min(n_samples, n_features)`` when larger than that.

        Returns:
            A dict with keys ``embeddings`` (node id and disease name both map
            to the same vector), ``feature_means``, ``feature_stds``,
            ``pca_components`` and ``disease_names``.

        Raises:
            ValueError: If the graph contains no Disease nodes.
        """
        features_df = self.extract_graph_features()
        if features_df.empty:
            raise ValueError("No Disease nodes found in the graph; nothing to embed")

        X = features_df[self._NUMERICAL_COLUMNS].values

        # Standardize features
        scaler = StandardScaler()
        X_scaled = scaler.fit_transform(X)

        # PCA cannot produce more components than min(n_samples, n_features).
        n_components = min(n_components, *X_scaled.shape)
        pca = PCA(n_components=n_components)
        embeddings = pca.fit_transform(X_scaled)

        # Pair rows with vectors positionally rather than through index labels.
        embedding_dict = {}
        node_ids = features_df['node_id'].tolist()
        disease_names = features_df['disease'].tolist()
        for vector, node_id, disease in zip(embeddings, node_ids, disease_names):
            embedding_dict[node_id] = vector
            embedding_dict[disease] = vector

        # Scaler and PCA parameters are kept alongside the vectors for reference.
        return {
            'embeddings': embedding_dict,
            'feature_means': scaler.mean_,
            'feature_stds': scaler.scale_,
            'pca_components': pca.components_,
            'disease_names': disease_names,
        }

# Usage
# The embedder is created before the try block so that `finally` never touches
# an unbound name (or a stale object left over from an earlier run).
embedder = GraphEmbedder(
    os.environ.get("NEO4J_URI", "bolt://localhost:7687"),
    os.environ.get("NEO4J_USER", "neo4j"),
    os.environ.get("NEO4J_PASSWORD", "12345678"),
)
try:
    embedding_info = embedder.generate_embeddings(n_components=2)

    # Save embeddings
    with open('disease_embeddings.pkl', 'wb') as f:
        pickle.dump(embedding_info, f)

    disease_names = embedding_info['disease_names']
    print("Embeddings generated successfully!")
    print(f"Number of diseases processed: {len(disease_names)}")
    # Only show an example when at least one disease was embedded.
    if disease_names:
        print(f"Example embedding for first disease: {embedding_info['embeddings'][disease_names[0]]}")

except Exception as e:
    # Best-effort demo cell: report the failure instead of raising.
    print(f"Error: {str(e)}")
finally:
    embedder.close()



Embeddings generated successfully!
Number of diseases processed: 819
Example embedding for first disease: [0.41312364 1.39955623]


In [None]:
from flask import Flask, request, render_template
from neo4j import GraphDatabase
from fuzzywuzzy import process
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import re
import numpy as np
from typing import List, Dict, Tuple

# Flask application object that the view functions below are registered on.
app = Flask(__name__)

class MedicalSystem:
    """Symptom extraction and disease recommendation backed by a Neo4j graph.

    Symptom, disease and specialist data is read from the graph once, at
    construction time, and kept in memory; ``analyze`` runs no database
    queries of its own.
    """

    def __init__(self, uri: str, user: str, password: str):
        """Connect to Neo4j at ``uri`` and load the medical data."""
        self.driver = GraphDatabase.driver(uri, auth=(user, password))
        self._load_data()

    def _load_data(self):
        """Load all medical data in one pass and fit the TF-IDF model."""
        with self.driver.session() as session:
            # Load symptoms. Nodes without a usable name are skipped: None
            # would break `.lower()` and "" is a substring of every input.
            result = session.run("MATCH (s:Symptom) RETURN s.name as name")
            self.all_symptoms = [record['name'] for record in result if record['name']]
            self.symptom_lower_map = {s.lower(): s for s in self.all_symptoms}

            # Load disease-symptom relationships
            result = session.run("""
            MATCH (d:Disease)<-[:INDICATES]-(s:Symptom)
            RETURN d.name as disease, collect(s.name) as symptoms
            """)
            self.disease_symptoms = {record['disease']: record['symptoms'] for record in result}

            # Load disease-specialist relationships
            result = session.run("""
            MATCH (d:Disease)-[:TREATED_BY]->(s:Specialist)
            RETURN d.name as disease, collect(s.name) as specialists
            """)
            self.disease_specialists = {record['disease']: record['specialists'] for record in result}

        # One TF-IDF document per disease, built from its symptom names.
        # disease_names[i] corresponds to row i of symptom_vectors.
        self.vectorizer = TfidfVectorizer(tokenizer=self._tokenize_medical)
        symptom_texts = [" ".join(symptoms) for symptoms in self.disease_symptoms.values()]
        self.disease_names = list(self.disease_symptoms.keys())
        self.symptom_vectors = self.vectorizer.fit_transform(symptom_texts)

    def _tokenize_medical(self, text: str) -> List[str]:
        """Lower-case ``text`` and split it into alphanumeric tokens.

        Hyphens and apostrophes are kept when they join two alphanumeric runs
        (e.g. ``chest-pain``, ``can't``).
        """
        return re.findall(r"[a-zA-Z0-9]+(?:[-'][a-zA-Z0-9]+)*", text.lower())

    def _rank_diseases(self, similarities, symptom_names: List[str], top_k: int = 5) -> List[Dict]:
        """Build recommendation dicts for the ``top_k`` most similar diseases.

        Diseases whose similarity is not strictly positive are left out, so a
        query that matches nothing yields no recommendations.
        """
        if top_k < 1:
            return []
        recommendations = []
        # Indices sorted by descending similarity, truncated to top_k.
        for idx in np.argsort(similarities)[::-1][:top_k]:
            confidence = float(similarities[idx])
            if not confidence > 0.0:
                continue
            disease = self.disease_names[idx]
            known_symptoms = set(self.disease_symptoms[disease])
            recommendations.append({
                'disease': disease,
                'confidence': confidence,
                'matching_symptoms': [s for s in symptom_names if s in known_symptoms],
                'specialists': self.disease_specialists.get(disease, ["General Practitioner"])
            })
        return recommendations

    def analyze(self, text: str, top_k: int = 5) -> Tuple[List[Tuple[str, float]], List[Dict]]:
        """Return both extracted symptoms and recommendations.

        Args:
            text: Free-text description of the symptoms.
            top_k: Maximum number of diseases to recommend.

        Returns:
            ``(symptoms_with_scores, recommendations)``. The first item is a
            list of ``(symptom, score)`` pairs with scores on a 0-100 scale.
            The second is a list of dicts with keys ``disease``,
            ``confidence``, ``matching_symptoms`` and ``specialists``, ordered
            by descending confidence. Both are empty for blank input.
        """
        if not text or not text.strip():
            return [], []

        # 1. Extract high-confidence symptoms with scores
        input_clean = text.lower()
        symptoms_with_scores = []
        seen = set()  # symptom names already recorded, for O(1) duplicate checks

        def remember(symptom, score):
            seen.add(symptom)
            symptoms_with_scores.append((symptom, score))

        # Exact matching (100% confidence).
        # NOTE(review): this is a plain substring test, so a short symptom
        # name also matches inside a longer unrelated word.
        for symptom_lower, symptom in self.symptom_lower_map.items():
            if symptom_lower in input_clean:
                remember(symptom, 100.0)

        # Fuzzy matching of each remaining symptom against the whole input
        for symptom in self.all_symptoms:
            if symptom in seen:
                continue
            matches = process.extract(symptom.lower(), [input_clean], limit=1)
            if matches and matches[0][1] > 95:
                remember(symptom, matches[0][1])

        # TF-IDF matching: when the input is nearly identical to a disease's
        # symptom document, adopt all of that disease's symptoms.
        input_terms = " ".join(self._tokenize_medical(input_clean))
        input_vec = self.vectorizer.transform([input_terms])
        cosine_scores = cosine_similarity(input_vec, self.symptom_vectors)[0]
        for idx, score in enumerate(cosine_scores):
            if score > 0.95:
                for symptom in self.disease_symptoms[self.disease_names[idx]]:
                    if symptom not in seen:
                        remember(symptom, float(score * 100))

        # Get just the symptom names for recommendations
        symptom_names = [s for s, _ in symptoms_with_scores]
        if not symptom_names:
            # Nothing recognised: ranking an all-zero vector would only
            # return arbitrary diseases with zero confidence.
            return symptoms_with_scores, []

        # 2. Get recommendations
        input_vec = self.vectorizer.transform([" ".join(symptom_names)])
        similarities = cosine_similarity(input_vec, self.symptom_vectors)[0]
        recommendations = self._rank_diseases(similarities, symptom_names, top_k)

        return symptoms_with_scores, recommendations

    def close(self):
        """Close the underlying driver."""
        self.driver.close()

# Initialize the system
# Connection settings are read from the environment when present; the literals
# are the values previously hard-coded in this notebook.
# NOTE(review): these fallback credentials have been committed with the
# notebook - rotate them and drop the defaults once deployments set the
# NEO4J_* variables.
medical_system = MedicalSystem(
    os.environ.get("NEO4J_URI", "bolt://44.201.21.92"),
    os.environ.get("NEO4J_USER", "neo4j"),
    os.environ.get("NEO4J_PASSWORD", "partitions-slave-diagrams"),
)

@app.route('/', methods=['GET', 'POST'])
def index():
    """Render the form page; on POST, analyze the submitted ``symptoms`` field.

    The template always receives ``symptoms`` and ``results``. Both are empty
    lists for GET requests and for blank submissions.
    """
    extracted, recommended = [], []
    submitted = request.form.get('symptoms', '').strip() if request.method == 'POST' else ''
    if submitted:
        extracted, recommended = medical_system.analyze(submitted)
    return render_template('index.html', symptoms=extracted, results=recommended)

# Registered with atexit rather than @app.teardown_appcontext: an app-context
# teardown runs every time an application context is popped (normally once per
# request), which closed the shared driver after the first request.
@atexit.register
def shutdown(exception=None):
    """Close the shared Neo4j driver when the interpreter exits.

    ``exception`` is unused; it is kept so the function's signature is unchanged.
    """
    medical_system.close()

if __name__ == '__main__':
    # use_reloader=False: debug mode otherwise enables the auto-reloader,
    # which re-executes the process and ends a notebook kernel run with
    # SystemExit.
    app.run(debug=True, port=8080, use_reloader=False)

 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:8080
Press CTRL+C to quit
 * Restarting with stat


SystemExit: 1

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
