# 💊 AI in Pharmacovigilance Project
### Full Workflow: PubMed Literature → NLP Cleaning → Adverse Event (AE) Detection → Visualization

## 📥 Phase 1 & 2: Install & Fetch PubMed Abstracts

In [None]:
# Install every third-party package imported by the cells in this notebook:
# Bio (biopython), spacy, pandas and matplotlib.
!pip install biopython spacy pandas matplotlib

In [None]:
# Import and setup Entrez
from Bio import Entrez
# Contact address stored on the Entrez module; it is set here, before any
# Entrez request is issued by the cells below. The value is a placeholder.
Entrez.email = "your_email@example.com"  # Replace with your email

In [None]:
# Search PubMed for the query and keep the list of matching article IDs.
# retmax=100 asks for at most 100 IDs in the response.
handle = Entrez.esearch(db="pubmed", term="paracetamol adverse events", retmax=100)
try:
    record = Entrez.read(handle)
finally:
    # Release the network handle even if parsing the response fails.
    handle.close()
id_list = record["IdList"]
# Show only the first ten IDs as a quick sanity check.
print("PubMed IDs:", id_list[:10])

In [None]:
# Fetch the MEDLINE records for the IDs found above and collect their abstracts.
from Bio import Medline

abstracts = []
if id_list:  # Skip the request entirely when the search returned no IDs.
    handle = Entrez.efetch(db="pubmed", id=id_list, rettype="medline", retmode="text")
    try:
        # Medline.parse reads records from the handle while it is iterated,
        # so the list is built before the handle is closed.
        records = Medline.parse(handle)
        # Records without an "AB" (abstract) field are skipped.
        abstracts = [rec["AB"] for rec in records if "AB" in rec]
    finally:
        handle.close()
print(f"Fetched {len(abstracts)} abstracts")

## 🧹 Phase 3: NLP Preprocessing

In [None]:
# Download the "en_core_web_sm" spaCy pipeline, which the next cell loads by name.
!python -m spacy download en_core_web_sm

In [None]:
import spacy
nlp = spacy.load("en_core_web_sm")

# Reduce every abstract to a single space-joined string of lowercased lemmas.
# Stop words and punctuation are dropped; whitespace-only tokens are dropped
# too, so runs of spaces/newlines in the raw text cannot end up between two
# words and break later multi-word matching (e.g. "liver damage").
cleaned_abstracts = []
# nlp.pipe processes the texts as a stream instead of one call per text.
for doc in nlp.pipe(abstracts):
    tokens = [
        token.lemma_.lower()
        for token in doc
        if not (token.is_stop or token.is_punct or token.is_space)
    ]
    cleaned_abstracts.append(" ".join(tokens))

## 🚨 Phase 4: AE Detection

In [None]:
# Adverse-event terms searched for in the cleaned abstracts.
# NOTE(review): the cleaned text is lemmatised, so an inflected entry such as
# "vomiting" may have been reduced to a different lemma there — confirm that
# each term appears in the form the lemmatiser produces.
ae_terms = [
    "rash",
    "headache",
    "dizziness",
    "nausea",
    "vomiting",
    "liver damage",
]

In [None]:
import re

import pandas as pd

# One compiled pattern per term, anchored on word boundaries so a term only
# matches as a whole word or phrase: "rash" must not fire on "crash".
# re.escape keeps any regex metacharacters in a term literal.
ae_patterns = [(ae, re.compile(rf"\b{re.escape(ae)}\b")) for ae in ae_terms]

# Collect, per abstract, the terms found in it. Abstract_Index is 1-based.
results = []
for index, abstract in enumerate(cleaned_abstracts, start=1):
    found_ae = [ae for ae, pattern in ae_patterns if pattern.search(abstract)]
    if found_ae:
        results.append({"Abstract_Index": index, "AE_Detected": found_ae})

# Name the columns explicitly so the frame has them even when no abstract
# matched any term and `results` is empty.
df = pd.DataFrame(results, columns=["Abstract_Index", "AE_Detected"])
df.head()

## 📊 Phase 5: Visualization

In [None]:
from collections import Counter
import matplotlib.pyplot as plt

# Count how many abstracts mention each adverse-event term.
# df.get falls back to an empty list when the "AE_Detected" column is absent
# (no detections), instead of raising KeyError like df["AE_Detected"] would.
flat_ae = [ae for sublist in df.get("AE_Detected", []) for ae in sublist]
ae_freq = Counter(flat_ae)

if not ae_freq:
    # Nothing was detected, so there is nothing meaningful to draw.
    print("No adverse events detected; nothing to plot.")
else:
    # most_common() orders the bars from most to least frequent.
    labels, counts = zip(*ae_freq.most_common())

    plt.figure(figsize=(8,5))
    plt.bar(labels, counts, color='teal')
    plt.title("Adverse Events Frequency")
    plt.ylabel("Count")
    plt.xticks(rotation=45)
    plt.grid(axis='y')
    plt.tight_layout()
    plt.show()