In [None]:
import sys
import os

# Add project root directory to sys.path
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(r"C:\Users\anilj.ANIL_JOSEPH\OneDrive\Desktop\final_dna_testing"), '..')))

from model_architeture.bilstm_bahdanu_model import CpGPredictor
from preprocess import  bilstm_bahdanu_preprocess



In [12]:
import torch
import os
project_root = os.path.abspath("..")  # since you're in test/
sys.path.append(project_root)

# Construct the path relative to project root
model_path = os.path.join(project_root, "model", "bilstm_bahdanu_final.pt")

In [13]:
import os
import sys
import logging
import torch
import matplotlib.pyplot as plt
import pandas as pd
from datetime import datetime

# ==== Device Setup ====
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def test_sequences(
    sequences,
    model_path= model_path ,
    output_dir="./logs/testing/testing_image",
    log_dir="./logs/testing/testing_logs",
    save_attention=True,
    save_csv=True
):
    """
    Predicts CpG counts for a list of sequences using a pre-trained model and logs results.

    Args:
        sequences (list[str]): List of DNA sequences.
        model_path (str): Path to the saved model.
        output_dir (str): Directory to save attention plots.
        log_dir (str): Directory to save log file.
        save_attention (bool): Whether to save attention bar plots.
        save_csv (bool): Whether to save results in CSV format.
    """

    os.makedirs(output_dir, exist_ok=True)
    os.makedirs(log_dir, exist_ok=True)

    # Setup timestamped logging
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    log_file = os.path.join(log_dir, f"test_log_{timestamp}.log")
    logging.basicConfig(
        filename=log_file,
        filemode="w",
        level=logging.INFO,
        format="%(asctime)s - %(levelname)s - %(message)s"
    )

    # Load the model
    model = CpGPredictor(
        input_dim=5,
        embedding_dim=64,
        hidden_dim=256,
        num_layers=2,
        dropout=0.33262129231366233
    ).to(device)

    if not os.path.exists(model_path):
        raise FileNotFoundError(f"Model file '{model_path}' not found.")
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.eval()
    logging.info("✅ Model loaded successfully.")

    # For CSV logging
    results = []

    for idx, seq in enumerate(sequences, 1):
        pred, attn = predict_cpg(model, seq, return_attention=True)
        actual = count_cpgs(seq)
        error = abs(pred - actual)

        logging.info(f"\nSequence {idx}:")
        logging.info(f"📌 {seq}")
        logging.info(f"✅ Actual CpG: {actual}, 🧠 Predicted CpG: {pred:.2f}, ❗ Error: {error:.2f}")

        results.append({
            "Sequence_ID": f"Seq_{idx}",
            "Sequence": seq,
            "Actual_CpG": actual,
            "Predicted_CpG": round(pred, 2),
            "Error": round(error, 2)
        })

        if save_attention:
            plt.figure(figsize=(14, 4))
            plt.bar(range(len(attn)), attn)
            plt.title(f"Attention - Seq {idx}\nActual: {actual}, Predicted: {pred:.2f}")
            plt.xlabel("Nucleotide Position")
            plt.ylabel("Attention Weight")
            plt.tight_layout()
            plot_path = os.path.join(output_dir, f"attention_seq_{idx}.png")
            plt.savefig(plot_path)
            plt.close()
            logging.info(f"🖼️ Attention plot saved to: {plot_path}")

    # Save results to CSV
    if save_csv:
        df = pd.DataFrame(results)
        csv_path = os.path.join(output_dir, f"prediction_results_{timestamp}.csv")
        df.to_csv(csv_path, index=False)
        logging.info(f"📝 Predictions saved to: {csv_path}")

    print(f"✅ Testing complete. Logs: {log_file}, Results: {csv_path if save_csv else 'Not saved'}")
    

In [None]:
test_seqs = [
    "ATGCGCGTANCGCCGNCCGGCGCGTANCTACGGCGCGTANCCGCGTANCGCCGNCCGGCGCGTANCTANCGCGGCGCGTAGCGTANCCGCGTANNCCGCGTANCAT",
    "CGTANCGCGCGTANCGCCGNCGTACGCGTANCTACGGCGCGTANCCGCGTANCGCCGCGCGCGTAGCGTANCGCGCGTANCTACGGCGCGTANCAT",
]

test_sequences(test_seqs)

# ==== Attention Visualization ====
plt.figure(figsize=(14, 4))
plt.bar(range(len(attn)), attn)
plt.title(f"Attention Weights for Input Sequence\nActual: {actual}, Predicted: {pred:.2f}")
plt.xlabel("Nucleotide Position")
plt.ylabel("Attention Weight")
plt.tight_layout()
plt.savefig("attention_sample.png")
plt.show()


FileNotFoundError: Model file 'C:\Users\anilj.ANIL_JOSEPH\OneDrive\Desktop\model\bilstm_bahdanu_final.pt' not found.

: 

: 

: 

: 

Thanks for sharing the structure and code—this is a **classic path issue** due to how you're setting the project root. Let's walk through everything **from the ground up**, fix the problem properly, and make your imports and model loading smooth.

---

## ✅ 1. Correct Project Structure

You shared this:
```
final_dna_testing/
├── model_arctitecture/
│   └── bilstm_bahdanu_model.py
├── preprocess/
│   └── bilstm_bahdanu_preprocess.py
├── test/
│   └── bilstm_bahdanu_test.ipynb
├── training/
│   └── bilstm_bahdanu_training.py
└── model/
    └── bilstm_bahdanu_final.pt
```

---

## ✅ 2. Fix the Import & Path Setup (One Time, Globally)

In your `test/bilstm_bahdanu_test.ipynb`, add this **once at the top**:

```python
import sys
import os

# Automatically get the project root (final_dna_testing)
project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
sys.path.append(project_root)

# Now you can import from anywhere in the project
from model_arctitecture.bilstm_bahdanu_model import CpGPredictor
from preprocess import bilstm_bahdanu_preprocess
```

If you're in a **Jupyter Notebook**, `__file__` doesn't work, so do this instead:

```python
import sys
import os

# Manually set root (use correct absolute path to your project)
project_root = r"C:\Users\anilj.ANIL_JOSEPH\OneDrive\Desktop\final_dna_testing"
sys.path.append(project_root)

# ✅ Import working now
from model_arctitecture.bilstm_bahdanu_model import CpGPredictor
from preprocess import bilstm_bahdanu_preprocess
```

---

## ✅ 3. Fix the Model Path

You’re using:
```python
project_root = os.path.abspath("..")  # This goes one level up from test/
```

But the error shows it's looking for:
```
C:\Users\anilj.ANIL_JOSEPH\OneDrive\Desktop\model\bilstm_bahdanu_final.pt
```
That’s wrong! It’s skipping the `final_dna_testing` directory.

Instead, do:
```python
model_path = os.path.join(project_root, "model", "bilstm_bahdanu_final.pt")
```

Make sure the model is really at:
```
C:\Users\anilj.ANIL_JOSEPH\OneDrive\Desktop\final_dna_testing\model\bilstm_bahdanu_final.pt
```

If it is, you're good!

---

## ✅ 4. How to Check Model Path

Add this line to debug:
```python
print("Model path:", model_path)
print("Exists:", os.path.exists(model_path))
```

---

## ✅ 5. Recap: What Your Top Notebook Cell Should Contain

```python
import sys
import os

# Set project root manually for Jupyter
project_root = r"C:\Users\anilj.ANIL_JOSEPH\OneDrive\Desktop\final_dna_testing"
sys.path.append(project_root)

from model_arctitecture.bilstm_bahdanu_model import CpGPredictor
from preprocess import bilstm_bahdanu_preprocess

import torch

model_path = os.path.join(project_root, "model", "bilstm_bahdanu_final.pt")
print("Model path:", model_path)
print("Exists:", os.path.exists(model_path))
```

---

## ✅ Bonus Tip: Don’t Miss Typos

You named the folder `model_arctitecture`, but usually it should be `model_architecture`. Just double-check spelling everywhere, especially in folder names and import lines.

---

Let me know if you want help converting this setup into a Python package (`__init__.py` files etc.) for cleaner reuse in the future.

In [16]:
import sys
import os

# Automatically get the project root (final_dna_testing)
project_root = r'C:\Users\anilj.ANIL_JOSEPH\OneDrive\Desktop\final_dna_testing'
import sys
sys.path.append(project_root)

# project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))
# sys.path.append(project_root)

# Now you can import from anywhere in the project
from model_arctitecture.bilstm_bahdanu_model import CpGPredictor
from preprocess import bilstm_bahdanu_preprocess


ModuleNotFoundError: No module named 'model_arctitecture'

In [17]:
import sys
import os

# Add project root directory to sys.path
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(r"C:\Users\anilj.ANIL_JOSEPH\OneDrive\Desktop\final_dna_testing"), '..')))

from model_architeture.bilstm_bahdanu_model import CpGPredictor
from preprocess import  bilstm_bahdanu_preprocess