In [None]:
!pip install -q "numpy<2.1" "attrs>=22.2.0" "transformers<4.39.0" flask pyngrok torch enformer-pytorch kipoiseq

In [None]:
import torch
from enformer_pytorch import Enformer
import kipoiseq
import numpy as np
from flask import Flask, request, jsonify
from pyngrok import ngrok
import os

The cache for model files in Transformers v4.22.0 has been updated. Migrating your old cache. This is a one-time only operation. You can interrupt this and resume the migration later on by calling `transformers.utils.move_cache()`.


0it [00:00, ?it/s]



In [None]:
# Never hardcode the ngrok auth token in the notebook: anything saved in a cell is
# effectively public once the notebook is shared. The token that used to be embedded
# here should be revoked in the ngrok dashboard.
# Resolution order: existing NGROK_AUTHTOKEN environment variable, then a hidden prompt.
NGROK_AUTH_TOKEN = os.environ.get("NGROK_AUTHTOKEN", "").strip()
if not NGROK_AUTH_TOKEN:
    from getpass import getpass  # local import: only needed for the interactive fallback

    NGROK_AUTH_TOKEN = getpass("Enter your ngrok auth token: ").strip()
if not NGROK_AUTH_TOKEN:
    raise RuntimeError(
        "No ngrok auth token provided; set the NGROK_AUTHTOKEN environment variable "
        "or enter the token when prompted."
    )
# Export the token through the environment so the tunnel started later can pick it up.
os.environ["NGROK_AUTHTOKEN"] = NGROK_AUTH_TOKEN

In [None]:
# Enformer model: pick the compute device first, then load the pretrained weights onto it.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
model = Enformer.from_pretrained('EleutherAI/enformer-official-rough')
# .to() and .eval() both return the module itself, so they can be chained;
# eval() switches the model to evaluation mode for inference.
model = model.to(device).eval()
print(f"- - - - Enformer loaded on {device} successfully.")



- - - - Enformer loaded on cuda successfully.


In [None]:
# Number of bases in the window handed to the model for every request.
# NOTE(review): the enformer-pytorch README examples use 196_608-bp inputs — confirm
# that 393_216 is the intended input length for this checkpoint.
seq_length = 393_216
def preprocess_dna(dna_seq: str) -> torch.Tensor:
    """
    Converts a DNA string into a one-hot encoded tensor that the Enformer can understand.

    The sequence is centred in a window of ``seq_length`` bases: longer inputs are
    cropped symmetrically around their midpoint, shorter inputs are padded with 'N'
    on both sides. Lowercase (soft-masked) bases are upper-cased first so they are
    encoded like their uppercase counterparts.

    Args:
        dna_seq: Raw DNA sequence (any length, any letter case).

    Returns:
        A float32 tensor on ``device`` with a leading batch dimension of size 1
        (presumably shaped (1, seq_length, 4) — depends on kipoiseq's encoding).

    Raises:
        TypeError: If ``dna_seq`` is not a string.
    """
    if not isinstance(dna_seq, str):
        raise TypeError(f"dna_seq must be a str, got {type(dna_seq).__name__}")

    # Soft-masked genome sequence uses lowercase letters for real bases; normalise the
    # case so the one-hot encoder does not mistake them for unknown symbols.
    dna_seq = dna_seq.upper()

    center_index = len(dna_seq) // 2
    start_index = center_index - (seq_length // 2)
    end_index = start_index + seq_length

    # A negative start_index means the input is shorter than the window: pad the left
    # side with that many 'N's, then right-pad up to the full window length.
    padded_sequence = 'N' * max(0, -start_index) + dna_seq[max(0, start_index):end_index]
    padded_sequence = padded_sequence.ljust(seq_length, 'N')

    # One-hot encode using kipoiseq
    one_hot = kipoiseq.transforms.functional.one_hot_dna(padded_sequence)

    # Convert numpy array to PyTorch tensor and add batch dimension
    return torch.tensor(one_hot, dtype=torch.float32).unsqueeze(0).to(device)


In [None]:
# Flask API: create the application object that the request handlers are registered on.
print("- - - - Setting up Flask web server...")
app = Flask(import_name=__name__)

@app.route('/predict', methods=['POST'])
def predict():
    """
    Score a DNA sequence with the Enformer model.

    Expects a JSON body of the form ``{"dna_seq": "<sequence>"}``.

    Returns:
        200 with ``{"status": "success", "average_prediction_score": <float>}`` on success,
        400 with ``{"error": ...}`` when the body is not a JSON object or 'dna_seq'
        is missing, not a string, or empty,
        500 with ``{"error": ...}`` when preprocessing or inference fails.
    """
    # silent=True yields None instead of raising for a missing/invalid JSON body,
    # so malformed requests get the same JSON 400 response as a missing field.
    data = request.get_json(silent=True)
    if not isinstance(data, dict) or 'dna_seq' not in data:
        return jsonify({"error": "Missing 'dna_seq'"}), 400

    dna_sequence = data['dna_seq']
    if not isinstance(dna_sequence, str) or not dna_sequence:
        return jsonify({"error": "'dna_seq' must be a non-empty string"}), 400

    try:
        # Preprocess (pass the value extracted from the request body)
        input_tensor = preprocess_dna(dna_sequence)

        # Run Model (No gradient calculation needed for inference)
        with torch.no_grad():
            predictions = model(input_tensor)

        # The output of Enformer is a dictionary of tracks.
        # We average the human track for a simplified prediction score.
        # Note: The specific key depends on the implementation version.
        human_output = predictions['human']
        average_score = torch.mean(human_output).item()

        return jsonify({
            "status": "success",
            "average_prediction_score": average_score
        })

    except Exception as e:
        # Report failures to the client as JSON rather than an HTML error page.
        return jsonify({"error": str(e)}), 500

- - - - Setting up Flask web server...


In [None]:
# Single source of truth for the port shared by the ngrok tunnel and the Flask server.
API_PORT = 5000

print("- - - - Starting ngrok tunnel...")
public_url = ngrok.connect(API_PORT).public_url
print(f"- - - - Enformer API is live at: {public_url}")

# Start Flask
if __name__ == '__main__':
    try:
        # Blocks until the server is stopped (e.g. the cell is interrupted).
        app.run(port=API_PORT)
    finally:
        # Close the tunnel once the server is down so it does not keep pointing at a
        # dead port, and so re-running this cell does not stack up open tunnels.
        ngrok.disconnect(public_url)

- - - - Starting ngrok tunnel...
- - - - Enformer API is live at: https://b04d-34-83-218-32.ngrok-free.app
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
INFO:werkzeug:Press CTRL+C to quit
