In [3]:
!pip install -U sentence-transformers

Collecting sentence-transformers
  Downloading sentence_transformers-5.0.0-py3-none-any.whl.metadata (16 kB)
Downloading sentence_transformers-5.0.0-py3-none-any.whl (470 kB)
   ---------------------------------------- 0.0/470.2 kB ? eta -:--:--
    --------------------------------------- 10.2/470.2 kB ? eta -:--:--
    --------------------------------------- 10.2/470.2 kB ? eta -:--:--
    --------------------------------------- 10.2/470.2 kB ? eta -:--:--
    --------------------------------------- 10.2/470.2 kB ? eta -:--:--
   -- ------------------------------------ 30.7/470.2 kB 108.9 kB/s eta 0:00:05
   -- ------------------------------------ 30.7/470.2 kB 108.9 kB/s eta 0:00:05
   -- ------------------------------------ 30.7/470.2 kB 108.9 kB/s eta 0:00:05
   -- ------------------------------------ 30.7/470.2 kB 108.9 kB/s eta 0:00:05
   --- ------------------------------------ 41.0/470.2 kB 81.9 kB/s eta 0:00:06
   ----- --------------------------------- 61.4/470.2 kB 126.1 kB/

In [7]:
import pandas as pd
import pickle
from sentence_transformers import SentenceTransformer

# Step 1: Load the dataset
df = pd.read_csv("containerization.csv")

# Fail fast with a readable message if the CSV lacks a column that the
# context-building step reads; otherwise the problem only surfaces later as a
# bare KeyError raised from inside df.apply.
required_columns = [
    "paperid",
    "title",
    "tools",
    "devopsphase",
    "challenge",
    "theme",
    "contributiontype",
]
missing_columns = [name for name in required_columns if name not in df.columns]
if missing_columns:
    raise ValueError(
        f"containerization.csv is missing required columns: {missing_columns}"
    )

# Step 2: Combine relevant columns into a single context string per row
def build_context(row):
    """Render one paper record as a multi-line "Label: value" text block.

    Args:
        row: A mapping-like object indexed by column name (a DataFrame row
            when called through ``df.apply(..., axis=1)``).

    Returns:
        A string with one "Label: value" line per field, in a fixed order,
        joined by newlines and without a trailing newline.
    """
    # (label shown in the text, column read from the row), in output order.
    labelled_columns = (
        ("Paper ID", "paperid"),
        ("Title", "title"),
        ("Tools", "tools"),
        ("DevOps Phase", "devopsphase"),
        ("Challenge", "challenge"),
        ("Theme", "theme"),
        ("Contribution Type", "contributiontype"),
    )
    lines = [f"{label}: {row[column]}" for label, column in labelled_columns]
    return "\n".join(lines)

# Attach the per-paper context text as a new column.
context_column = df.apply(build_context, axis=1)
df["context"] = context_column

# Step 3: Turn every context string into a sentence-transformer embedding.
model = SentenceTransformer("all-MiniLM-L6-v2")
context_texts = df["context"].tolist()
embeddings = model.encode(context_texts, show_progress_bar=True)

# Step 4: Persist the frame and its embeddings together as one pickled tuple.
vector_store = (df, embeddings)
with open("vector_store.pkl", "wb") as store_file:
    pickle.dump(vector_store, store_file)

print("✅ Vector store built and saved to vector_store.pkl")



Batches:   0%|          | 0/3 [00:00<?, ?it/s]

✅ Vector store built and saved to vector_store.pkl


In [37]:
import openai
import os
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import pickle

# Load vector store
# NOTE: pickle.load can execute arbitrary code while deserializing. Only load
# a vector_store.pkl that you built yourself, never one from an untrusted source.
with open("vector_store.pkl", "rb") as f:
    df, embeddings = pickle.load(f)

# Load embedding model (same as used in vector store creation)
model = SentenceTransformer("all-MiniLM-L6-v2")

# Read the OpenAI API key from the environment so the secret is never stored
# in the notebook source or its saved outputs.
openai_api_key = os.getenv("OPENAI_API_KEY")
if not openai_api_key:
    raise RuntimeError(
        "OPENAI_API_KEY is not set; export it in the environment before running this cell."
    )
openai.api_key = openai_api_key

# Define the RAG-based recommendation function
def recommend_with_rag(query, top_k=5, llm_model="gpt-4"):
    """Answer a DevOps question using retrieval-augmented generation.

    The query is embedded with the module-level ``model``, compared by cosine
    similarity against the module-level ``embeddings``, and the ``top_k`` most
    similar rows of the module-level ``df`` are placed in a prompt that is sent
    to the OpenAI chat completions API.

    Args:
        query: The user's question; must be a non-empty string.
        top_k: Number of most-similar papers to include as context; must be
            at least 1.
        llm_model: Name of the OpenAI chat model to call.

    Returns:
        The message content of the first choice in the API response.

    Raises:
        ValueError: If ``query`` is blank or not a string, or ``top_k`` is
            less than 1.
    """
    # Validate before doing any embedding work or making a billed API call.
    if not isinstance(query, str) or not query.strip():
        raise ValueError("query must be a non-empty string")
    # A negative top_k would slice off the *least* similar rows instead of
    # limiting the result, and zero would send an empty context to the LLM.
    if top_k < 1:
        raise ValueError(f"top_k must be at least 1, got {top_k!r}")

    # Step 1: Embed user query
    query_embedding = model.encode([query])

    # Step 2: Compute cosine similarities and rank rows from most to least similar
    similarities = cosine_similarity(query_embedding, embeddings)[0]
    top_indices = similarities.argsort()[::-1][:top_k]

    # Step 3: Embed paper IDs into the context to make them visible to the model
    retrieved = "\n\n---\n\n".join(
        f"Paper ID: {df.iloc[i]['paperid']}\n{df.iloc[i]['context']}" for i in top_indices
    )

    # Step 4: Prompt
    prompt = f"""You are a helpful DevOps assistant.
The user is interested in: "{query}"

Based on the following relevant papers and contexts, recommend:
- Tools (e.g., Docker, Helm)
- DevOps practices
- Platform and configuration advice

Important:
- In your answer, cite the source of each recommendation.
- When citing a single paper, use this format: (Reference 147)
- When citing multiple, use: (References 147, 589, 785)

Relevant contexts:
{retrieved}

Respond with a clear recommendation. Use the reference format shown above.
"""

    # Step 5: OpenAI ChatCompletion call
    response = openai.chat.completions.create(
        model=llm_model,
        messages=[
            {"role": "system", "content": "You are a helpful DevOps assistant."},
            {"role": "user", "content": prompt}
        ]
    )

    return response.choices[0].message.content


In [35]:
# Prompt for the question, then run it through the RAG pipeline.
user_query = input("💬 Enter your DevOps or Kubernetes-related question: ")
recommendation = recommend_with_rag(user_query)

# Show the answer under a header line followed by one blank line.
print("📢 Recommendation:\n", recommendation, sep="\n")



💬 Enter your DevOps or Kubernetes-related question:  orchestration


📢 Recommendation:

Based on the given research papers, here are the recommendations:

Tools:
1. Kubernetes: This is by far the most heavily suggested tool for orchestration in the literature (References 912, 1007, 988). Kubernetes allows for automated deployment, scaling, and management of containerized applications.
2. Docker: As a platform for operating-system-level virtualization, Docker packages applications and their dependencies in containers, enabling applications to run smoothly in various computing environments (References 912,1007,948).
3. Helm: A package manager for Kubernetes, Helm simplifies deployment of applications on Kubernetes and represents a considerable boon for DevOps processes (Reference 912).
4. ArgoCD: Mentioned in two papers, ArgoCD provides declarative and version-controlled application deployment to Kubernetes (References 912, 948)

DevOps Practices:
1. DevOps phases such as Deployment, Orchestration, and Configuration Management were highlighted (References