In [1]:
from langchain_community.document_loaders import GitLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser,JsonOutputParser
from langchain_core.documents import Document
from dotenv import load_dotenv

  from .autonotebook import tqdm as notebook_tqdm


In [2]:

load_dotenv()
output_parser = StrOutputParser()

In [3]:
# 1Ô∏è‚É£ Load repo
loader = GitLoader(
    clone_url="https://github.com/chaitanyaSn/Fintech-Bank-Wallet.git",
    repo_path="./temp_clone",
    branch="main"
)


In [4]:
docs = loader.load()
print(f"\nüìö Total documents loaded: {len(docs)}")


üìö Total documents loaded: 119


In [5]:
# 2Ô∏è‚É£ Split into code-aware chunks
splitter = RecursiveCharacterTextSplitter.from_language(
    language="java",
    chunk_size=2000,
    chunk_overlap=400
)
chunks = splitter.split_documents(docs)
print(f"\nüìÑ Total chunks created: {len(chunks)}")
print(chunks[:4])  # Print first 2 chunks for inspection


üìÑ Total chunks created: 212
[Document(metadata={'source': 'README.md', 'file_path': 'README.md', 'file_name': 'README.md', 'file_type': '.md'}, page_content='# üí∞ Digital Wallet Microservices Platform\n\n![Architecture](https://github.com/user-attachments/assets/05d876f2-0d59-47fc-9cfa-bc12b059367b)\n\nA scalable microservices-based digital wallet platform with secure transactions and real-time notifications.\n\n\n## üèóÔ∏è Architecture\n\n**üåê Client Layer:** Postman/Web Apps ‚Üí REST API requests  \n**üö™ Gateway:** API Gateway (JWT auth, rate limiting, load balancing)  \n**üéØ Services:** User, Wallet, Transaction, Notification microservices  \n**üîç Infrastructure:** Eureka (discovery), Redis (cache), Kafka (messaging), Docker  \n\n## üîÑ Flow\n```\nClient ‚Üí API Gateway ‚Üí Auth Filter ‚Üí Service Discovery ‚Üí Target Service\nUser Sign Up ‚Üí Wallet Creation ‚Üí Transaction Processing ‚Üí Kafka Event ‚Üí Notification\n```\n\n\n## üéØ Services\n\n| Service | Port | 

In [6]:
# 3Ô∏è‚É£ Create embeddings with Hugging Face model
print("üîÑ Initializing embeddings...")
embeddings = HuggingFaceEmbeddings(model="nomic-ai/nomic-embed-text-v1", model_kwargs={"trust_remote_code": True})

üîÑ Initializing embeddings...


<All keys matched successfully>


In [7]:
# 4Ô∏è‚É£ Create and persist FAISS index
print("üíæ Creating vector store...")
vectorstore = FAISS.from_documents(chunks, embeddings)
vectorstore.save_local("faiss_index")
print("‚úÖ Vector store created and saved locally.")

üíæ Creating vector store...
‚úÖ Vector store created and saved locally.


In [8]:
# 5Ô∏è‚É£ Build retriever + LLM
retriever = vectorstore.as_retriever(search_type="similarity", search_kwargs={"k": 7})
llm = ChatGoogleGenerativeAI(model="gemini-2.5-flash")

In [9]:
# 6Ô∏è‚É£ Prompt Template
final_ans_prompt = PromptTemplate(
    template="""You are a helpful assistant analyzing a GitHub repository.
Answer the question based only on the context provided below.
If you don't know, just say you don't know.

Context:
{context}

Question:
{question}

Answer:""",
    input_variables=["context", "question"]
)

In [22]:
# 7Ô∏è‚É£ Ask Question
question = "how user auth works int his project?"
print(f"\n‚ùì Question: {question}")


‚ùì Question: how user auth works int his project?


In [23]:
question_promt=PromptTemplate(
   template= """
You are an AI assistant responsible for rewriting user queries 
so they become clearer, more detailed, and optimized for code retrieval.

Rewrite the user's question to include:
- relevant technical keywords
- related file/module names (if mentioned)
- missing context needed to answer properly

Do NOT answer the question. Only rewrite it.

Original question:
{question}

Rewritten improved question:
""",
    input_variables=["question"]
)

In [24]:
question_context = question_promt.format(question=question)
print(f"\nüßπ Cleaned Question: {question_context}")


üßπ Cleaned Question: 
You are an AI assistant responsible for rewriting user queries 
so they become clearer, more detailed, and optimized for code retrieval.

Rewrite the user's question to include:
- relevant technical keywords
- related file/module names (if mentioned)
- missing context needed to answer properly

Do NOT answer the question. Only rewrite it.

Original question:
how user auth works int his project?

Rewritten improved question:



In [25]:
ques_res=llm.invoke(question_context)
parsed_ques_res=output_parser.invoke(ques_res)
print(f"\nüß† Question Rewriting Response: {parsed_ques_res}")



üß† Question Rewriting Response: Please provide a detailed explanation of the user authentication and authorization system implemented in this project. Specifically, clarify the following aspects:

*   **Authentication Mechanism:** Does the project utilize session-based authentication (e.g., cookies, server-side sessions) or token-based authentication (e.g., JWT, OAuth2 access tokens, refresh tokens)?
*   **User Registration:** Describe the workflow for new user signup, including how user credentials are handled, password hashing algorithms (e.g., bcrypt, Argon2), and storage within the database (e.g., `users` table, `User` model schema).
*   **User Login:** Detail the process for user sign-in, including credential verification, and how authentication tokens or session IDs are generated and provided to the client.
*   **Session/Token Management:** Explain how user sessions are maintained, validated, and expired. If using tokens, how are they stored (e.g., HTTP-only cookies, local sto

In [26]:
retrieved_docs = retriever.invoke(parsed_ques_res)
context_text = " ".join([doc.page_content for doc in retrieved_docs])
print(f"\nüìÑ Retrieved {len(retrieved_docs)} relevant documents.")



üìÑ Retrieved 7 relevant documents.


In [27]:
final_prompt = final_ans_prompt.format(context=context_text, question=question)
response = llm.invoke(final_prompt)
final_res = output_parser.invoke(response)  
print("\nü§ñ Response:\n", final_res)


ü§ñ Response:
 User authentication in this project works as follows:

1.  **User Registration**: Users can register by sending a `POST` request to `/users/register` with their `UserDto`. The `userService` creates the user, and passwords are likely encrypted using `BCryptPasswordEncoder` as configured in `SecurityConfig`. A `UserRegisteredEvent` is then published via Kafka using `UserProducer`.

2.  **User Login**:
    *   A user sends a `POST` request to `/users/login` with their `LoginDto` (containing email and password).
    *   The `AuthenticationManager` attempts to authenticate these credentials. If authentication fails, an "Invalid email or password" error is returned.
    *   If authentication is successful, `UserDetailsService` loads the user's details, which are represented by a `CustomsUserDetail` object.
    *   The `JwtUtil` then generates a JSON Web Token (JWT). This token includes user claims such as `id`, `email`, `name`, and `walletId`, and is signed using HS512. The 