In [1]:
import sys
import subprocess

# Uninstall the specific packages causing the conflict
pkgs_to_remove = ["langchain-classic", "langgraph", "langgraph-prebuilt", "langgraph-checkpoint", "langgraph-sdk"]
print(f"Removing conflicting packages: {pkgs_to_remove}...")
subprocess.run([sys.executable, "-m", "pip", "uninstall", "-y"] + pkgs_to_remove)

# Just to be safe, ensure the good ones are still there
print("Verifying core packages...")
subprocess.run([sys.executable, "-m", "pip", "install", "langchain==0.3.0", "langchain-community==0.3.0", "langchain-openai", "langchain-chroma", "sentence-transformers"])

Removing conflicting packages: ['langchain-classic', 'langgraph', 'langgraph-prebuilt', 'langgraph-checkpoint', 'langgraph-sdk']...


[0m

Verifying core packages...


CompletedProcess(args=['/home/vamsi/Documents/ml-1/.venv/bin/python', '-m', 'pip', 'install', 'langchain==0.3.0', 'langchain-community==0.3.0', 'langchain-openai', 'langchain-chroma', 'sentence-transformers'], returncode=0)

In [2]:
from git import Repo
from git.exc import GitCommandError
import os
import nbformat
import tiktoken 
from langchain_text_splitters import RecursiveCharacterTextSplitter, Language 
from langchain_core.documents import Document
from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma
from langchain_core.documents import Document
from langchain_huggingface import HuggingFaceEmbeddings # CHANGED
from langchain_chroma import Chroma
from langchain_openai import ChatOpenAI
from langchain.chains import RetrievalQA


download Repo_files

In [3]:
repo_url = 'https://github.com/vavinash992/crypto-currency-price-predictor'
destination_folder = "repo_files"

repo = Repo.clone_from(
    repo_url,
    destination_folder,
    no_checkout=True
)

repo.git.config("core.sparseCheckout", "true")

sparse_file = os.path.join(
    destination_folder, ".git", "info", "sparse-checkout"
)

with open(sparse_file, "w") as f:
    f.write("*.py\n")
    f.write("*.js\n")
    f.write("*.md\n")
    f.write("*.ipynb\n")
repo.git.checkout()

"Your branch is up to date with 'origin/main'."

to delete empty files

In [4]:
for root, dirs, files in os.walk(destination_folder, topdown=False):
    if not os.listdir(root):
        os.rmdir(root)

In [5]:
ALLOWED_EXTENSIONS = {".py", ".js", ".md", ".ipynb"}
TOKENIZER_NAME = "cl100k_base"
MAX_TOKENS_PER_CHUNK = 800 
CHUNK_OVERLAP = 100
enc = tiktoken.get_encoding(TOKENIZER_NAME)

In [6]:
def count_tokens(text):
    return len(enc.encode(text))

In [None]:
def get_splitter_for_file(file_path):
    ext = os.path.splitext(file_path)[1].lower()
    
    if ext == ".py":
        return RecursiveCharacterTextSplitter.from_language(
            language=Language.PYTHON, 
            chunk_size=MAX_TOKENS_PER_CHUNK, 
            chunk_overlap=CHUNK_OVERLAP,
            length_function=count_tokens # Using tiktoken to measure length
        )
    elif ext == ".js":
        return RecursiveCharacterTextSplitter.from_language(
            language=Language.JS, 
            chunk_size=MAX_TOKENS_PER_CHUNK, 
            chunk_overlap=CHUNK_OVERLAP,
            length_function=count_tokens
        )
    elif ext == ".md":
        return RecursiveCharacterTextSplitter.from_language(
            language=Language.MARKDOWN, 
            chunk_size=MAX_TOKENS_PER_CHUNK, 
            chunk_overlap=CHUNK_OVERLAP,
            length_function=count_tokens
        )
    else:
        # Fallback for other text files (like requirements.txt or raw .ipynb text)
        return RecursiveCharacterTextSplitter(
            chunk_size=MAX_TOKENS_PER_CHUNK, 
            chunk_overlap=CHUNK_OVERLAP,
            length_function=count_tokens
        )

Reading files

In [8]:
def read_text_file(path):
    with open(path, "r", encoding="utf-8", errors="ignore") as f:
        return f.read()

def read_notebook(path):
    nb = nbformat.read(path, as_version=4)
    parts = []

    for cell in nb.cells:
        if cell.cell_type in ("markdown", "code"):
            parts.append(cell.source)

    return "\n\n".join(parts)

def read_file(path):
    ext = os.path.splitext(path)[1].lower()

    if ext == ".ipynb":
        return read_notebook(path)
    elif ext in {".py", ".js", ".md"}:
        return read_text_file(path)
    else:
        return ""

 Repo Processor

In [9]:
def process_folder(folder_path):
    documents = []

    for root, _, files in os.walk(folder_path):
        for file in files:
            # Filter by extension
            file_ext = os.path.splitext(file)[1].lower()
            if file_ext not in ALLOWED_EXTENSIONS:
                continue

            # Paths
            full_path = os.path.join(root, file)
            rel_path = os.path.relpath(full_path, folder_path) # e.g. "app.py" instead of "repo_files/app.py"

            # Read content
            content = read_file(full_path)
            if not content or not content.strip():
                continue

            # 1. Get the correct splitter
            splitter = get_splitter_for_file(full_path)
            
            # 2. Split the text
            text_chunks = splitter.split_text(content)

            # 3. Create Rich Documents
            for i, chunk in enumerate(text_chunks):
                
                # --- A. Prepare Header for LLM (In-Context) ---
                if file_ext == ".md":
                    header = f"\n"
                else:
                    header = f"# File: {rel_path}\n"
                
                chunk_with_header = header + chunk

                # --- B. Store Structured Metadata (For DB/Filtering) ---
                documents.append({
                    "text": chunk_with_header, 
                    "metadata": {
                        "source": rel_path,         # Standard key for most RAG apps
                        "full_path": full_path,
                        "extension": file_ext,
                        "chunk_index": i,
                        "token_count": count_tokens(chunk_with_header)
                    }
                })

    return documents

Debugging

In [10]:
!ls -R repo_files

repo_files:
 app.py  'crypto price predictor.ipynb'   README.md


In [11]:
repo_path = "repo_files"
docs = process_folder(repo_path)

print(f"Total chunks: {len(docs)}\n")

if len(docs) > 0:
    first_doc = docs[0]
    
    print("--- PREVIEW OF CHUNK 0 ---")
    print(f"File Source: {first_doc['metadata']['source']}")
    print(f"Token Count: {first_doc['metadata']['token_count']}")
    print("--------------------------")
    # Print the first 200 characters of the text to verify the header
    print(first_doc['text'][:200]) 
    print("...")

Total chunks: 4

--- PREVIEW OF CHUNK 0 ---
File Source: crypto price predictor.ipynb
Token Count: 771
--------------------------
# File: crypto price predictor.ipynb
from keras.models import Sequential
from keras.layers import Activation, Dense, Dropout, LSTM
import matplotlib.pyplot as plt
import numpy as np
import pandas as p
...


In [12]:
# 1. Run Imports (Required after restart)
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from langchain_core.documents import Document

# 2. Define the Function
def create_vector_db(docs):
    print("Initializing Sentence Transformers (this may take a moment)...")
    
    # Setup Embeddings (Runs locally, free)
    embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
    
    # Convert to Documents
    langchain_docs = [
        Document(page_content=d["text"], metadata=d["metadata"])
        for d in docs
    ]

    # Create/Update ChromaDB
    vector_db = Chroma.from_documents(
        documents=langchain_docs,
        embedding=embeddings,
        persist_directory="./chroma_db_sentence"
    )
    
    print(f"Successfully saved {len(langchain_docs)} chunks to ChromaDB.")
    return vector_db

# 3. Execution Check
# We check if 'docs' exists. If you just restarted, this might fail unless you ran the previous cells.
if 'docs' in globals() and docs:
    vector_db = create_vector_db(docs)
else:
    print("‚ö†Ô∏è 'docs' variable not found!")
    print("Please re-run your 'Repo Processor' cells (process_folder) to generate the 'docs' list first.")

Initializing Sentence Transformers (this may take a moment)...


  from .autonotebook import tqdm as notebook_tqdm


Successfully saved 4 chunks to ChromaDB.


In [1]:
import os
from dotenv import load_dotenv
load_dotenv()
my_api_key =os.getenv('my_api_key1')


In [None]:
from langchain_openai import ChatOpenAI
from langchain.chains import RetrievalQA

# ==========================================
# 2. CONNECT TO AI
# ==========================================
# We use Gemini 2.0 Flash (Free) because it is fast and reliable.
llm = ChatOpenAI(
    model="xiaomi/mimo-v2-flash:free",
    openai_api_key=my_api_key,
    base_url="https://openrouter.ai/api/v1",
    temperature=0
)

# ==========================================
# 3. CHAT WITH YOUR CODE
# ==========================================
if 'vector_db' in globals():
    qa_chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=vector_db.as_retriever(search_kwargs={"k": 3}),
        return_source_documents=True
    )

    print("‚úÖ Connected! Asking question...\n")
    
    try:
        query = "What is the purpose of app.py?"
        response = qa_chain.invoke({"query": query})
        
        print(f"ü§ñ Answer:\n{response['result']}\n")
        print("üìÑ Sources Used:")
        for doc in response['source_documents']:
            print(f"- {doc.metadata.get('source', 'Unknown')}")
            
    except Exception as e:
        print(f"‚ùå Error: {e}")
else:
    print("‚ö†Ô∏è Error: 'vector_db' is missing. You need to run the 'create_vector_db' cell first.")

‚úÖ Connected! Asking question...

ü§ñ Answer:
Based on the provided code, the purpose of `app.py` is to create a web application using the Flask framework that predicts cryptocurrency prices.

Here is a breakdown of its specific functions:

*   **Web Server:** It sets up a Flask web server to handle user requests.
*   **User Interface:** It serves an HTML page (`index.html`) where a user can select a cryptocurrency and a date.
*   **Data Retrieval:** When a user submits their choice, the application fetches historical price data for that cryptocurrency from Yahoo Finance.
*   **Machine Learning Model:** It builds, trains, and runs a Long Short-Term Memory (LSTM) neural network using Keras/TensorFlow on the retrieved historical data.
*   **Price Prediction:** The model is used to predict the closing price of the selected cryptocurrency for the specific date the user chose.
*   **Display Results:** It shows the predicted price to the user on a new web page (`model.html`).

üìÑ Sources

In [19]:
# --- CONTINUOUS CHAT LOOP ---
print("üí¨ Chat session started! Type 'exit' or 'quit' to stop.\n")

while True:
    # 1. Get User Input
    user_input = input("\nYou: How is the LSTM model configured?")
    
    # 2. Check for Exit
    if user_input.lower() in ["exit", "quit"]:
        print("Goodbye! üëã")
        break
    
    # 3. Ask the AI
    try:
        if 'qa_chain' in globals():
            response = qa_chain.invoke({"query": user_input})
            print(f"ü§ñ AI: {response['result']}")
        else:
            print("‚ö†Ô∏è Error: 'qa_chain' is not defined. Please run the connection cell above first.")
            break
            
    except Exception as e:
        print(f"‚ùå Error: {e}")

üí¨ Chat session started! Type 'exit' or 'quit' to stop.

ü§ñ AI: Based on the provided code, the LSTM model is configured as a Sequential model with the following layers:

1.  **LSTM Layer:**
    *   **Units:** 100
    *   **Input Shape:** Dynamically set based on the training data shape `(X_train.shape[1], X_train.shape[2])`.
2.  **Dropout Layer:**
    *   **Rate:** 0.2 (This drops 20% of the units during training to prevent overfitting).
3.  **Dense Layer:**
    *   **Units:** 1 (Output layer).
4.  **Activation Layer:**
    *   **Function:** Linear.

**Compilation Settings:**
*   **Loss Function:** Mean Squared Error (`mse`).
*   **Optimizer:** Adam.

**Training Settings:**
*   **Epochs:** 20
*   **Batch Size:** 32
*   **Shuffle:** True
ü§ñ AI: Based on the provided context, I cannot determine when the project was committed. The text does not contain any dates or version control information (like commit logs) that would indicate when the code was written or submitted.
ü§ñ AI: Ba

In [15]:
"How is the LSTM model configured?"


'How is the LSTM model configured?'