In [4]:
import nest_asyncio

# Patch asyncio so an event loop can be re-entered while another one is
# already running. NOTE(review): presumably needed because the notebook
# kernel already runs a loop and later cells call async-backed loaders -
# confirm before removing.
nest_asyncio.apply()

## Config


In [5]:
import yaml

# Load project settings from the YAML file one directory above the working
# directory. The encoding is pinned to UTF-8 so parsing does not depend on the
# platform's locale default (e.g. cp1252 on Windows).
# `safe_load` only builds plain Python objects (dicts, lists, scalars).
with open("../config.yaml", encoding="utf-8") as config_file:
    config = yaml.safe_load(config_file)

## Read GitHub repository


In [6]:
from llama_index.readers.github import GithubClient, GithubRepositoryReader

# Authenticate against the GitHub API with the token read from the config file.
github_client = GithubClient(github_token=config["github"]["token"])

# `filter_file_extensions` takes a tuple of (list of extensions, FilterType).
# The extensions must be a real list: a bare ".py" string would be matched by
# substring containment, which lets extension-less files (extension "") slip
# through the INCLUDE filter.
reader = GithubRepositoryReader(
    github_client=github_client,
    owner=config["github"]["owner"],
    repo=config["github"]["repo"],
    filter_file_extensions=(
        [".py"],
        GithubRepositoryReader.FilterType.INCLUDE,
    ),
)

# Fetch every matching file from the configured branch as a Document.
documents = reader.load_data(branch=config["github"]["branch"])

# Defensive second pass: keep only documents whose file name ends in ".py",
# so downstream cells are guaranteed to see Python sources only.
docs = [
    document
    for document in documents
    if document.metadata["file_name"].endswith(".py")
]

In [7]:
# Show the filtered Document objects (full repr; the output can be very long).
docs

[Document(id_='e69de29bb2d1d6434b8b29ae775ad8c2e48c5391', embedding=None, metadata={'file_path': 'api/__init__.py', 'file_name': '__init__.py', 'url': 'https://api.github.com/PodYapolskiy/follow-my-reading/blob/main/api/__init__.py'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text_resource=MediaResource(embeddings=None, data=None, text='', path=None, url=None, mimetype=None), image_resource=None, audio_resource=None, video_resource=None, text_template='{metadata_str}\n\n{content}'),
 Document(id_='23780433224252d4be57cc2e12853358c6ab569c', embedding=None, metadata={'file_path': 'api/v1/__init__.py', 'file_name': '__init__.py', 'url': 'https://api.github.com/PodYapolskiy/follow-my-reading/blob/main/api/v1/__init__.py'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text_resource=MediaResource

## Summarize each file


In [8]:
# Take the third document (api/v1/audio.py in the recorded output) and convert
# it to a plain dict so its fields can be inspected and indexed by key.
sample = docs[2].dict()
sample

{'id_': '582369e0d768ff3a074c3eaf431872a594e88fe9',
 'embedding': None,
 'metadata': {'file_path': 'api/v1/audio.py',
  'file_name': 'audio.py',
  'url': 'https://api.github.com/PodYapolskiy/follow-my-reading/blob/main/api/v1/audio.py'},
 'excluded_embed_metadata_keys': [],
 'excluded_llm_metadata_keys': [],
 'relationships': {},
 'metadata_template': '{key}: {value}',
 'metadata_separator': '\n',
 'text_resource': {'embeddings': None,
  'text': 'from io import BytesIO\nfrom uuid import UUID, uuid4\n\nimport pydub\nfrom fastapi import APIRouter, Depends, HTTPException, UploadFile, status\nfrom fastapi.responses import FileResponse\nfrom huey.api import Result\nfrom loguru import logger\nfrom pydantic.error_wrappers import ValidationError\n\nfrom config import get_config\nfrom core import task_system\nfrom core.plugins.no_mem import get_audio_plugins\n\nfrom .auth import get_current_active_user\nfrom .models import (\n    AudioExtractPhrasesRequest,\n    AudioExtractPhrasesResponse,\n  

In [9]:
from rich.console import Console
from rich.syntax import Syntax

# Wrap the sampled file's source in a syntax-highlighted, line-numbered
# renderable using the "dracula" colour theme.
code_snippet = Syntax(sample["text"], "python", theme="dracula", line_numbers=True)

# Print the highlighted source through a rich console.
console = Console()
console.print(code_snippet)

In [10]:
# Prompt template for the code-explanation request. `{code}` is its single
# placeholder and is substituted with a file's source text when the chain runs.
code_explanation_template = """
Please explain the following Python code in simple terms:

```python
{code}
```

Provide a detailed explanation covering:
1. What the code does
2. The purpose of each function or class
3. Any important programming concepts used
4. Potential improvements or best practices
"""

In [11]:
import torch

# Report whether PyTorch can see a CUDA device; the boolean is shown as the
# cell's output.
torch.cuda.is_available()

True

In [12]:
from langchain_ollama import OllamaLLM
from langchain.prompts import PromptTemplate

# LLM served by Ollama. A low temperature keeps responses more deterministic.
# Pass base_url="http://localhost:11434" (or another address) explicitly when
# targeting a remote instance.
llm = OllamaLLM(
    temperature=0.2,
    model="llama3.2:1b",
)

In [13]:
# Compose the prompt and the model into a single runnable with the pipe
# operator: the rendered prompt is fed straight into the LLM.
prompt = PromptTemplate(
    template=code_explanation_template,
    input_variables=["code"],
)
chain = prompt | llm

# Fill the `{code}` placeholder with the sampled file's source and run the
# chain once; the result is printed as-is.
chain_inputs = {"code": sample["text"]}
explanation = chain.invoke(chain_inputs)
print(explanation)

**Overview**

The provided code is a comprehensive API for audio processing tasks, including extracting phrases from audio files using various models and downloading them as MP3s. It consists of multiple functions that handle different aspects of the task, such as creating jobs, retrieving results, and validating inputs.

**Functionality Breakdown**

1. **Audio Processing Tasks**
	* The code defines several functions for creating tasks:
		+ `extract_text_from_audio`: Extracts phrases from an audio file using a specified model.
		+ `extract_phrases_from_audio`: Creates a task to extract phrases from an audio file using a specified model and phrase list.
2. **Job Management**
	* The code uses the `task_system` module to manage jobs, which are instances of tasks that can be executed concurrently.
3. **Result Retrieval**
	* Functions like `_get_job_status`, `_get_job_result`, and `TaskCreateResponse` handle retrieving results from completed jobs.
4. **Error Handling**
	* The code includes 

In [None]:
# print(explanation["code"])

from io import BytesIO
from uuid import UUID, uuid4

import pydub
from fastapi import APIRouter, Depends, HTTPException, UploadFile, status
from fastapi.responses import FileResponse
from huey.api import Result
from loguru import logger
from pydantic.error_wrappers import ValidationError

from config import get_config
from core import task_system
from core.plugins.no_mem import get_audio_plugins

from .auth import get_current_active_user
from .models import (
    AudioExtractPhrasesRequest,
    AudioExtractPhrasesResponse,
    AudioProcessingRequest,
    AudioProcessingResponse,
    ModelData,
    ModelsDataResponse,
    TaskCreateResponse,
    UploadFileResponse,
)
from .task_utils import _get_job_result, _get_job_status, create_audio_task

logger.add(
    "./logs/audio.log",
    format="{time:DD-MM-YYYY HH:mm:ss zz} {level} {message}",
    enqueue=True,
)
config = get_config()
router = APIRouter(
    prefix="/audio", tags=["audio"], dependencies=[Depends(get_current_active_user)]
)



In [None]:
import torch
from llama_index.core import Settings, PropertyGraphIndex
from llama_index.core.chat_engine.types import ChatMode
from llama_index.llms.ollama import Ollama

# from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# Run the embedding model on the GPU when one is available and fall back to
# the CPU otherwise, so this cell also works on machines without CUDA.
device = "cuda" if torch.cuda.is_available() else "cpu"
LLM = "llama3.2:1b"
EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"  # "jinai"


# Register the global defaults that LlamaIndex components pick up when no
# explicit LLM / embedding model is passed to them.
Settings.llm = Ollama(
    LLM,
    request_timeout=3 * 60,  # presumably seconds (i.e. 3 minutes) - confirm
    # async_mode=True,
)
Settings.embed_model = HuggingFaceEmbedding(EMBED_MODEL, device=device)