## Step 1️⃣: Install Dependencies

In [None]:
# Install required packages
!pip install -q transformers torch librosa soundfile accelerate huggingface-hub vllm

print("‚úÖ Dependencies installed!")

## Step 2️⃣: Clone Repository

In [None]:
import os
from pathlib import Path

# Clone the project
!git clone https://github.com/hasipfaruk/Stock-Price-Extractor.git

# Navigate to project
os.chdir("Stock-Price-Extractor")
print("‚úÖ Project cloned!")
print(f"üìÅ Current directory: {os.getcwd()}")

## Step 3️⃣: HuggingFace Authentication

In [None]:
import os

from huggingface_hub import login

# Placeholder shipped with the notebook; it is never passed to login().
TOKEN_PLACEHOLDER = "hf_YOUR_TOKEN_HERE"

# Prefer the HF_TOKEN environment variable so a real token does not have to
# be saved inside the notebook (and leaked when the notebook is shared).
# Replacing the fallback string below still works as before.
# Set your token (replace with yours)
HF_TOKEN = os.environ.get("HF_TOKEN", "hf_YOUR_TOKEN_HERE").strip()

# An empty value (e.g. HF_TOKEN set to "") is treated like a missing token.
if not HF_TOKEN or HF_TOKEN == TOKEN_PLACEHOLDER:
    print("‚ö†Ô∏è IMPORTANT: Replace with your actual HuggingFace token!")
    print("üìñ Get token from: https://huggingface.co/settings/tokens")
    print("1. Go to the link above")
    print("2. Create new token (read access is fine)")
    print("3. Copy and paste in the line above")
else:
    login(token=HF_TOKEN)
    print("‚úÖ Authenticated with HuggingFace!")

## Step 4️⃣: Verify GPU

In [None]:
import torch

# Banner: the same 70-character rule above and below the title.
divider = "=" * 70
for banner_line in (divider, "GPU STATUS", divider):
    print(banner_line)

# True when torch reports a usable CUDA device.
gpu_available = torch.cuda.is_available()
print(f"‚úÖ GPU Available: {gpu_available}")

if not gpu_available:
    # No CUDA device: explain the fallback and how to switch the runtime.
    print("\n‚ö†Ô∏è GPU not available - will use CPU (slower)")
    print("üí° To enable GPU: Runtime ‚Üí Change runtime type ‚Üí GPU")
else:
    # Report name and total memory (bytes / 1e9, shown as GB) of device 0.
    print(f"üìä GPU Device: {torch.cuda.get_device_name(0)}")
    print(f"üíæ GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.1f} GB")
    print("\n‚ú® GPU enabled - processing will be FAST!")

## Step 5️⃣: Upload Audio Files

In [None]:
from google.colab import files
import os

# Destination folder for the audio uploaded in this step.
os.makedirs('uploaded_audio', exist_ok=True)

# On-screen instructions for the upload widget.
for instruction in (
    "üìÅ Upload audio files:",
    "1. Click 'Choose Files'",
    "2. Select multiple audio files (WAV, MP3, FLAC, M4A)",
    "3. Wait for upload to complete\n",
):
    print(instruction)

# Mapping keyed by uploaded file name.
uploaded = files.upload()

print(f"\n‚úÖ {len(uploaded)} files uploaded:")
for filename in uploaded:
    # Move each file from the working directory into uploaded_audio/.
    # NOTE(review): assumes the upload widget saved the file under this
    # exact name in the current directory - confirm for duplicate names.
    file_path = f'uploaded_audio/{filename}'
    os.rename(filename, file_path)
    # Size in MiB for the listing.
    file_size = os.stat(file_path).st_size / (1024 * 1024)
    print(f"  üìÑ {filename} ({file_size:.1f} MB)")

## Step 6️⃣: Upload Extraction Prompt

In [None]:
import os  # imported here so the cell does not rely on an earlier cell

from google.colab import files

# Upload your prompt file
print("üìù Upload extraction prompt file:")
print("Click 'Choose Files' and select your prompt.txt file\n")

prompt_files = files.upload()

if prompt_files:
    # Only the first uploaded file becomes the prompt; any others are ignored.
    prompt_filename = next(iter(prompt_files))
    os.rename(prompt_filename, 'colab_prompt.txt')
    print(f"‚úÖ Prompt uploaded: {prompt_filename}")

    # Show first 200 chars. The preview is display-only, so undecodable
    # bytes are replaced instead of aborting the cell.
    with open('colab_prompt.txt', 'r', encoding='utf-8', errors='replace') as f:
        content = f.read()
    print(f"\nüìñ Prompt preview ({len(content)} chars):")
    print(content[:200] + "...\n" if len(content) > 200 else content)
else:
    print("‚ö†Ô∏è No prompt file uploaded. Using default prompt.")
    # Create default prompt
    default_prompt = """Extract stock price information from the transcript.

Return JSON with these fields:
- index_name: Stock index name (e.g., \"S&P 500\")
- price: Current price
- change: Change in points
- change_percent: Percent change

Return ONLY valid JSON, no explanation."""

    # Explicit encoding so the file is written the same way on any runtime.
    with open('colab_prompt.txt', 'w', encoding='utf-8') as f:
        f.write(default_prompt)
    print("‚úÖ Default prompt created")

## Step 7️⃣: Import Functions

In [None]:
import sys
from pathlib import Path

# Put the current working directory first on the import path so the `app`
# package below resolves.
# NOTE(review): assumes the kernel's working directory is the cloned
# repository root - confirm the clone cell ran first.
project_path = Path.cwd()
sys.path.insert(0, str(project_path))

# Project entry points used by the batch-processing cell.
from app.models.transcribe import transcribe
from app.models.llm_extract import extract_with_long_prompt

print("‚úÖ Functions imported successfully!")
for ready_name in ("transcribe", "extract_with_long_prompt"):
    print(f"  üìç {ready_name}() - ready")

## Step 8️⃣: Batch Processing

In [None]:
import json
from pathlib import Path
import torch
import gc

# File extensions (lower-case, including the dot) accepted as audio input.
AUDIO_EXTENSIONS = ('.wav', '.mp3', '.flac', '.m4a', '.ogg')


def first_successes(results, limit=3):
    """Return up to ``limit`` successful entries from a results mapping.

    Args:
        results: Mapping of filename -> entry dict; each entry has a
            "status" key whose value is "success" or "error".
        limit: Maximum number of entries to return (negative is treated as 0).

    Returns:
        A list of ``(filename, entry)`` tuples whose status is "success",
        in the mapping's iteration order.
    """
    successes = [
        (name, entry)
        for name, entry in results.items()
        if entry["status"] == "success"
    ]
    return successes[:max(limit, 0)]


# Find all audio files (sorted by path so processing order is stable)
audio_files = sorted(
    f for f in Path('uploaded_audio').glob('*')
    if f.suffix.lower() in AUDIO_EXTENSIONS
)

print(f"üìÅ Found {len(audio_files)} audio files\n")

if len(audio_files) == 0:
    print("‚ùå No audio files to process")
else:
    # filename -> {"status": "success", "data": ...} or
    #             {"status": "error", "error": <truncated message>}
    all_results = {}

    for i, audio_file in enumerate(audio_files, 1):
        filename = audio_file.name
        print(f"[{i}/{len(audio_files)}] Processing {filename}...", end=' ', flush=True)

        try:
            # Clear memory between files
            torch.cuda.empty_cache()
            gc.collect()

            # Transcribe; the result is either a dict carrying the text
            # under 'result' or the transcript itself.
            result = transcribe(str(audio_file))
            transcript = result.get('result') if isinstance(result, dict) else result

            # Extract
            extraction = extract_with_long_prompt(transcript, prompt_file='colab_prompt.txt')

            all_results[filename] = {
                "status": "success",
                "data": extraction
            }
            print("‚úÖ")

        except Exception as e:
            # Best-effort batch: record the failure and continue with the
            # next file instead of aborting the whole run.
            all_results[filename] = {
                "status": "error",
                "error": str(e)[:100]
            }
            print(f"‚ùå ({str(e)[:30]})")

    # Save results. `default=str` keeps one non-JSON-serializable value in
    # an extraction from discarding the results of the entire batch.
    with open('batch_results.json', 'w') as f:
        json.dump(all_results, f, indent=2, default=str)

    print(f"\n‚úÖ All results saved to: batch_results.json")

    # Summary
    success = sum(1 for r in all_results.values() if r["status"] == "success")
    failed = len(all_results) - success

    print(f"\nüìä Summary:")
    print(f"  ‚úÖ Successful: {success}/{len(all_results)}")
    print(f"  ‚ùå Failed: {failed}/{len(all_results)}")

    # Show sample results: the first three *successful* files, so samples
    # still appear when the earliest files failed.
    if success > 0:
        print(f"\nüìà Sample Results:")
        for filename, result in first_successes(all_results, limit=3):
            print(f"\n  {filename}:")
            data = result["data"]
            if isinstance(data, dict):
                print(f"    Index: {data.get('index_name')}")
                print(f"    Price: {data.get('price')}")
                print(f"    Change: {data.get('change')} ({data.get('change_percent')})")
            else:
                # Extraction was not a dict; show it raw rather than crash.
                print(f"    Result: {data}")

## Step 9️⃣: Download Results

In [None]:
from google.colab import files
from pathlib import Path

print("üì• Download your results:\n")

# Download batch results if exists
if Path('batch_results.json').exists():
    print("1. Downloading batch_results.json...")
    files.download('batch_results.json')
    print("   ‚úÖ Downloaded!")
    print("\n‚úÖ All files ready for download!")
else:
    # Do not claim success when there is nothing to download.
    print("batch_results.json not found - run the Batch Processing step first.")

print("\nCheck the 'Files' tab (left panel) to download any files.")

## 🎉 Complete!

Your stock price extraction is done! 🚀

**Next steps:**
1. Download `batch_results.json` from the Files panel
2. Import results into your spreadsheet or database
3. Use for further analysis

**For more info:**
- See `README.md` for general documentation
- See `GOOGLE_COLAB.md` for detailed Colab guide
- See `USAGE.md` for usage examples