<a href="https://colab.research.google.com/github/Vanaja146/Backend/blob/main/MiniProject.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>






## **Title: Audio To Text**

In [None]:
# Voice-to-Text App with Ngrok Fix for Google Colab
# Install required packages
!pip install SpeechRecognition flask pydub pyngrok

import os
import speech_recognition as sr
import flask
from flask import Flask, request, render_template_string, jsonify
from pyngrok import ngrok
import base64
import io
import tempfile
from pydub import AudioSegment
import IPython.display as ipd
from IPython.display import HTML, display
import threading
import time
import queue

# Flask application that serves the recorder page and the upload/status
# JSON endpoints defined further down in this file.
app = Flask(__name__)

# Single SpeechRecognition recognizer instance used by the background worker.
recognizer = sr.Recognizer()

# Work queue of (file_id, audio_path) tuples: the upload handler enqueues,
# the background worker dequeues and transcribes.
audio_queue = queue.Queue()
# Maps file_id -> {"status": ...} plus "text" or "error"; written by both the
# upload handler and the worker thread, read by the status endpoint.
results = {}

# Function to process audio in the background
def process_audio_files():
    """Transcribe queued audio files forever and publish outcomes in ``results``.

    Each queue item is a ``(file_id, audio_path)`` tuple. For every item the
    WAV file at ``audio_path`` is read, sent to Google's speech recognition
    service, and ``results[file_id]`` is set to either
    ``{"status": "complete", "text": ...}`` or
    ``{"status": "error", "error": ...}``. The temporary file is removed
    afterwards. The function never returns; run it in a daemon thread.
    """
    while True:
        try:
            # get() already blocks for up to a second, so no extra sleep is needed.
            file_id, audio_path = audio_queue.get(timeout=1)
        except queue.Empty:
            continue

        try:
            with sr.AudioFile(audio_path) as source:
                audio_data = recognizer.record(source)
            # Using Google's Speech Recognition
            text = recognizer.recognize_google(audio_data)
            results[file_id] = {"status": "complete", "text": text}
        except sr.UnknownValueError:
            # This exception carries no message (str(e) == ""), so supply the
            # text the web client looks for to show its "didn't catch that" hint.
            results[file_id] = {"status": "error", "error": "Could not understand audio"}
        except Exception as e:
            # Boundary of the worker: report anything else to the client
            # instead of letting the thread die.
            results[file_id] = {"status": "error", "error": str(e) or type(e).__name__}
        finally:
            # Clean up temp file; a failed delete must not kill the worker
            # or skip task_done().
            try:
                if os.path.exists(audio_path):
                    os.remove(audio_path)
            except OSError:
                pass
            audio_queue.task_done()

# Start the background transcription worker. daemon=True means this thread
# does not keep the interpreter alive on its own.
process_thread = threading.Thread(target=process_audio_files, daemon=True)
process_thread.start()

# HTML template for the web interface.
#
# Single-page recorder UI (markup, CSS and JavaScript) rendered by the "/"
# route. The script records the microphone in ~5 second segments with
# MediaRecorder, POSTs each segment to /upload and polls /status/<file_id>
# until a transcription or an error comes back.
#
# NOTE: this string is passed through Jinja (render_template_string), so it
# must not contain "{{", "{%" or "{#" sequences.
HTML_TEMPLATE = '''
<!DOCTYPE html>
<html>
<head>
    <title>Voice to Text Converter</title>
    <style>
        body {
            font-family: Arial, sans-serif;
            margin: 0;
            padding: 20px;
            background-color: #f5f5f5;
        }
        .container {
            max-width: 800px;
            margin: 0 auto;
            background-color: white;
            padding: 20px;
            border-radius: 8px;
            box-shadow: 0 2px 10px rgba(0,0,0,0.1);
        }
        h1 {
            color: #333;
            text-align: center;
        }
        .section {
            margin-bottom: 20px;
            padding: 15px;
            border: 1px solid #ddd;
            border-radius: 4px;
        }
        .section h2 {
            margin-top: 0;
            color: #444;
        }
        button {
            padding: 12px 20px;
            background-color: #4285f4;
            color: white;
            border: none;
            border-radius: 4px;
            cursor: pointer;
            margin: 5px 0;
            font-size: 16px;
            transition: all 0.3s ease;
        }
        button:hover {
            background-color: #3367d6;
            transform: translateY(-2px);
            box-shadow: 0 4px 8px rgba(0,0,0,0.1);
        }
        button:disabled {
            background-color: #cccccc;
            cursor: not-allowed;
            transform: none;
            box-shadow: none;
        }
        #result {
            margin-top: 20px;
            padding: 15px;
            border: 1px solid #ddd;
            border-radius: 4px;
            min-height: 100px;
            background-color: #fafafa;
        }
        .loader {
            border: 5px solid #f3f3f3;
            border-top: 5px solid #3498db;
            border-radius: 50%;
            width: 30px;
            height: 30px;
            animation: spin 2s linear infinite;
            margin: 10px auto;
            display: none;
        }
        @keyframes spin {
            0% { transform: rotate(0deg); }
            100% { transform: rotate(360deg); }
        }
        #audioControls {
            display: flex;
            justify-content: space-around;
            margin: 20px 0;
        }
        .record-btn {
            display: flex;
            align-items: center;
            justify-content: center;
            width: 180px;
            height: 60px;
            border-radius: 30px;
            font-weight: bold;
        }
        #recordBtn {
            background-color: #ea4335;
        }
        #recordBtn:hover {
            background-color: #d62516;
        }
        #stopBtn {
            background-color: #34a853;
        }
        #stopBtn:hover {
            background-color: #2d9249;
        }
        .status-indicator {
            text-align: center;
            font-weight: bold;
            margin: 10px 0;
            color: #555;
        }
        .recording-pulse {
            display: inline-block;
            width: 18px;
            height: 18px;
            border-radius: 50%;
            background: #ea4335;
            margin-right: 10px;
            animation: pulse 1.5s infinite;
        }
        @keyframes pulse {
            0% { transform: scale(0.95); box-shadow: 0 0 0 0 rgba(234, 67, 53, 0.7); }
            70% { transform: scale(1); box-shadow: 0 0 0 10px rgba(234, 67, 53, 0); }
            100% { transform: scale(0.95); box-shadow: 0 0 0 0 rgba(234, 67, 53, 0); }
        }
        .transcript-history {
            max-height: 300px;
            overflow-y: auto;
            border: 1px solid #eee;
            padding: 10px;
            border-radius: 4px;
            background-color: white;
            margin-top: 15px;
        }
        .transcript-entry {
            padding: 8px;
            border-bottom: 1px solid #eee;
        }
        .transcript-entry:last-child {
            border-bottom: none;
        }
        .transcript-time {
            font-size: 12px;
            color: #777;
        }
    </style>
</head>
<body>
    <div class="container">
        <h1>Real-time Voice to Text Converter</h1>

        <div class="section">
            <h2>Voice Detection</h2>
            <p>Click "Start Recording" and speak into your microphone to convert your voice to text in real-time.</p>

            <div id="audioControls">
                <button id="recordBtn" class="record-btn">
                    <svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="white">
                        <path d="M12 14c1.66 0 3-1.34 3-3V5c0-1.66-1.34-3-3-3S9 3.34 9 5v6c0 1.66 1.34 3 3 3z"/>
                        <path d="M17 11c0 2.76-2.24 5-5 5s-5-2.24-5-5H5c0 3.53 2.61 6.43 6 6.92V21h2v-3.08c3.39-.49 6-3.39 6-6.92h-2z"/>
                    </svg>
                    &nbsp;Start Recording
                </button>
                <button id="stopBtn" class="record-btn" disabled>
                    <svg xmlns="http://www.w3.org/2000/svg" width="24" height="24" viewBox="0 0 24 24" fill="white">
                        <path d="M6 6h12v12H6z"/>
                    </svg>
                    &nbsp;Stop Recording
                </button>
            </div>

            <div class="status-indicator" id="recordingStatus" style="display: none;">
                <span class="recording-pulse"></span> Recording in progress...
            </div>

            <div class="loader" id="recordLoader"></div>
        </div>

        <div id="result">
            <h2>Current Transcription</h2>
            <p id="transcription">No transcription available yet. Click "Start Recording" and speak into your microphone.</p>

            <h3>Transcript History</h3>
            <div class="transcript-history" id="transcriptHistory"></div>
        </div>
    </div>

    <script>
        // Recording functionality
        let mediaRecorder;
        let mediaStream;
        let transcriptHistory = [];
        let isRecording = false;
        let continuousRecording = false;
        let recordingInterval;

        document.getElementById('recordBtn').addEventListener('click', startRecording);
        document.getElementById('stopBtn').addEventListener('click', stopRecording);

        async function startRecording() {
            try {
                mediaStream = await navigator.mediaDevices.getUserMedia({ audio: true });
                const recorder = new MediaRecorder(mediaStream);
                mediaRecorder = recorder;

                // Chunks are kept per recorder so a late event from a previous
                // session can never mix into the current one.
                let chunks = [];
                recorder.ondataavailable = function(e) {
                    if (e.data && e.data.size > 0) {
                        chunks.push(e.data);
                    }
                };

                recorder.onstop = function() {
                    const segment = chunks;
                    chunks = [];
                    processRecording(recorder, segment);
                };

                // Start recording
                recorder.start();
                isRecording = true;

                // Enable continuous recording mode
                continuousRecording = true;

                // Update UI
                document.getElementById('recordBtn').disabled = true;
                document.getElementById('stopBtn').disabled = false;
                document.getElementById('recordingStatus').style.display = 'block';
                document.getElementById('transcription').textContent = 'Listening...';

                // Set up continuous recording (stop and restart every 5 seconds to get real-time transcription)
                clearInterval(recordingInterval);
                recordingInterval = setInterval(() => {
                    // stop() throws InvalidStateError on an inactive recorder, so check the state first
                    if (isRecording && continuousRecording && mediaRecorder.state === 'recording') {
                        mediaRecorder.stop();
                        // Will restart in the onstop handler
                    }
                }, 5000); // Process every 5 seconds
            } catch (error) {
                console.error('Error accessing microphone:', error);
                alert('Error accessing microphone. Please make sure you have granted microphone permissions: ' + error);
            }
        }

        function stopRecording() {
            // Stop continuous recording
            continuousRecording = false;
            clearInterval(recordingInterval);

            // Stop current recording if active (the final segment is still delivered to onstop)
            if (mediaRecorder && mediaRecorder.state !== 'inactive') {
                mediaRecorder.stop();
            }

            // Release the microphone so the browser stops capturing audio
            if (mediaStream) {
                mediaStream.getTracks().forEach(track => track.stop());
                mediaStream = null;
            }

            // Update UI
            isRecording = false;
            document.getElementById('recordBtn').disabled = false;
            document.getElementById('stopBtn').disabled = true;
            document.getElementById('recordingStatus').style.display = 'none';
        }

        function processRecording(recorder, chunks) {
            // Restart right away (before uploading) so no speech is lost while the
            // request is in flight. Only the current recorder is ever restarted.
            if (continuousRecording && isRecording && recorder === mediaRecorder && recorder.state === 'inactive') {
                try {
                    recorder.start();
                } catch (error) {
                    console.error('Error restarting recording:', error);
                }
            }

            if (chunks.length === 0) {
                // No audio data to process
                return;
            }

            // MediaRecorder produces webm/ogg/mp4 depending on the browser, never WAV;
            // label the upload with the real container type.
            const mimeType = recorder.mimeType || chunks[0].type || 'audio/webm';
            const extension = mimeType.split(';')[0].split('/')[1] || 'webm';
            const audioBlob = new Blob(chunks, { type: mimeType });

            const formData = new FormData();
            formData.append('file', audioBlob, 'recording.' + extension);

            document.getElementById('recordLoader').style.display = 'block';

            fetch('/upload', {
                method: 'POST',
                body: formData
            })
            .then(response => response.json())
            .then(data => {
                if (data.file_id) {
                    checkStatus(data.file_id);
                } else {
                    throw new Error(data.error || 'No file ID returned');
                }
            })
            .catch(error => {
                console.error('Error processing recording:', error);
                document.getElementById('recordLoader').style.display = 'none';
            });
        }

        // Check the status of processing
        function checkStatus(fileId) {
            fetch(`/status/${fileId}`)
            .then(response => response.json())
            .then(data => {
                if (data.status === 'complete') {
                    document.getElementById('recordLoader').style.display = 'none';
                    document.getElementById('transcription').textContent = data.text;

                    // Add to transcript history
                    addToTranscriptHistory(data.text);
                } else if (data.status === 'error') {
                    document.getElementById('recordLoader').style.display = 'none';
                    const message = data.error || '';
                    if (message.includes("Could not understand audio")) {
                        document.getElementById('transcription').textContent = "Didn't catch that. Please try speaking again.";
                    } else {
                        document.getElementById('transcription').textContent = 'Error: ' + message;
                    }
                } else if (data.status === 'processing') {
                    // Still processing, check again in 1 second
                    setTimeout(() => checkStatus(fileId), 1000);
                } else {
                    // Unknown id (for example after a server restart): stop polling
                    console.warn('Unexpected status for ' + fileId + ':', data.status);
                    document.getElementById('recordLoader').style.display = 'none';
                }
            })
            .catch(error => {
                console.error('Error checking status:', error);
                document.getElementById('recordLoader').style.display = 'none';
            });
        }

        // Add transcription to history
        function addToTranscriptHistory(text) {
            if (!text || text.trim() === '') return;

            const now = new Date();
            const timestamp = now.toLocaleTimeString();

            transcriptHistory.unshift({
                text: text,
                timestamp: timestamp
            });

            // Keep only last 10 entries
            if (transcriptHistory.length > 10) {
                transcriptHistory = transcriptHistory.slice(0, 10);
            }

            // Update the UI
            updateTranscriptHistory();
        }

        // Update transcript history display
        function updateTranscriptHistory() {
            const historyContainer = document.getElementById('transcriptHistory');
            historyContainer.innerHTML = '';

            transcriptHistory.forEach(entry => {
                const entryElement = document.createElement('div');
                entryElement.className = 'transcript-entry';

                const textElement = document.createElement('div');
                textElement.textContent = entry.text;

                const timeElement = document.createElement('div');
                timeElement.className = 'transcript-time';
                timeElement.textContent = entry.timestamp;

                entryElement.appendChild(textElement);
                entryElement.appendChild(timeElement);
                historyContainer.appendChild(entryElement);
            });

            if (transcriptHistory.length === 0) {
                historyContainer.innerHTML = '<p>No transcriptions yet.</p>';
            }
        }
    </script>
</body>
</html>
'''

@app.route('/')
def index():
    """Serve the single-page recorder UI rendered from ``HTML_TEMPLATE``."""
    page = render_template_string(HTML_TEMPLATE)
    return page

@app.route('/upload', methods=['POST'])
def upload_file():
    """Accept an uploaded audio clip, convert it to WAV and queue it for transcription.

    Expects a multipart form field named ``file``. On success responds with
    ``{"file_id": <str>}``; the id is then used with ``/status/<file_id>``.
    Responds with HTTP 400 and ``{"error": ...}`` when the field is missing,
    the filename is empty, or the audio cannot be decoded.
    """
    import uuid  # local import: only this handler needs it

    if 'file' not in request.files:
        return jsonify({'error': 'No file part'}), 400

    file = request.files['file']

    if file.filename == '':
        return jsonify({'error': 'No selected file'}), 400

    # Generate a unique file ID. A timestamp can collide when two uploads
    # arrive at the same moment; a random UUID cannot in practice.
    file_id = uuid.uuid4().hex

    # Save to a temporary file
    temp_dir = tempfile.gettempdir()
    file_path = os.path.join(temp_dir, f"{file_id}.wav")

    # Convert any audio format to WAV for compatibility with SpeechRecognition
    try:
        audio = AudioSegment.from_file(file)
        audio.export(file_path, format="wav")
    except Exception as e:
        # Do not leave a partially written file behind.
        try:
            if os.path.exists(file_path):
                os.remove(file_path)
        except OSError:
            pass
        return jsonify({'error': f'Error processing audio: {str(e)}'}), 400

    # Publish the "processing" state BEFORE enqueueing: otherwise a fast
    # worker could store its final result first and have it overwritten here,
    # leaving the client polling forever.
    results[file_id] = {"status": "processing"}
    audio_queue.put((file_id, file_path))

    return jsonify({'file_id': file_id})

@app.route('/status/<file_id>')
def check_status(file_id):
    """Return the stored result for ``file_id`` as JSON.

    Unknown ids yield ``{"status": "not_found"}``; known ids yield whatever
    entry is currently stored in ``results`` for that id.
    """
    try:
        payload = results[file_id]
    except KeyError:
        payload = {'status': 'not_found'}
    return jsonify(payload)

# Run the app with ngrok (to avoid 403 Forbidden errors)
def run_app_with_ngrok():
    """Expose the Flask app through an ngrok tunnel and show the public URL.

    The ngrok authtoken is read from the ``NGROK_AUTHTOKEN`` environment
    variable instead of being hard-coded: a token committed to source is a
    leaked credential and should be revoked. When the variable is unset, no
    token is applied here and ngrok falls back to whatever configuration is
    already present on the machine.

    Opens a tunnel to local port 5000, starts the Flask server on that port in
    a separate thread, and displays usage instructions in the notebook.
    """
    # Set your authtoken via the environment, e.g. in a notebook cell:
    #   os.environ["NGROK_AUTHTOKEN"] = "<your token>"
    auth_token = os.environ.get("NGROK_AUTHTOKEN")
    if auth_token:
        ngrok.set_auth_token(auth_token)

    # Set up ngrok tunnel
    ngrok_tunnel = ngrok.connect(5000)
    public_url = ngrok_tunnel.public_url

    # Start the Flask server in a thread (reloader off: it cannot run outside the main thread)
    threading.Thread(target=lambda: app.run(host='0.0.0.0', port=5000, debug=False, use_reloader=False)).start()

    # Display the access information
    display(HTML(f'''
        <h2>Real-time Voice-to-Text Detection App is running!</h2>
        <p>Access your app at this URL: <a href="{public_url}" target="_blank">{public_url}</a></p>
        <p>Important: Be sure to allow microphone access in your browser when prompted!</p>
        <p>Instructions:</p>
        <ol>
            <li>Click "Start Recording" and begin speaking</li>
            <li>The app will automatically process your speech in real-time (every 5 seconds)</li>
            <li>Click "Stop Recording" when you're done</li>
            <li>View your transcription history below the current transcription</li>
        </ol>
        <p><strong>Note:</strong> If you still encounter issues, make sure pop-ups aren't blocked and try using Chrome browser.</p>
    '''))

# Execute in Colab: a notebook cell runs as the "__main__" module, so this
# starts the tunnel and server when the cell is executed (or when the file is
# run directly as a script), but not when it is imported.
if __name__ == '__main__':
    run_app_with_ngrok()

Collecting SpeechRecognition
  Downloading speechrecognition-3.14.3-py3-none-any.whl.metadata (30 kB)
Collecting pyngrok
  Downloading pyngrok-7.3.0-py3-none-any.whl.metadata (8.1 kB)
Downloading speechrecognition-3.14.3-py3-none-any.whl (32.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m32.9/32.9 MB[0m [31m28.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pyngrok-7.3.0-py3-none-any.whl (25 kB)
Installing collected packages: SpeechRecognition, pyngrok
Successfully installed SpeechRecognition-3.14.3 pyngrok-7.3.0


  m = re.match('([su]([0-9]{1,2})p?) \(([0-9]{1,2}) bit\)$', token)
  m2 = re.match('([su]([0-9]{1,2})p?)( \(default\))?$', token)
  elif re.match('(flt)p?( \(default\))?$', token):
  elif re.match('(dbl)p?( \(default\))?$', token):


