In [12]:
# Install the notebook's third-party dependencies.
# %pip (rather than !pip) installs into the environment of the running kernel,
# so the packages are importable from the cells below. `transformers` used to be
# installed a second time on its own line; it is listed once now.
%pip install pytube
%pip install librosa numpy scipy transformers torch
%pip install requests



In [8]:
# Pin the legacy 0.28.0 OpenAI client: the GPT-4 helper in this notebook calls
# openai.ChatCompletion.create and catches openai.error.OpenAIError.
# NOTE(review): the recorded output of this cell reports dependency conflicts
# with other installed packages (litellm, langchain-openai, llama-index-legacy,
# dspy-ai) — consider running this notebook in a dedicated environment.
!pip install openai==0.28.0



Collecting openai==0.28.0
  Using cached openai-0.28.0-py3-none-any.whl.metadata (13 kB)
Using cached openai-0.28.0-py3-none-any.whl (76 kB)
Installing collected packages: openai
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
litellm 1.52.4 requires openai>=1.54.0, but you have openai 0.28.0 which is incompatible.
llama-index-legacy 0.9.48 requires openai>=1.1.0, but you have openai 0.28.0 which is incompatible.
langchain-openai 0.1.22 requires openai<2.0.0,>=1.40.0, but you have openai 0.28.0 which is incompatible.
dspy-ai 2.4.9 requires openai<2.0.0,>=0.28.1, but you have openai 0.28.0 which is incompatible.
ragstack-ai-langchain 1.0.3 requires langchain==0.1.19, but you have langchain 0.2.16 which is incompatible.
ragstack-ai-langchain 1.0.3 requires langchain-astradb==0.3.0, but you have langchain-astradb 0.4.0 which is incompatible.
ragstack-ai-langc

In [None]:
# video_urls = [
#     "https://www.youtube.com/watch?v=rvWA7ybe8Io",
#     "https://www.youtube.com/watch?v=UdRza_PDCeU",
#     "https://www.youtube.com/watch?v=qb374Zv5ahA",
#     "https://www.youtube.com/watch?v=p6vmUvZa7vI"
# ]

In [13]:
import librosa
import numpy as np
import os

def extract_audio_features(audio_path):
    """Summarise a whole audio file as a dictionary of averaged librosa features.

    Args:
        audio_path: Path of the audio file handed to ``librosa.load``.

    Returns:
        dict with the keys ``mfcc`` (13 coefficients, each averaged along
        axis 1), ``spectral_centroid``, ``spectral_bandwidth``,
        ``spectral_rolloff``, ``zero_crossing_rate``, ``rms`` (each the mean of
        the corresponding librosa feature) and ``tempo`` (the first value
        returned by ``librosa.beat.beat_track``).
    """
    # sr=None is passed through so librosa does not resample to its default rate.
    signal, sample_rate = librosa.load(audio_path, sr=None)

    mfcc_matrix = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=13)
    centroid = librosa.feature.spectral_centroid(y=signal, sr=sample_rate)
    bandwidth = librosa.feature.spectral_bandwidth(y=signal, sr=sample_rate)
    rolloff = librosa.feature.spectral_rolloff(y=signal, sr=sample_rate)
    crossings = librosa.feature.zero_crossing_rate(signal)
    energy = librosa.feature.rms(y=signal)
    # beat_track yields (tempo, beat positions); only the tempo is kept.
    tempo, _ = librosa.beat.beat_track(y=signal, sr=sample_rate)

    return {
        'mfcc': mfcc_matrix.mean(axis=1),
        'spectral_centroid': centroid.mean(),
        'spectral_bandwidth': bandwidth.mean(),
        'spectral_rolloff': rolloff.mean(),
        'zero_crossing_rate': crossings.mean(),
        'rms': energy.mean(),
        'tempo': tempo,
    }


In [14]:
def analyze_voice_characteristics(features):
    """Turn averaged audio features into coarse voice labels.

    Args:
        features: Mapping that provides ``spectral_centroid``, ``rms`` and
            ``spectral_bandwidth``. Other keys are ignored.

    Returns:
        dict with three labels:
        ``shrillness`` is 'High' when the spectral centroid exceeds 5000,
        otherwise 'Normal';
        ``nervousness`` is 'High' when RMS is below 0.02 or the spectral
        bandwidth exceeds 2000, otherwise 'Low';
        ``confidence`` is 'High' when RMS exceeds 0.03 and the spectral
        bandwidth is below 1500, otherwise 'Low'.
    """
    # Values are looked up inside each condition so the short-circuiting of
    # `or` / `and` decides whether the bandwidth is read at all.
    return {
        'shrillness': 'High' if features['spectral_centroid'] > 5000 else 'Normal',
        'nervousness': (
            'High'
            if (features['rms'] < 0.02 or features['spectral_bandwidth'] > 2000)
            else 'Low'
        ),
        'confidence': (
            'High'
            if (features['rms'] > 0.03 and features['spectral_bandwidth'] < 1500)
            else 'Low'
        ),
    }


In [16]:
import librosa.display

def extract_timestamps_and_insights(audio_path, interval=5):
    """Label an audio file window by window.

    The file is loaded once, cut into consecutive windows of ``interval``
    seconds, and each window is passed through the same feature set and
    ``analyze_voice_characteristics``.

    Args:
        audio_path: Path of the audio file handed to ``librosa.load``.
        interval: Window length in whole seconds (used as the ``range`` step).

    Returns:
        list of dicts, one per window, each with a ``timestamp`` string of the
        form ``"<start>-<start + interval> seconds"`` and the ``insights``
        dict produced for that window.
    """
    signal, rate = librosa.load(audio_path, sr=None)
    total_seconds = librosa.get_duration(y=signal, sr=rate)

    timeline = []
    # Window starts are whole seconds below int(total_seconds).
    for begin in range(0, int(total_seconds), interval):
        finish = begin + interval

        # Convert seconds to sample offsets; the final window is clipped to the
        # end of the signal.
        first = begin * rate
        last = min(finish * rate, len(signal))
        window = signal[first:last]

        measured = dict(
            mfcc=librosa.feature.mfcc(y=window, sr=rate, n_mfcc=13).mean(axis=1),
            spectral_centroid=librosa.feature.spectral_centroid(y=window, sr=rate).mean(),
            spectral_bandwidth=librosa.feature.spectral_bandwidth(y=window, sr=rate).mean(),
            spectral_rolloff=librosa.feature.spectral_rolloff(y=window, sr=rate).mean(),
            zero_crossing_rate=librosa.feature.zero_crossing_rate(window).mean(),
            rms=librosa.feature.rms(y=window).mean(),
            tempo=librosa.beat.beat_track(y=window, sr=rate)[0],
        )

        timeline.append({
            'timestamp': f'{begin}-{finish} seconds',
            'insights': analyze_voice_characteristics(measured),
        })

    return timeline



In [5]:
#genai
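# (Hugging Face access token removed — never keep credentials in the notebook; authenticate with the login command below or an environment variable, and revoke any token that was stored here)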
#!huggingface-cli login

In [80]:
# Diagnostic: show which openai client version is installed and which
# packages depend on it.
!pip show openai


Name: openai
Version: 0.28.0
Summary: Python client library for the OpenAI API
Home-page: https://github.com/openai/openai-python
Author: OpenAI
Author-email: support@openai.com
License: 
Location: /Users/chethana/anaconda3/lib/python3.11/site-packages
Requires: aiohttp, requests, tqdm
Required-by: astra-assistants, crewai, dspy-ai, embedchain, instructor, langchain-openai, litellm, llama-index, llama-index-agent-openai, llama-index-core, llama-index-legacy, pyautogen, ragas


In [24]:
from transformers import pipeline

import requests
import json
def generate_contextual_insights_ollama(audio_insights, model="llama3.2", timeout=(10, 600)):
    """Ask a local Ollama chat model for a textual report on the audio insights.

    The insights are serialised to JSON, embedded in a prompt and posted to the
    Ollama chat endpoint on 127.0.0.1:11434. The reply is read as a stream of
    JSON lines and the ``message.content`` pieces are concatenated.

    Args:
        audio_insights: JSON-serialisable insights (e.g. the list returned by
            ``extract_timestamps_and_insights``).
        model: Name of the Ollama model to query.
        timeout: Value forwarded to ``requests.post`` as ``timeout``; a
            ``(connect, read)`` tuple in seconds. The read part bounds the wait
            between streamed chunks, so a stalled server no longer blocks the
            cell forever.

    Returns:
        The collected reply with surrounding whitespace stripped, or ``None``
        when the HTTP request fails (the error is printed).
    """
    url = "http://127.0.0.1:11434/api/chat"

    # Refined prompt
    prompt = (
        "DO NOT GIVE ME ANY CODE. DO NOT GENERATE CODDE. ONLY TEXTUAL ANSWERS. The following JSON contains audio insights. Your task is to analyze and provide a detailed textual report. "
        "For each timestamp, describe the speaker's shrillness, nervousness, and confidence levels, and highlight any "
        "patterns or notable changes. Do not include any Python code or technical explanations."
        "\n\nAudio Insights:\n"
        f"{json.dumps(audio_insights, indent=2)}"
    )

    payload = {
        "model": model,
        "messages": [{"role": "user", "content": prompt}]
    }

    try:
        print("Connecting to Ollama server...")
        # With stream=True the connection stays open until the body is fully
        # consumed or the response is closed. The loop below can stop early,
        # so the response is used as a context manager to always release it.
        with requests.post(url, json=payload, stream=True, timeout=timeout) as response:
            response.raise_for_status()

            # Collect the streamed response
            pieces = []
            for line in response.iter_lines(decode_unicode=True):
                if not line:
                    continue  # skip keep-alive blank lines
                message = json.loads(line)
                content = message.get("message", {}).get("content", "")
                if "python" in content or "import " in content:
                    print("Detected code; stopping response collection.")
                    break  # Stop processing further if code patterns are detected
                pieces.append(content)

                if message.get("done"):
                    break

        print("Connected successfully.")
        return "".join(pieces).strip()

    except requests.exceptions.RequestException as e:
        print(f"Error connecting to Ollama API: {e}")
        return None

import openai
import json

# Read the OpenAI API key from the OPENAI_API_KEY environment variable instead
# of embedding it in the notebook: a key saved in the file is readable by
# anyone who can open it. The key that used to be hard-coded here should be
# treated as compromised and revoked.
# If the variable is unset, API_KEY is None and the OpenAI call will fail with
# an authentication error rather than use a leaked credential.
import os

API_KEY = os.environ.get("OPENAI_API_KEY")
openai.api_key = API_KEY


def generate_contextual_insights_gpt4(audio_insights):
    """Ask GPT-4 for a textual report describing the audio insights.

    Uses the legacy ``openai.ChatCompletion`` interface with a fixed system
    message, temperature 0.7 and a 500-token reply limit.

    Args:
        audio_insights: JSON-serialisable insights to embed in the prompt.

    Returns:
        The first choice's message content with surrounding whitespace
        stripped, or ``None`` when the OpenAI client raises an
        ``OpenAIError`` (the error is printed).
    """
    serialized = json.dumps(audio_insights, indent=2)
    instructions = (
        "Analyze the following data representing audio insights and provide a detailed textual report. "
        "For each timestamp, describe the speaker's shrillness, nervousness, and confidence levels, and highlight any "
        "patterns or notable changes. Avoid including any code or technical explanations."
    )
    prompt = instructions + "\n\nData:\n" + serialized

    conversation = [
        {"role": "system", "content": "You are a helpful assistant specializing in data analysis."},
        {"role": "user", "content": prompt},
    ]
    request_options = dict(
        model="gpt-4",
        messages=conversation,
        temperature=0.7,  # sampling temperature
        max_tokens=500,  # upper bound on the reply length
    )

    try:
        completion = openai.ChatCompletion.create(**request_options)
        reply = completion.choices[0].message.content
        return reply.strip()
    except openai.error.OpenAIError as e:
        print(f"Error with OpenAI API: {e}")
        return None


# (OpenAI API key removed from this comment — never keep credentials in the notebook; revoke any key that was stored here)


In [18]:
# Run the per-window analysis on one recording.
# NOTE(review): this is an absolute path on the author's machine — point it at
# your own audio file before running the cell.
audio_path = "/Users/chethana/Downloads/SAMPLE INTERVIEW FROM THE CLIENT_4.mp3"

interval = 5  # Analyze every 5 seconds

# Extract insights: one entry per window, each carrying a timestamp label and
# the shrillness / nervousness / confidence labels for that window.
audio_insights = extract_timestamps_and_insights(audio_path, interval)
print("Audio Insights:", audio_insights)



Audio Insights: [{'timestamp': '0-5 seconds', 'insights': {'shrillness': 'Normal', 'nervousness': 'Low', 'confidence': 'High'}}, {'timestamp': '5-10 seconds', 'insights': {'shrillness': 'Normal', 'nervousness': 'High', 'confidence': 'Low'}}, {'timestamp': '10-15 seconds', 'insights': {'shrillness': 'Normal', 'nervousness': 'Low', 'confidence': 'Low'}}, {'timestamp': '15-20 seconds', 'insights': {'shrillness': 'Normal', 'nervousness': 'Low', 'confidence': 'High'}}, {'timestamp': '20-25 seconds', 'insights': {'shrillness': 'Normal', 'nervousness': 'Low', 'confidence': 'High'}}, {'timestamp': '25-30 seconds', 'insights': {'shrillness': 'Normal', 'nervousness': 'Low', 'confidence': 'Low'}}, {'timestamp': '30-35 seconds', 'insights': {'shrillness': 'Normal', 'nervousness': 'Low', 'confidence': 'Low'}}, {'timestamp': '35-40 seconds', 'insights': {'shrillness': 'Normal', 'nervousness': 'Low', 'confidence': 'Low'}}, {'timestamp': '40-45 seconds', 'insights': {'shrillness': 'Normal', 'nervousne

In [25]:
# Generate contextual insights using an LLM: send the per-window labels to the
# GPT-4 helper. The result is None when the OpenAI call fails.
contextual_insights = generate_contextual_insights_gpt4(audio_insights)
print("Contextual Insights:", contextual_insights)

Contextual Insights: Based on the provided data, the speaker's shrillness remained consistently normal throughout the entire duration of the speech. This indicates a steady vocal pitch and intensity, and it suggests that the speaker maintained a stable vocal quality without any sudden pitch changes or high-pitched vocalizations.

The speaker's nervousness levels fluctuated throughout the speech. The speaker started off with low nervousness in the first 5 seconds, but there was a sudden spike to high nervousness between 5-10 seconds. After this, the speaker's nervousness dropped back to low and remained so until around 150-155 seconds. From 150-155 seconds onwards, the speaker's nervousness levels varied between low and high at different intervals. This intermittent surge in nervousness could indicate moments of stress or discomfort.

The speaker's confidence levels also varied throughout the speech. The speaker started off with high confidence in the first 5 seconds but then dropped to

In [9]:
# Diagnostic: the uninstall line is kept commented out; only the version check runs.
#!pip uninstall openai==1.54.3 -y
!pip show openai




Name: openai
Version: 0.28.0
Summary: Python client library for the OpenAI API
Home-page: https://github.com/openai/openai-python
Author: OpenAI
Author-email: support@openai.com
License: 
Location: /Users/chethana/anaconda3/lib/python3.11/site-packages
Requires: aiohttp, requests, tqdm
Required-by: astra-assistants, crewai, dspy-ai, embedchain, instructor, langchain-openai, litellm, llama-index, llama-index-agent-openai, llama-index-core, llama-index-legacy, pyautogen, ragas


In [10]:
# Diagnostic: confirm the version of the openai module the kernel actually imports.
import openai
print(openai.__version__)


0.28.0
