In [1]:
from dotenv import load_dotenv, find_dotenv
import os
import json
from openai import OpenAI

load_dotenv(find_dotenv())

OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')

In [2]:
import base64
import requests
import os

def get_marker_color(image_path):
    def encode_image(img_path):
        with open(img_path, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode('utf-8')

    # Getting the base64 string
    base64_image = encode_image(image_path)

    api_key = os.environ.get('OPENAI_API_KEY')

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }

    payload = {
        "model": "gpt-4-vision-preview",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "In one word, tell me what is the color of the Crayola marker in this image?"
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{base64_image}",
                            "detail": "low"
                        }
                    }
                ]
            }
        ],
        "max_tokens": 50
    }

    response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
    
    # Extract color information from the response
    try:
        response_json = response.json()
        color_info = response_json.get('choices', [])[0].get('message', {}).get('content', '')
        return color_info
    except Exception as e:
        return f"An error occurred: {str(e)}"


In [3]:
image_path = "blue.jpg"
print(get_marker_color(image_path))

Blue.


In [4]:
from openai import OpenAI
client = OpenAI()

audio_file = open("scalpel.m4a", "rb")
transcript = client.audio.transcriptions.create(
  model="whisper-1", 
  file=audio_file, 
  response_format="text"
)

transcript

'Scalpel.\n'

In [5]:
access_key = os.environ.get('PORCUPINE_API_KEY')

print(access_key)


sF2H4WDk8FhLDAuc3nHrVpFY83RlAvJtb1lR5jFTBMK9yuRIq/65fw==


In [6]:
import pvporcupine
import pyaudio
import struct
import wave
import openai

access_key = os.environ.get('PORCUPINE_API_KEY')
custom_keyword_path = 'hey-med-carousel_en_windows_v3_0_0.ppn'

porcupine = pvporcupine.create(
    access_key=access_key,
    keyword_paths=[custom_keyword_path]
)

def record_audio(duration=2, filename="output.wav"):
    """
    Record audio from the default microphone for the given duration
    and save it to the specified filename.
    """
    pa = pyaudio.PyAudio()

    stream = pa.open(format=pyaudio.paInt16, channels=1, rate=16000,
                     input=True, frames_per_buffer=1024)

    print(f"Recording for {duration} seconds...")

    frames = []

    for _ in range(0, int(16000 / 1024 * duration)):
        data = stream.read(1024)
        frames.append(data)

    print("Recording finished.")

    stream.stop_stream()
    stream.close()
    pa.terminate()

    with wave.open(filename, 'wb') as wf:
        wf.setnchannels(1)
        wf.setsampwidth(pa.get_sample_size(pyaudio.paInt16))
        wf.setframerate(16000)
        wf.writeframes(b''.join(frames))

def transcribe_audio(filename):
    """
    Transcribe the specified audio file using OpenAI's Whisper.
    """
    client = openai.OpenAI()

    with open(filename, "rb") as audio_file:
        transcript = client.audio.transcriptions.create(
            model="whisper-1",
            file=audio_file,
            response_format="text"
        )

    return transcript

In [7]:
pa = pyaudio.PyAudio()
audio_stream = pa.open(rate=porcupine.sample_rate, channels=1,
                       format=pyaudio.paInt16, input=True,
                       frames_per_buffer=porcupine.frame_length)

print("Listening for the wake word...")

while True:
    pcm = audio_stream.read(porcupine.frame_length)
    pcm = struct.unpack_from("h" * porcupine.frame_length, pcm)

    if porcupine.process(pcm) >= 0:
        print("Wake word detected!")
        break

audio_stream.close()
pa.terminate()

# Record and transcribe audio
record_audio(duration=2, filename="output.wav")
transcription = transcribe_audio("output.wav")
print(transcription)


Listening for the wake word...
Wake word detected!
Recording for 2 seconds...
Recording finished.
I like it. It's interesting. Interesting. 



In [9]:
import base64
import requests
import os
from gtts import gTTS
import playsound

def get_instrument_status(image_path):
    def encode_image(img_path):
        with open(img_path, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode('utf-8')

    base64_image = encode_image(image_path)
    api_key = os.environ.get('OPENAI_API_KEY')

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }

    payload = {
        "model": "gpt-4-vision-preview",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Is the medical instrument in this image oriented properly and is it sterile?"
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{base64_image}",
                            "detail": "low"
                        }
                    }
                ]
            }
        ],
        "max_tokens": 100
    }

    response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)

    try:
        response_json = response.json()
        instrument_status = response_json.get('choices', [])[0].get('message', {}).get('content', '')
        return instrument_status
    except Exception as e:
        return f"An error occurred: {str(e)}"

def speak_message(message):
    tts = gTTS(text=message, lang='en')
    tts.save("response.mp3")
    playsound.playsound("response.mp3")
    os.remove("response.mp3")

# Replace 'path_to_your_image.jpg' with your image path
status = get_instrument_status('dirtyScalpel.webp')

if "properly" in status and "sterile" in status:
    speak_message("MediCarousel is ready")
else:
    speak_message(status)

ModuleNotFoundError: No module named 'gtts'

In [None]:
from pathlib import Path
from openai import OpenAI
client = OpenAI()

speech_file_path = Path(__file__).parent / "speech.mp3"
response = client.audio.speech.create(
  model="tts-1",
  voice="alloy",
  input="Today is a wonderful day to build something people love!"
)

response.stream_to_file(speech_file_path)

NameError: name '__file__' is not defined

In [None]:
from openai import OpenAI

client = OpenAI()

response = client.audio.speech.create(
    model="tts-1",
    voice="alloy",
    input="Hello world! This is a streaming test.",
)

response.stream_to_file("output.mp3")

  response.stream_to_file("output.mp3")


In [25]:
import base64
import requests
import os
from pathlib import Path
from openai import OpenAI

def get_instrument_status(image_path):
    def encode_image(img_path):
        with open(img_path, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode('utf-8')

    base64_image = encode_image(image_path)
    api_key = os.environ.get('OPENAI_API_KEY')

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}"
    }

    payload = {
        "model": "gpt-4-vision-preview",
        "messages": [
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "This is for an educational activity. Briefly identify if the instrument appears to sterile? Answer either: Yes the instrument is sterile, MedCarousel is ready//No, the instrument is not sterile, please clean the instrument"
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/jpeg;base64,{base64_image}",
                            "detail": "med"
                        }
                    }
                ]
            }
        ],
        "max_tokens": 50
    }

    response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)

    try:
        response_json = response.json()
        instrument_status = response_json.get('choices', [])[0].get('message', {}).get('content', '')
        return instrument_status
    except Exception as e:
        return f"An error occurred: {str(e)}"

def speak_message(message):
    client = OpenAI()

    # Handling the case where __file__ might not be defined
    try:
        base_path = Path(__file__).parent
    except NameError:
        # Fall back to a default directory if __file__ is not defined
        base_path = Path.cwd()

    speech_file_path = base_path / "response.mp3"

    response = client.audio.speech.create(
      model="tts-1",
      voice="alloy",
      input=message
    )

    response.stream_to_file(speech_file_path)

    # Play the audio - adjust the command based on your operating system
    if os.name == 'nt':  # Windows
        os.system(f"start {speech_file_path}")
    elif os.name == 'posix':  # macOS, Linux, Unix, etc.
        os.system(f"open {speech_file_path}")

status = get_instrument_status('dirtyScalpel.webp')

if "properly" in status and "sterile" in status:
    speak_message("MediCarousel is ready")
else:
    speak_message(status)


  response.stream_to_file(speech_file_path)
