talk-llama : add new example + sync ggml from llama.cpp (#664)

* talk-llama : talk with LLaMA AI * talk.llama : disable EOS token * talk-llama : add README instructions * ggml : fix build in debug
ggerganov · Mar 27, 2023 · 4a0deb8 · 4a0deb8
1 parent 8e361d9
commit 4a0deb8
Show file tree

Hide file tree

Showing 14 changed files with 5,112 additions and 579 deletions.
diff --git a/.gitignore b/.gitignore
@@ -18,6 +18,7 @@ build-sanitize-thread/
 /stream
 /command
 /talk
+/talk-llama
 /bench
 
 arm_neon.h
@@ -32,3 +33,5 @@ examples/whisper.objc/whisper.objc.xcodeproj/xcuserdata/
 examples/whisper.objc/whisper.objc.xcodeproj/project.xcworkspace/xcuserdata
 
 extra/bench-gg.txt
+
+*.mlmodel*
diff --git a/Makefile b/Makefile
@@ -36,7 +36,7 @@ LDFLAGS  =
 
 # ref: https://github.com/ggerganov/whisper.cpp/issues/37
 ifneq ($(wildcard /usr/include/musl/*),)
-	CFLAGS   += -D_POSIX_SOURCE -D_GNU_SOURCE
+	CFLAGS += -D_POSIX_SOURCE -D_GNU_SOURCE
 	CXXFLAGS += -D_POSIX_SOURCE -D_GNU_SOURCE
 endif
 
@@ -178,7 +178,7 @@ $(info I CC:       $(CCV))
 $(info I CXX:      $(CXXV))
 $(info )
 
-default: main
+default: main bench
 
 #
 # Build library
@@ -197,7 +197,7 @@ libwhisper.so: ggml.o whisper.o
 	$(CXX) $(CXXFLAGS) -shared -o libwhisper.so ggml.o whisper.o $(LDFLAGS)
 
 clean:
-	rm -f *.o main stream command talk bench libwhisper.a libwhisper.so
+	rm -f *.o main stream command talk talk-llama bench libwhisper.a libwhisper.so
 
 #
 # Examples
@@ -212,6 +212,9 @@ main: examples/main/main.cpp $(SRC_COMMON) ggml.o whisper.o
 	$(CXX) $(CXXFLAGS) examples/main/main.cpp $(SRC_COMMON) ggml.o whisper.o -o main $(LDFLAGS)
 	./main -h
 
+bench: examples/bench/bench.cpp ggml.o whisper.o
+	$(CXX) $(CXXFLAGS) examples/bench/bench.cpp ggml.o whisper.o -o bench $(LDFLAGS)
+
 stream: examples/stream/stream.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o whisper.o
 	$(CXX) $(CXXFLAGS) examples/stream/stream.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o whisper.o -o stream $(CC_SDL) $(LDFLAGS)
 
@@ -221,8 +224,8 @@ command: examples/command/command.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o whi
 talk: examples/talk/talk.cpp examples/talk/gpt-2.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o whisper.o
 	$(CXX) $(CXXFLAGS) examples/talk/talk.cpp examples/talk/gpt-2.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o whisper.o -o talk $(CC_SDL) $(LDFLAGS)
 
-bench: examples/bench/bench.cpp ggml.o whisper.o
-	$(CXX) $(CXXFLAGS) examples/bench/bench.cpp ggml.o whisper.o -o bench $(LDFLAGS)
+talk-llama: examples/talk-llama/talk-llama.cpp examples/talk-llama/llama.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o whisper.o
+	$(CXX) $(CXXFLAGS) examples/talk-llama/talk-llama.cpp examples/talk-llama/llama.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o whisper.o -o talk-llama $(CC_SDL) $(LDFLAGS)
 
 #
 # Audio samples

diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
@@ -63,4 +63,5 @@ else()
     add_subdirectory(command)
     add_subdirectory(bench)
     add_subdirectory(talk)
+    add_subdirectory(talk-llama)
 endif()
diff --git a/examples/talk-llama/.gitignore b/examples/talk-llama/.gitignore
@@ -0,0 +1,2 @@
+eleven-labs.py
+audio.mp3
diff --git a/examples/talk-llama/CMakeLists.txt b/examples/talk-llama/CMakeLists.txt
@@ -0,0 +1,10 @@
+if (WHISPER_SUPPORT_SDL2)
+    # talk-llama
+    set(TARGET talk-llama)
+
+    add_executable(${TARGET} talk-llama.cpp llama.cpp)
+
+    include(DefaultTargetOptions)
+
+    target_link_libraries(${TARGET} PRIVATE common common-sdl whisper ${SDL2_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT})
+endif ()
diff --git a/examples/talk-llama/README.md b/examples/talk-llama/README.md
@@ -0,0 +1,32 @@
+# talk-llama
+
+Talk with a LLaMA AI in your terminal
+
+[Demo Talk](https://user-images.githubusercontent.com/1991296/228024237-848f998c-c334-46a6-bef8-3271590da83b.mp4)
+
+## Building
+
+The `talk-llama` tool depends on the SDL2 library to capture audio from the microphone. You can build it like this:
+
+```bash
+# Install SDL2 on Linux
+sudo apt-get install libsdl2-dev
+
+# Install SDL2 on Mac OS
+brew install sdl2
+
+# Build the "talk-llama" executable
+make talk-llama
+
+# Run it
+./talk-llama -mw ./models/ggml-small.en.bin -ml ../llama.cpp/models/13B/ggml-model-q4_0.bin -p "Georgi" -t 8
+```
+
+- The `-mw` argument specifies the Whisper model that you would like to use. The `base` or `small` models are recommended for a real-time experience
+- The `-ml` argument specifies the LLaMA model that you would like to use. Read the instructions in https://github.com/ggerganov/llama.cpp for information about how to obtain a `ggml`-compatible LLaMA model
+
+## TTS
+
+For the best experience, this example needs a TTS tool to convert the generated text responses to voice.
+You can use any TTS engine that you would like - simply edit the [speak.sh](speak.sh) script to your needs.
+By default, it is configured to use macOS's `say`, but you can use whatever you wish.