Skip to content

Commit

Permalink
fine-tune on Xiaomi 14
Browse files Browse the repository at this point in the history
  • Loading branch information
zhouwg committed Mar 17, 2024
1 parent 33471ff commit 94559b8
Show file tree
Hide file tree
Showing 7 changed files with 65 additions and 18 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -379,8 +379,11 @@ public class CDEUtils {

private static boolean mIsOfflineDRM = false;

private static int mScreenWidth = 768;
private static int mScreenHeight = 1024;
private static int mScreenWidth = 1024;
private static int mScreenHeight = 768;

private static int mVideoWidth = 640;
private static int mVideoHeight = 360;

public static void setAPKForTV(boolean isAPKForTV) {
mIsAPKForTV = isAPKForTV;
Expand All @@ -397,11 +400,27 @@ public static void setScreenHeight(int height) {
/**
 * Returns the screen width currently stored in the static field {@code mScreenWidth}.
 *
 * @return the current value of {@code mScreenWidth}
 */
public static int getScreenWidth() {
return mScreenWidth;
}

public static int getmScreenHeight() {
/**
 * Returns the screen height currently stored in the static field {@code mScreenHeight}.
 *
 * @return the current value of {@code mScreenHeight}
 */
public static int getScreenHeight() {
return mScreenHeight;
}


/**
 * Stores {@code width} in the static field {@code mVideoWidth}. The value is not validated.
 *
 * NOTE(review): the only caller visible in this change passes the width received in
 * onSurfaceChanged (the same value assigned to mSurfaceWidth), so this may hold the
 * surface width rather than the decoded video width -- confirm the intended meaning.
 *
 * @param width the width to remember
 */
public static void setVideoWidth(int width) {
mVideoWidth = width;
}

/**
 * Stores {@code height} in the static field {@code mVideoHeight}. The value is not validated.
 *
 * NOTE(review): the only caller visible in this change passes the height received in
 * onSurfaceChanged (the same value assigned to mSurfaceHeight), so this may hold the
 * surface height rather than the decoded video height -- confirm the intended meaning.
 *
 * @param height the height to remember
 */
public static void setVideoHeight(int height) {
mVideoHeight = height;
}

/**
 * Returns the width most recently stored via {@code setVideoWidth}, or the field's
 * initial value if it was never set.
 *
 * @return the current value of {@code mVideoWidth}
 */
public static int getVideoWidth() {
return mVideoWidth;
}
/**
 * Returns the height most recently stored via {@code setVideoHeight}, or the field's
 * initial value if it was never set.
 *
 * @return the current value of {@code mVideoHeight}
 */
public static int getVideoHeight() {
return mVideoHeight;
}


//FIXME
public static boolean isRunningOnTV() {
if (mScreenWidth > mScreenHeight)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1622,9 +1622,27 @@ private void onASRStart() {
}

CDELog.j(TAG, "asr saved filename:" + CDEUtils.getASRSavedFileName());

//TODO: hard-coded path; this should be configurable in "ASR Settings"
//String ggmlModelFileName = "ggml-small.en.bin"; //31M
String ggmlModelFileName = "ggml-small.en.bin"; //466M
//String ggmlModelFileName = "ggml-small.en.bin"; // 466M
//String ggmlModelFileName = "ggml-tiny-q5_1.bin"; // 31M
//String ggmlModelFileName = "ggml-tiny.en-q5_1.bin"; // 31M
String ggmlModelFileName = "ggml-tiny.en-q8_0.bin"; //42M, very good, about 500-700 ms
CDELog.j(TAG, "asr mode: " + mSettings.getASRMode());
CDELog.j(TAG, "model: " + ggmlModelFileName);

File file = new File(CDEUtils.getDataPath() + ggmlModelFileName);
if (!file.exists()) {
CDELog.j(TAG, "GGML model file not found:" + file.getAbsolutePath());
Toast.makeText(getContext(), "GGML model file not found:" + file.getAbsolutePath(), Toast.LENGTH_SHORT).show();
topBarView.updateTVASRVisibility(false);
CDEUtils.setTVASR(false);
return;
} else {
CDELog.j(TAG, "ASR with GGML model file:" + file.getAbsolutePath());
}

//TODO: preload the GGML model and initialize asr_subsystem as early as possible to improve real-time ASR performance
CDELog.j(TAG, "asr mode: " + mSettings.getASRMode());
if (1 == mSettings.getASRMode()) {
whispercpp.asr_init(CDEUtils.getDataPath() + ggmlModelFileName, whispercpp.get_cpu_core_counts() / 2, WHISPER_ASR_MODE_PRESURETEST);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1109,6 +1109,8 @@ public void onSurfaceChanged(@NonNull IRenderView.ISurfaceHolder holder, int for

mSurfaceWidth = w;
mSurfaceHeight = h;
CDEUtils.setVideoWidth(w);
CDEUtils.setVideoHeight(h);
boolean isValidState = (mTargetState == STATE_PLAYING);
boolean hasValidSize = !mRenderView.shouldWaitForResize() || (mVideoWidth == w && mVideoHeight == h);
if (mMediaPlayer != null && isValidState && hasValidSize) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,26 +80,20 @@ private void clearSubtitle() {
private List<Caption> searchSubtitle(long duration) {
List<Caption> captionList = new ArrayList<>();
try {
//最小时间
long min = subtitleData.captions.firstKey();
long max = subtitleData.captions.lastKey();
//时间大于最小时间才开始解析
if (duration > min) {
//10秒前的key
long start = duration - 10 * 1000 < min
? subtitleData.captions.firstKey()
: subtitleData.captions.lowerKey(duration - 10 * 1000);
//截取10秒前到结尾的所有字幕
SortedMap<Long, Caption> temp = subtitleData.captions.subMap(start, max);
for (Long key1 : temp.keySet()) {
Caption caption = temp.get(key1);
if (caption == null)
return null;
//开始时间小于当前时间,结束时间大于当前时间, 放宽1ms
if (duration - caption.start.getMseconds() >= -1 && duration <= caption.end.getMseconds()) {
captionList.add(caption);
}
//减少查找时间,从开始大于当前时间开始break, 放宽1ms
if (caption.start.getMseconds() > duration + 1) {
break;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,9 @@ public static void showSubtitle(SubtitleView subtitleView, String subtitle, int
return;
}

if (isTopSubtitle){
//if (isTopSubtitle)
if (true) //modify on 2024-03-16 for real-time subtitle demo
{
subtitleView.setTopTexts(subtitleTexts);
} else {
subtitleView.setBottomTexts(subtitleTexts);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ public class SubtitleView extends View {

private static final float DEFAULT_STOKE_SIZE = 2f;

private static final int DEFAULT_TEXT_COLOR = Color.WHITE;
//private static final int DEFAULT_TEXT_COLOR = Color.WHITE;
private static final int DEFAULT_TEXT_COLOR = Color.BLUE; //modify on 2024-03-16 for real-time subtitle demo

private static final int DEFAULT_STOKE_COLOR = Color.BLACK;

Expand Down
19 changes: 15 additions & 4 deletions external/whispercpp/whisper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7500,7 +7500,7 @@ class whisper_asr {
n_end_time = ggml_time_us();
n_durtion = (n_end_time - n_begin_time) / 1000;

if (n_durtion > 2000) { // 2 seconds
if (n_durtion > 1000) { // 1 seconds, very good on Xiaomi 14, about 500-700 ms with GGML model ggml-tiny.en-q8_0.bin
LOGGD("duration of audio data gathering is: %d milliseconds\n", n_durtion);
LOGGD("size of gathered audio data: %d\n", _n_whisper_in_size);
LOGGD("total audio sample counts %d\n", _n_total_sample_counts);
Expand Down Expand Up @@ -7965,11 +7965,11 @@ void whisper_asr_init(const char * sz_model_path, int num_threads, int n_devmode
params.no_timestamps = true;

params.speed_up = false;
params.debug_mode = true;
params.debug_mode = false;

//params.tdrz_enable = false;//whisper complain failed to compute log mel spectrogram when this flag was enabled
params.suppress_blank = true;
params.suppress_non_speech_tokens = true;
//params.suppress_blank = true;
//params.suppress_non_speech_tokens = true;

memcpy(p_asr_ctx->p_params, &params, sizeof(struct whisper_full_params));

Expand Down Expand Up @@ -8017,4 +8017,15 @@ void whisper_asr_finalize() {

LOGGV("leave whisper_asr_finalize\n");
}

/*
I think complicated or modern C++ syntax should NOT be used in such a performance-sensitive
application unless you're a C++ master like Georgi Gerganov.
What I mean is that, just like the pure C implementations in FFmpeg or in
[ggml](https://github.com/ggerganov/ggml), simple / static data structures should be
preferred for such performance-sensitive applications, and memory leaks should be guarded
against carefully at the same time. The success of [ggml](https://github.com/ggerganov/ggml)/[whispercpp](https://github.com/ggerganov/whisper.cpp)
on Xiaomi 14 proves that C/C++ is still a very, very, very powerful programming language.
*/
//------------------------------------ end added by zhou.weiguo(https://github.com/zhouwg) -------------------------------------

0 comments on commit 94559b8

Please sign in to comment.