# Extracting knowledge from video files

This notebook uses FFmpeg and Whisper. Make sure `ffmpeg.exe` is in the same folder as this notebook.

## Load the AI workbench

In [None]:
#i "nuget:https://pkgs.dev.azure.com/dnceng/public/_packaging/dotnet-tools/nuget/v3/index.json"

In [None]:
#r "nuget:Microsoft.DotNet.Interactive.AI, *-*"

### only for local builds

In [None]:
#r "nuget:Microsoft.DotNet.Interactive.AI, 1.0.0-beta.23604.2"


## Loading Whisper.net

In [None]:
#r "nuget: Whisper.net, 1.4.6"


In [None]:
#r "nuget: Whisper.net.Runtime, 1.4.6"

### Nasty workaround to get the native runtimes in the right place. Works only on Windows :)

In [None]:
using System;
using System.IO;
using System.Threading.Tasks;
using Whisper.net;

// Folder containing the loaded Whisper.net assembly.
var whisperLocation = Path.GetDirectoryName(typeof(WhisperFactory).Assembly.Location);

// Ensure a runtimes/win-x64 folder exists next to that assembly.
// NOTE(review): the misspelled variable names in this cell are kept as-is because
// they are notebook-level names that other cells may reference.
var runtimeDirectoty = Directory.CreateDirectory(
    Path.Combine(whisperLocation, "runtimes", "win-x64"));

// Derive the Whisper.net.Runtime package folder by name substitution on the
// Whisper.net path, walk two levels up, then descend into build/win-x64 where
// the native whisper.dll is expected to live.
var runtimePackageLocation = whisperLocation.Replace("whisper.net", "whisper.net.runtime");
var runtimePackageRoot = new DirectoryInfo(Path.Combine(runtimePackageLocation, "..", "..")).FullName;
var srcRuntimeDicrectory = Path.Combine(runtimePackageRoot, "build", "win-x64");
var srcFilePath = Path.Combine(srcRuntimeDicrectory, "whisper.dll");

// Copy step currently disabled; srcFilePath is handed to WhisperFactory directly instead.
// var dstFilePath = Path.Combine(runtimeDirectoty.FullName, "whisper.dll");
// if(!File.Exists(dstFilePath))
// {
//     File.Copy(srcFilePath, dstFilePath, true);
// }

## Process video file and extract audio

In [None]:
#r "nuget: FFMpegCore, 5.1.0"
#r "nuget: NAudio, 2.1.0"

### Get audio from video

In [None]:
using FFMpegCore;

// Source video to transcribe. Adjust this path to point at your own file.
var videoFile = new FileInfo(@"C:\Users\dicolomb\OneDrive - Microsoft\Pictures\A Deep Dive into Artificial Intelligence_ Microsoft x Our Little Uni_Trim.mp4");

// Intermediate .mp3 and final .wav sit next to the video, differing only in extension.
// Path.ChangeExtension rewrites just the trailing extension; a plain string Replace
// would also rewrite any earlier occurrence of the extension text inside the path
// and throws when the file has no extension at all.
var audioFile = new FileInfo(Path.ChangeExtension(videoFile.FullName, ".mp3"));
var wavFile = Path.ChangeExtension(audioFile.FullName, ".wav");

In [None]:
// Ask FFMpegCore to extract the audio of videoFile into audioFile (the .mp3 path computed above).
// NOTE(review): presumably requires the ffmpeg binary to be discoverable by FFMpegCore — confirm.
FFMpeg.ExtractAudio(videoFile.FullName, audioFile.FullName);

### Convert mp3 to wav file and save to disk

In [None]:
using NAudio.Wave.SampleProviders;
using NAudio.Wave;

// Decode the mp3, resample it to 16 kHz / 16-bit PCM, and save it as wavFile.
// Every stream is scoped with `using` so file handles are released (and the data
// flushed) as soon as the conversion finishes.
// NOTE(review): 16 kHz is assumed to be the sample rate Whisper expects — confirm
// against the Whisper.net documentation.
using (var fileStream = File.OpenRead(audioFile.FullName))
using (var reader = new Mp3FileReader(fileStream))
using (var wavStream = new MemoryStream())
{
    var resampler = new WdlResamplingSampleProvider(reader.ToSampleProvider(), 16000);
    WaveFileWriter.WriteWavFileToStream(wavStream, resampler.ToWaveProvider16());

    // WriteAllBytes creates or truncates the target, so a previous (longer) wav
    // file cannot leave stale bytes after the new content.
    File.WriteAllBytes(wavFile, wavStream.ToArray());
}

## Now that we have a WAV file on disk, let's use Whisper to extract data

### load model

In [None]:
using Whisper.net;
using Whisper.net.Ggml;

// Local file name of the GGML "base" Whisper model; downloaded once and reused afterwards.
var modelName = "ggml-base.bin";
if (!File.Exists(modelName))
{
    // Download into a temporary file first: if the transfer is interrupted, no
    // truncated model is left under the final name to be mistaken for a complete
    // one by the File.Exists check on the next run.
    var partialModelName = modelName + ".download";

    using (var modelStream = await WhisperGgmlDownloader.GetGgmlModelAsync(GgmlType.Base))
    using (var fileWriter = File.Create(partialModelName))
    {
        await modelStream.CopyToAsync(fileWriter);
    }

    // Only a fully written file is promoted to the real model name.
    File.Move(partialModelName, modelName, overwrite: true);
}

### start processing

In [None]:
// Create the Whisper factory from the model file, loading the native library
// from srcFilePath (the whisper.dll path computed in the workaround cell).
// modelName is reused instead of repeating the file name literal so that the
// file that gets downloaded and the file that gets loaded cannot drift apart.
var whisperFactory = WhisperFactory.FromPath(modelName, libraryPath: srcFilePath);

In [None]:
// Build a processor with the language set to "auto".
var processor = whisperFactory.CreateBuilder()
    .WithLanguage("auto")
    .Build();

// Collected segments; kept at notebook scope because later cells read it.
var results = new List<SegmentData>();

// Scope the wav stream so its file handle is released once processing ends
// (or fails); otherwise the file stays open for the life of the kernel.
using (var wavFileStream = File.OpenRead(wavFile))
{
    await foreach (var result in processor.ProcessAsync(wavFileStream))
    {
        results.Add(result);
        Console.WriteLine($"{result.Start}->{result.End}: {result.Text}");
    }
}

In [None]:
// Keep only the segments whose text does not contain the "[BLANK_AUDIO]" marker.
var filtered = results.FindAll(segment => segment.Text.Contains("[BLANK_AUDIO]") == false);

In [None]:
// Show the filtered segments in the notebook output.
// NOTE(review): DisplayTable is not defined in this notebook; presumably an extension
// method from one of the referenced packages that renders a table — confirm.
filtered.DisplayTable();