Skip to content

Commit

Permalink
Removed SAD
Browse files Browse the repository at this point in the history
  • Loading branch information
alumae committed May 25, 2022
1 parent 1c2068f commit 9028b47
Showing 1 changed file with 27 additions and 72 deletions.
99 changes: 27 additions & 72 deletions scripts/diarization.sh
Original file line number Diff line number Diff line change
Expand Up @@ -6,19 +6,6 @@ if [ -z $LOCALCLASSPATH ]; then
fi


# Music/jingle detection stays disabled unless the -m flag is given.
DO_MUSIC_DETECTION=false

# The leading ':' in the optstring selects getopts' silent mode: an unknown
# flag is reported as '?' with the offending letter stored in $OPTARG.
while getopts ":m" opt
do
  if [ "$opt" = "m" ]; then
    echo "Going to do music/jingle detection"
    DO_MUSIC_DETECTION=true
  elif [ "$opt" = "?" ]; then
    # Unknown flags are reported on stderr but do not abort the script.
    echo "Invalid option: -$OPTARG" >&2
  fi
done


#the MFCC file
Expand Down Expand Up @@ -62,28 +49,24 @@ show="show"

# Set the Java virtual machine program: use $JAVA_BIN when it is set and
# non-empty, otherwise fall back to the plain "java" command.
# The expansion must be quoted: an unquoted `[ -n $JAVA_BIN ]` collapses to
# `[ -n ]` when the variable is empty, which is always true and would
# overwrite $java with an empty string.
java=java
if [ -n "$JAVA_BIN" ]; then
  java=$JAVA_BIN
fi


# Define the directory where the results will be saved: the directory part of
# the $uem path. The expansion is quoted so that a path containing whitespace
# is passed to dirname as a single operand, and `--` keeps a path that starts
# with '-' from being parsed as an option.
datadir=$(dirname -- "$uem")

#define where the UBM GMM is
ubm=models/ubm.gmm
ubm="models/ubm.gmm"


#define where the speech / non-speech set of GMMs is
#pmsgmm=./model/sms.gmms
pmsgmm=models/sms.gmms
pmsgmm="models/sms.gmms"

#define where the silence set of GMMs is
sgmm=models/s.gmms
sgmm="models/s.gmms"

#define where the gender and bandwidth set of GMMs (4 models) is
#(female studio, male studio, female telephone, male telephone)
ggmm=models/gender.gmms
ggmm="models/gender.gmms"


echo "#####################################################"
Expand All @@ -92,94 +75,66 @@ echo "#####################################################"



iseg=./$datadir/$show.i.seg
pmsseg=./$datadir/$show.pms.seg

iseg=$datadir/$show.i.seg
pmsseg=$datadir/$show.pms.seg

adjseg=./$datadir/$show.adj.h.seg

adjseg=$datadir/$show.adj.h.seg

# Check the validity of the MFCC
$java -Xmx4096m -classpath $LOCALCLASSPATH fr.lium.spkDiarization.programs.MSegInit --trace --help \
--fInputMask=$features --fInputDesc=$fInputDesc --sInputMask=$uem --sOutputMask=./$datadir/show.i.seg $show
--fInputMask=$features --fInputDesc=$fInputDesc --sInputMask=$uem --sOutputMask=$datadir/show.i.seg $show

# Speech / non-speech segmentation using a set of GMMs
# Runs MDecode with $iseg as --sInputMask and $pmsseg as --sOutputMask; the
# GMM set is supplied through --tInputMask=$pmsgmm.
# NOTE(review): this step passes a hard-coded --fInputDesc instead of
# $fInputDesc used by the neighbouring steps; presumably it must match the
# features the speech/non-speech models were trained on. Confirm before changing.
$java -Xmx4096m -classpath "$LOCALCLASSPATH" fr.lium.spkDiarization.programs.MDecode --trace --help \
--fInputDesc=audio2sphinx,1:3:2:0:0:0,13,0:0:0 --fInputMask=$features --sInputMask=$iseg \
--sOutputMask=$pmsseg --dPenality=1000,1000,10 --tInputMask=$pmsgmm $show


# GLR-based segmentation, make small segments
$java -Xmx4096m -classpath "$LOCALCLASSPATH" fr.lium.spkDiarization.programs.MSeg --trace --help \
--kind=FULL --sMethod=GLR --fInputMask=$features --fInputDesc=$fInputDesc --sInputMask=./$datadir/show.i.seg \
--sOutputMask=./$datadir/show.s.seg $show
--kind=FULL --sMethod=GLR --fInputMask=$features --fInputDesc=$fInputDesc --sInputMask=$datadir/show.i.seg \
--sOutputMask=$datadir/show.s.seg $show

# Linear clustering, fuse consecutive segments of the same speaker from the start to the end
$java -Xmx4096m -classpath "$LOCALCLASSPATH" fr.lium.spkDiarization.programs.MClust --trace --help \
--fInputMask=$features --fInputDesc=$fInputDesc --sInputMask=./$datadir/show.s.seg \
--sOutputMask=./$datadir/show.l.seg --cMethod=l --cThr=2.5 $show
--fInputMask=$features --fInputDesc=$fInputDesc --sInputMask=$datadir/show.s.seg \
--sOutputMask=$datadir/show.l.seg --cMethod=l --cThr=2.5 $show

# Hierarchical bottom-up BIC clustering
$java -Xmx4096m -classpath "$LOCALCLASSPATH" fr.lium.spkDiarization.programs.MClust --trace --help \
--fInputMask=$features --fInputDesc=$fInputDesc --sInputMask=./$datadir/show.l.seg \
--sOutputMask=./$datadir/show.h.seg --cMethod=h --cThr=6 $show
--fInputMask=$features --fInputDesc=$fInputDesc --sInputMask=$datadir/show.l.seg \
--sOutputMask=$datadir/show.h.seg --cMethod=h --cThr=6 $show

# Initialize one speaker GMM with 8 diagonal Gaussian components for each cluster
$java -Xmx4096m -classpath "$LOCALCLASSPATH" fr.lium.spkDiarization.programs.MTrainInit --help --trace \
--nbComp=8 --kind=DIAG --fInputMask=$features --fInputDesc=$fInputDesc --sInputMask=./$datadir/show.h.seg \
--tOutputMask=./$datadir/show.init.gmms $show
--nbComp=8 --kind=DIAG --fInputMask=$features --fInputDesc=$fInputDesc --sInputMask=$datadir/show.h.seg \
--tOutputMask=$datadir/show.init.gmms $show

# EM computation for each GMM
$java -Xmx4096m -classpath "$LOCALCLASSPATH" fr.lium.spkDiarization.programs.MTrainEM --help --trace \
--nbComp=8 --kind=DIAG --fInputMask=$features --fInputDesc=$fInputDesc --sInputMask=./$datadir/show.h.seg \
--tOutputMask=./$datadir/show.gmms --tInputMask=./$datadir/show.init.gmms $show
--nbComp=8 --kind=DIAG --fInputMask=$features --fInputDesc=$fInputDesc --sInputMask=$datadir/show.h.seg \
--tOutputMask=$datadir/show.gmms --tInputMask=$datadir/show.init.gmms $show

# Viterbi decoding using the set of GMMs trained by EM
$java -Xmx4096m -classpath "$LOCALCLASSPATH" fr.lium.spkDiarization.programs.MDecode --trace --help \
--fInputMask=${features} --fInputDesc=$fInputDesc --sInputMask=./$datadir/show.h.seg \
--sOutputMask=./$datadir/show.d.seg --dPenality=250 --tInputMask=$datadir/show.gmms $show
--fInputMask=${features} --fInputDesc=$fInputDesc --sInputMask=$datadir/show.h.seg \
--sOutputMask=$datadir/show.d.seg --dPenality=250 --tInputMask=$datadir/show.gmms $show

# Adjust segment boundaries near silence sections
$java -Xmx4096m -classpath "$LOCALCLASSPATH" fr.lium.spkDiarization.tools.SAdjSeg --help --trace \
--fInputMask=$features --fInputDesc=audio2sphinx,1:1:0:0:0:0,13,0:0:0 --sInputMask=./$datadir/show.d.seg \
--fInputMask=$features --fInputDesc=audio2sphinx,1:1:0:0:0:0,13,0:0:0 --sInputMask=$datadir/show.d.seg \
--sOutputMask=$adjseg $show


if [ "$DO_MUSIC_DETECTION" = true ]; then

# Filter speaker segmentation according to speech / non-speech segmentation
flt1seg=./$datadir/$show.flt1.seg
$java -Xmx4096m -classpath "$LOCALCLASSPATH" fr.lium.spkDiarization.tools.SFilter --help --trace \
--fInputDesc=audio2sphinx,1:3:2:0:0:0,13,0:0:0 --fInputMask=$features --fltSegMinLenSpeech=150 --fltSegMinLenSil=25 \
--sFilterClusterName=music --fltSegPadding=25 --sFilterMask=$pmsseg --sInputMask=$adjseg --sOutputMask=$flt1seg $show

flt2seg=./$datadir/$show.flt2.seg
$java -Xmx4096m -classpath "$LOCALCLASSPATH" fr.lium.spkDiarization.tools.SFilter --help --trace \
--fInputDesc=audio2sphinx,1:3:2:0:0:0,13,0:0:0 --fInputMask=$features --fltSegMinLenSpeech=150 --fltSegMinLenSil=25 \
--sFilterClusterName=jingle --fltSegPadding=25 --sFilterMask=$pmsseg --sInputMask=$flt1seg --sOutputMask=$flt2seg $show


# Split segments longer than 20s (useful for transcription)
splseg=./$datadir/$show.spl.seg
splseg=$datadir/$show.spl.seg
$java -Xmx4096m -classpath "$LOCALCLASSPATH" fr.lium.spkDiarization.tools.SSplitSeg --help \
--sFilterMask=$pmsseg --sFilterClusterName=iS,iT,j --sInputMask=$flt2seg --sSegMaxLen=2000 --sSegMaxLenModel=2000 \
--sFilterMask=$datadir/show.i.seg --sFilterClusterName=iS,iT,j --sInputMask=$adjseg --sSegMaxLen=2000 --sSegMaxLenModel=2000 \
--sOutputMask=$splseg --fInputMask=$features --fInputDesc=audio2sphinx,1:3:2:0:0:0,13,0:0:0 --tInputMask=$sgmm $show

else

# Split segments longer than 20s (useful for transcription)
splseg=./$datadir/$show.spl.seg
$java -Xmx4096m -classpath "$LOCALCLASSPATH" fr.lium.spkDiarization.tools.SSplitSeg --help \
--sFilterMask=$pmsseg --sFilterClusterName=iS,iT,j --sInputMask=$adjseg --sSegMaxLen=2000 --sSegMaxLenModel=2000 \
--sOutputMask=$splseg --fInputMask=$features --fInputDesc=audio2sphinx,1:3:2:0:0:0,13,0:0:0 --tInputMask=$sgmm $show

fi



#-------------------------------------------------------------------------------
# Set gender and bandwidth
gseg=./$datadir/$show.g.seg
gseg=$datadir/$show.g.seg
# Runs MScore with $splseg as --sInputMask and $gseg as --sOutputMask, using
# the GMM set in $ggmm (--tInputMask) with the --sGender and --sByCluster flags.
# NOTE(review): --fInputDesc is hard-coded here and ends in 1:1:0, unlike the
# 0:0:0 used by the earlier hard-coded descriptors; presumably a different
# feature normalisation. Confirm against the LIUM documentation.
$java -Xmx4096m -classpath "$LOCALCLASSPATH" fr.lium.spkDiarization.programs.MScore --help \
--sGender --sByCluster --fInputDesc=audio2sphinx,1:3:2:0:0:0,13,1:1:0 --fInputMask=$features --sInputMask=$splseg \
--sOutputMask=$gseg --tInputMask=$ggmm $show
Expand All @@ -189,9 +144,9 @@ $java -Xmx4096m -classpath "$LOCALCLASSPATH" fr.lium.spkDiarization.programs.MSc
# NCLR clustering
# Features contain static and delta and are centered and reduced (--fInputDesc)
c=1.7
spkseg=./$datadir/$show.c.seg
spkseg=$datadir/$show.c.seg
$java -Xmx4096m -classpath "$LOCALCLASSPATH" fr.lium.spkDiarization.programs.MClust --help --trace \
--fInputMask=$features --fInputDesc=$fInputDescCLR --sInputMask=$gseg \
--sOutputMask=./$datadir/show.seg --cMethod=ce --cThr=$c --tInputMask=$ubm \
--emCtrl=1,5,0.01 --sTop=5,$ubm --tOutputMask=./$datadir/$show.c.gmm $show
--sOutputMask=$datadir/show.seg --cMethod=ce --cThr=$c --tInputMask=$ubm \
--emCtrl=1,5,0.01 --sTop=5,$ubm --tOutputMask=$datadir/$show.c.gmm $show

0 comments on commit 9028b47

Please sign in to comment.