## Part, the first
### Setting up MFA

Python is upgraded to 3.10 following [this notebook](https://www.kaggle.com/code/svaningelgem/upgrade-to-python-3-10)

In [1]:
%%capture
# Create a conda environment named py310.
!conda create -n py310 -y
# Activate py310 and install Python 3.10, Jupyter, Montreal Forced Aligner and mamba into it.
!source /opt/conda/bin/activate py310 && conda install python=3.10 jupyter montreal-forced-aligner mamba -y

In [2]:
# Replace the python3, python3.7 and python entries in /opt/conda/bin with
# symlinks to the python3 binary of the py310 environment.
!rm /opt/conda/bin/python3
!ln -sf /opt/conda/envs/py310/bin/python3 /opt/conda/bin/python3
!rm /opt/conda/bin/python3.7
!ln -sf /opt/conda/envs/py310/bin/python3 /opt/conda/bin/python3.7
!rm /opt/conda/bin/python
!ln -sf /opt/conda/envs/py310/bin/python3 /opt/conda/bin/python

To create the same data, fork and run [this notebook](https://www.kaggle.com/jimregan/scrape-fuaimeanna-ie)

In [3]:
# One working directory per recording set. -p keeps the cell re-runnable:
# plain mkdir reports an error once the directories already exist.
!mkdir -p /tmp/m /tmp/c /tmp/u

# Files ending in s1 go to /tmp/u, s2 to /tmp/m and s3 to /tmp/c; these
# directories are later paired with the ulster, munster and connaught lexicons.
!cp ../input/scrape-fuaimeanna-private/wav/*s1.wav /tmp/u
!cp ../input/scrape-fuaimeanna-private/wav/*s2.wav /tmp/m
!cp ../input/scrape-fuaimeanna-private/wav/*s3.wav /tmp/c

In [4]:
%%writefile /tmp/fuaimeanna-write.pl
#!/usr/bin/perl
use warnings;
use strict;
use utf8;

# Put a UTF-8 layer on the standard streams: the script reads non-ASCII
# orthography and phone symbols from STDIN and echoes them in error messages.
binmode(STDIN, ":utf8");
binmode(STDOUT, ":utf8");
binmode(STDERR, ":utf8");

# Replacement transcripts for the Connacht output: when the lower-cased
# orthographic form equals a key, the main loop writes the value (instead of
# the form itself) to the Connacht text file and lexicon.
# NOTE(review): presumably the Connacht recording says something other than
# the headword for these entries -- confirm against the audio.
my %cr_files = (
	'mo shmidiú' => 'mo chuid smidiú',
	'mo shmior' => 'mo chuid smior',
	'mo shmólach' => 'mo smólach',
	'shmachtaigh' => 'smachtaigh',
	'shmaoinigh' => 'smaoinigh',
	'shmear' => 'smear',
	'deamhain' => 'diabhail',
	'folach' => 'i bhfolach',
	'captaen' => 'caiptín',
	'oirthe' => 'feilte',
);
# Sound paths, compared against TSV column 3, for which the main loop writes
# neither a Connacht transcript nor a Connacht lexicon entry.
# NOTE(review): the name suggests these recordings are empty -- confirm.
my %empty = (
	'/sounds/gob_i3_s3.mp3' => 1,
	'/sounds/iioctha_i3_s3.mp3' => 1,
	'/sounds/mo_shuiiochaan_i3_s3.mp3' => 1,
	'/sounds/riail_i3_s3.mp3' => 1
);

# Open the three per-dialect raw lexicon files in append mode with a UTF-8
# layer. The handles stay bareword globals because the rest of the script
# refers to them as \*LEXM, \*LEXU and \*LEXC.
# Abort immediately when one cannot be opened: without it every later print
# would only warn and the lexicon would silently be missing.
open(LEXM, '>>', '/tmp/lexicon-munster.raw')
	or die "Cannot open /tmp/lexicon-munster.raw for appending: $!";
binmode LEXM, ':utf8';
open(LEXU, '>>', '/tmp/lexicon-ulster.raw')
	or die "Cannot open /tmp/lexicon-ulster.raw for appending: $!";
binmode LEXU, ':utf8';
open(LEXC, '>>', '/tmp/lexicon-connaught.raw')
	or die "Cannot open /tmp/lexicon-connaught.raw for appending: $!";
binmode LEXC, ':utf8';

# Append $text to the file at $file, encoded as UTF-8.
#   $file - path of the file to append to (created if it does not exist)
#   $text - string to write; no newline is added
# A failure to open or close the file is reported on STDERR, naming the path,
# and the call returns without aborting the run.
sub write_text {
	my ($file, $text) = @_;
	# Lexical handle instead of the shared bareword OUTF, so a failed open can
	# never leave a stale global handle behind.
	my $out;
	if (!open($out, '>>:utf8', $file)) {
		warn "Cannot open $file for appending: $!\n";
		return;
	}
	print {$out} $text;
	# Buffered write errors only surface at close time.
	close($out) or warn "Cannot close $file: $!\n";
	return;
}

# Write lexicon lines of the form "word pronunciation" for $text.
#   $file - output filehandle (callers pass a glob reference such as \*LEXU)
#   $text - orthographic text; it is split into words on single spaces
#   $pron - space-separated phone string in which ' # ' separates the
#           pronunciations of the individual words
# The IPA stress marks (ˈ, ˌ) and the ' . ' separators are removed from $pron
# before writing. A word/pronunciation count mismatch is reported on STDERR.
sub write_pron {
	my ($file, $text, $pron) = @_;
	# For "ar tí" only, ' . ˈ ' is turned into the word separator.
	# NOTE(review): presumably the source data lacks the '#' between the two
	# words of this entry -- confirm.
	if ($text eq 'ar tí') {
		$pron =~ s/ \. ˈ / # /g;
	}
	$pron =~ s/ [ˈˌ] / /g;
	$pron =~ s/^[ˈˌ] //g;
	$pron =~ s/ \. / /g;
	my @words = split / /, $text;
	my @prons = split / \# /, $pron;
	if ($#words != $#prons) {
		print STDERR "ERROR: $file $text $pron\n";
	}
	if ($#words == 0) {
		print {$file} "$text $pron\n";
		return;
	}
	for my $i (0 .. $#words) {
		# When there are fewer pronunciations than words, do not emit a
		# lexicon line without a pronunciation (previously this indexed past
		# the end of @prons and printed "word " plus an undef warning).
		next unless defined $prons[$i] && length $prons[$i];
		print {$file} "$words[$i] $prons[$i]\n";
	}
	return;
}

# Main loop: read tab-separated rows from STDIN and, for every row, write one
# transcript file per dialect plus the matching raw lexicon entries.
# Columns used: 0 = orthographic form, 1/2 = Ulster sound path / pronunciation,
# 3/4 = Connacht sound path / pronunciation, 5/6 = Munster.
while(<STDIN>) {
	chomp;
	my @line = split/\t/;
	# Presumably the header row of the TSV.
	next if($line[0] eq 'Orthographic');
	my $text = lc($line[0]);
	next if($line[0] eq "d'fhág");

	# Turn each "/sounds/NAME.mp3" path into "NAME.txt". foreach aliases its
	# loop variable, so the three scalars are modified in place.
	my ($uout, $cout, $mout) = @line[1, 3, 5];
	for my $name ($uout, $cout, $mout) {
		$name =~ s!/sounds/!!;
		$name =~ s/\.mp3$/.txt/;
	}
	$uout = '/tmp/u/' . $uout;
	$cout = '/tmp/c/' . $cout;
	$mout = '/tmp/m/' . $mout;

	my $pronu = $line[2];
	my $pronc = $line[4];
	my $pronm = $line[6];

	# Each dialect gets its own word for this entry. $text has already been
	# lower-cased, so it must be compared with the lower-case form; the
	# previous comparison against 'Gaeilge' could never succeed.
	if($text eq 'gaeilge') {
		write_text($uout, "gaeilic");
		write_text($cout, "gaeilge");
		write_text($mout, "gaelainn");
		write_pron(\*LEXU, "gaeilic", $pronu);
		write_pron(\*LEXC, "gaeilge", $pronc);
		write_pron(\*LEXM, "gaelainn", $pronm);
		next;
	}
	# For these three words the Ulster pronunciation has 'x t̪ˠ' rewritten to
	# 'ɾˠ t̪ˠ'. The test uses the original (not lower-cased) form.
	if($line[0] eq 'bocht' || $line[0] eq 'teacht' || $line[0] eq 'teocht') {
		$pronu =~ s/x t̪ˠ/ɾˠ t̪ˠ/;
	}
	write_text($uout, $text);
	write_pron(\*LEXU, $text, $pronu);
	write_text($mout, $text);
	write_pron(\*LEXM, $text, $pronm);
	# Connacht output is skipped for sound paths listed in %empty and uses the
	# replacement transcript from %cr_files when one exists.
	if(!exists $empty{$line[3]}) {
		my $cfix = exists $cr_files{$text} ? $cr_files{$text} : $text;
		write_text($cout, $cfix);
		write_pron(\*LEXC, $cfix, $pronc);
	}
}

Writing /tmp/fuaimeanna-write.pl


In [5]:
# Feed the scraped TSV to the script on standard input.
!perl /tmp/fuaimeanna-write.pl < ../input/scrape-fuaimeanna-private/all-fuaimeanna-data.tsv

In [6]:
# Sort each raw lexicon and drop duplicate lines. The raw files are opened in
# append mode by the Perl script, so repeated runs add repeated lines.
!cat /tmp/lexicon-connaught.raw | sort | uniq > /tmp/lexicon-connaught.txt
!cat /tmp/lexicon-ulster.raw | sort | uniq > /tmp/lexicon-ulster.txt
!cat /tmp/lexicon-munster.raw | sort | uniq > /tmp/lexicon-munster.txt
# Combined lexicon: the union of the three raw lexicons.
!cat /tmp/lexicon-connaught.raw /tmp/lexicon-ulster.raw /tmp/lexicon-munster.raw | sort | uniq > /tmp/lexicon-all.txt

In [7]:
# Combined corpus: everything from the three per-dialect directories in one
# place. -p keeps the cell re-runnable once the directories exist.
!mkdir -p /tmp/all
!cp /tmp/c/* /tmp/all
!cp /tmp/m/* /tmp/all
!cp /tmp/u/* /tmp/all
# Temporary directory passed to the mfa commands via -t.
!mkdir -p /tmp/mfa-temp

### Run MFA

In [8]:
# Run `mfa train` once per dialect corpus (/tmp/m, /tmp/u, /tmp/c) and once on
# the combined corpus (/tmp/all), each with its matching lexicon. All four
# runs share the temporary directory /tmp/mfa-temp.
# NOTE(review): the recorded output of this cell is a Python traceback that is
# truncated in this export -- confirm these options against the installed MFA version.
!/opt/conda/envs/py310/bin/mfa train -t /tmp/mfa-temp --output_directory /tmp/textgrid-munster /tmp/m /tmp/lexicon-munster.txt ./munster-model
!/opt/conda/envs/py310/bin/mfa train -t /tmp/mfa-temp --output_directory /tmp/textgrid-ulster /tmp/u /tmp/lexicon-ulster.txt ./ulster-model
!/opt/conda/envs/py310/bin/mfa train -t /tmp/mfa-temp --output_directory /tmp/textgrid-connaught /tmp/c /tmp/lexicon-connaught.txt ./connaught-model
!/opt/conda/envs/py310/bin/mfa train -t /tmp/mfa-temp --output_directory /tmp/textgrid-all /tmp/all /tmp/lexicon-all.txt ./all-model

Traceback (most recent call last):
  File "/opt/conda/envs/py310/bin/mfa", line 10, in <module>
    sys.exit(mfa_cli())
  File "/opt/conda/envs/py310/lib/python3.10/site-packages/click/core.py", line 1157, in __call__
    return self.main(*args, **kwargs)
  File "/opt/conda/envs/py310/lib/python3.10/site-packages/rich_click/rich_group.py", line 21, in main
    rv = super().main(*args, standalone_mode=False, **kwargs)
  File "/opt/conda/envs/py310/lib/python3.10/site-packages/click/core.py", line 1078, in main
    rv = self.invoke(ctx)
  File "/opt/conda/envs/py310/lib/python3.10/site-packages/click/core.py", line 1685, in invoke
    super().invoke(ctx)
  File "/opt/conda/envs/py310/lib/python3.10/site-packages/click/core.py", line 1434, in invoke
    return ctx.invoke(self.callback, **ctx.params)
  File "/opt/conda/envs/py310/lib/python3.10/site-packages/click/core.py", line 783, in invoke
    return __callback(*args, **kwargs)
  File "/opt/conda/envs/py310/lib/python3.1

In [9]:
# Run `mfa train_g2p` on each per-dialect lexicon and on the combined lexicon,
# writing to ./g2p-ulster, ./g2p-munster, ./g2p-connaught and ./g2p-all.
# NOTE(review): the recorded output of this cell is a Python traceback that is
# truncated in this export -- confirm these options against the installed MFA version.
!/opt/conda/envs/py310/bin/mfa train_g2p -t /tmp/mfa-temp /tmp/lexicon-ulster.txt ./g2p-ulster
!/opt/conda/envs/py310/bin/mfa train_g2p -t /tmp/mfa-temp /tmp/lexicon-munster.txt ./g2p-munster
!/opt/conda/envs/py310/bin/mfa train_g2p -t /tmp/mfa-temp /tmp/lexicon-connaught.txt ./g2p-connaught
!/opt/conda/envs/py310/bin/mfa train_g2p -t /tmp/mfa-temp /tmp/lexicon-all.txt ./g2p-all

Traceback (most recent call last):
  File "/opt/conda/envs/py310/bin/mfa", line 10, in <module>
    sys.exit(mfa_cli())
  File "/opt/conda/envs/py310/lib/python3.10/site-packages/click/core.py", line 1157, in __call__
    return self.main(*args, **kwargs)
  File "/opt/conda/envs/py310/lib/python3.10/site-packages/rich_click/rich_group.py", line 21, in main
    rv = super().main(*args, standalone_mode=False, **kwargs)
  File "/opt/conda/envs/py310/lib/python3.10/site-packages/click/core.py", line 1078, in main
    rv = self.invoke(ctx)
  File "/opt/conda/envs/py310/lib/python3.10/site-packages/click/core.py", line 1685, in invoke
    super().invoke(ctx)
  File "/opt/conda/envs/py310/lib/python3.10/site-packages/click/core.py", line 1434, in invoke
    return ctx.invoke(self.callback, **ctx.params)
  File "/opt/conda/envs/py310/lib/python3.10/site-packages/click/core.py", line 783, in invoke
    return __callback(*args, **kwargs)
  File "/opt/conda/envs/py310/lib/python3.1