-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
44 changed files
with
2,060 additions
and
363 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,153 @@ | ||
#!/usr/bin/env perl | ||
|
||
use strict; | ||
use warnings; | ||
|
||
use Archive::Zip qw( :ERROR_CODES ); | ||
use Path::Class; | ||
use POSIX; | ||
use File::Path qw(remove_tree); | ||
|
||
# The single command-line argument is the path of the .zip archive to import.
my ($archive) = @ARGV;
die "Usage: $0 <archive.zip>\n" unless defined $archive && length $archive;

# Unpack zip archive into a directory named after the archive minus ".zip"
my $tmp_dir_name = $archive;
$tmp_dir_name =~ s/\.zip$//;

# If nothing was stripped, the name did not end in .zip
if ($tmp_dir_name eq $archive){
    die "Archive $archive is not a .zip file";
}

# An archive called exactly ".zip" would leave an empty name, and
# Path::Class treats dir('') as the root directory. Refuse it, because this
# directory is extracted into and later removed recursively.
unless (length $tmp_dir_name){
    die "Archive $archive has no name before the .zip extension";
}

my $archive_file = file($archive);

my $temp_dir = dir($tmp_dir_name);
$temp_dir->mkpath;

print "Unzipping archive..";
my $zip = Archive::Zip->new();
unless( $zip->read("$archive_file") == AZ_OK ){
    die "Could not inflate zip $archive_file - $!";
}
# extractTree reports failure through its return code, so check it instead
# of carrying on with a partially unpacked archive.
unless( $zip->extractTree( '', $temp_dir ) == AZ_OK ){
    die "Could not extract zip $archive_file into $temp_dir";
}
print "Done\n";
|
||
# Suffix that is stripped from file names and from read names inside seq files
my $STRING_TO_REMOVE = qr/_premix-w\.-temp/;
# Captures the project name at the start of a file name,
# e.g. "EDQ0034" from "EDQ0034_1a03.p1kSF1.scf"
my $PROJECT_NAME_RX = qr/^(.*)_\d+[a-z]{1}\d{2}\./;

# Keep track of which project we have found in archive
# (project name => Path::Class::Dir of its warehouse directory)
my %projects;

# Without a target directory the project dirs would be created relative to
# wherever the script happens to be run, so fail early instead.
my $seq_file_dir = $ENV{LIMS2_SEQ_FILE_DIR};
unless( defined $seq_file_dir && length $seq_file_dir ){
    die "Environment variable LIMS2_SEQ_FILE_DIR is not set";
}

# One directory per plate, e.g. plate_CGaP_EDQ0034_SF
# For each directory find scf and seq.clipped files
# Work out project name from root of file names
# Create project directory in warehouse (LIMS2_SEQ_FILE_DIR) if not exists
# Move fixed files to this directory
#
# children() is used rather than next() because next() also returns the
# "." and ".." entries, which would make this loop scan the directory that
# contains the archive. It also reads the listing up front, so renaming and
# moving files while looping cannot disturb the iteration.
foreach my $plate_dir ( $temp_dir->children ){
    next unless -d $plate_dir;

    foreach my $data_file ( $plate_dir->children ){
        # Only plain files can be fixed; skip any nested directories
        next if $data_file->is_dir;

        my ($project_name, $fixed_file);
        #if ($data_file =~ /\.seq\.clipped$/){
        if($data_file =~ /\.seq$/){
            ($project_name, $fixed_file) = fix_seq_file($data_file);
        }
        elsif($data_file =~ /\.scf$/){
            ($project_name, $fixed_file) = fix_scf_file($data_file);
        }
        else{
            next;
        }

        # Sometimes we get reads called Empty Well so no project name
        # Ignore them
        next unless $project_name;

        my $project_dir = $projects{ $project_name };
        unless($project_dir){
            $project_dir = dir($seq_file_dir, $project_name);
            $project_dir->mkpath;
            $projects{$project_name} = $project_dir;
        }

        $fixed_file->move_to($project_dir) or die "Could not move $fixed_file to $project_dir - $!";
    }
}
|
||
# Remove "_premix-w.-temp" from file names and read names: | ||
# e.g. EDQ0034_1a03.p1kSF_premix-w.-temp.scf | ||
# Rename an SCF trace file so its name no longer carries the
# "_premix-w.-temp" suffix and uses the SF1/SR1 primer naming.
#
# Takes:   a Path::Class::File for the .scf file (renamed on disk, in place).
# Returns: ($project_name, $new_file). $project_name is undef when the file
#          name does not match the project name pattern (e.g. Empty Well
#          reads), and $new_file is the Path::Class::File of the renamed file.
# Dies if the rename fails.
sub fix_scf_file{
    my $file = shift;

    print "SCF file name: $file\n";
    my $new_name = "$file";
    $new_name =~ s/$STRING_TO_REMOVE//;

    # tmp fix to convert SF -> SF1
    $new_name =~ s/p1kSF\./p1kSF1./;
    $new_name =~ s/p1kSR\./p1kSR1./;

    my $new_file = file($new_name);
    $file->move_to( $new_file ) or die "Could not move $file to $new_file - $!";

    # The pattern is anchored, so a plain list-context match is enough
    my ($project_name) = ( $new_file->basename =~ /$PROJECT_NAME_RX/ );

    # Files without a project name are expected, so avoid interpolating
    # undef (which would emit an "uninitialized value" warning).
    my $shown_name = defined $project_name ? $project_name : '';

    print "New SCF file name: $new_file\n";
    print "Project name: $shown_name\n\n";
    return ($project_name, $new_file);
}
|
||
# EDQ0034_1a03.p1kSF_premix-w.-temp.seq.clipped | ||
# >EDQ0034_1a05.p1kSF_premix-w.-temp -- 17..634 of sequence | ||
# Write a cleaned copy of a sequence file: the "_premix-w.-temp" suffix is
# removed from the file name and from every line, a trailing ".clipped" is
# dropped from the file name, and SF/SR primer names become SF1/SR1.
#
# Takes:   a Path::Class::File for the seq file. The original file is left
#          where it is unless the cleaned name is identical, in which case
#          it is overwritten with the cleaned content.
# Returns: ($project_name, $new_file). $project_name is undef when the file
#          name does not match the project name pattern (e.g. Empty Well
#          reads), and $new_file is the Path::Class::File that was written.
# Dies if the new file cannot be opened or fully written.
sub fix_seq_file{
    my $file = shift;
    print "Seq file name: $file\n";

    # Read everything first: the output may be the very same file
    my @lines = $file->slurp;

    my $new_name = "$file";
    $new_name =~ s/$STRING_TO_REMOVE//;
    $new_name =~ s/\.clipped$//;

    # tmp fix to convert SF -> SF1
    $new_name =~ s/p1kSF\./p1kSF1./;
    $new_name =~ s/p1kSR\./p1kSR1./;

    my $new_file = file($new_name);
    my $fh = $new_file->openw or die "Can't open $new_file for writing - $!";

    foreach my $line (@lines){
        $line =~ s/$STRING_TO_REMOVE//;
        # tmp fix to convert SF -> SF1
        $line =~ s/p1kSF\b/p1kSF1/;
        $line =~ s/p1kSR\b/p1kSR1/;

        print $fh $line;
    }

    # Buffered write errors only surface when the handle is closed
    close $fh or die "Can't close $new_file after writing - $!";

    # The pattern is anchored, so a plain list-context match is enough
    my ($project_name) = ( $new_file->basename =~ /$PROJECT_NAME_RX/ );

    # Files without a project name are expected, so avoid interpolating
    # undef (which would emit an "uninitialized value" warning).
    my $shown_name = defined $project_name ? $project_name : '';

    print "New seq file name: $new_file\n";
    print "Project name: $shown_name\n\n";
    return ($project_name, $new_file);
}
|
||
# Create file archive_names.txt in each project dir if not exists
# Append a "timestamp,archive name" line to this file
my $time_stamp = strftime("%Y-%m-%d %H:%M:%S", localtime(time));
foreach my $project_dir (values %projects){
    my $project_list = $project_dir->file('archive_names.txt');
    my $fh = $project_list->opena or die "Cannot open $project_list for appending - $!";
    print $fh "$time_stamp,$archive\n";
    # Buffered write errors only surface when the handle is closed
    close $fh or die "Cannot close $project_list after appending - $!";
    print "Sequencing data in project directory $project_dir updated\n";
}

# Store orig archives in warehouse too
# FIXME: put this in environment variable
my $archive_dir = dir('/warehouse/team87_wh01/eurofins_order_archive_data');
my $archive_to = $archive_dir->file( $archive_file->basename );
$archive_file->move_to( $archive_to ) or die "Cannot move archive file $archive_file to $archive_to - $!";

# Tidy up temporarily unpacked files.
# remove_tree returns the number of files deleted and does not report
# failures through $!, so collect its diagnostics via the "error" option.
remove_tree( $temp_dir->stringify, { error => \my $remove_errors } );
if( $remove_errors && @{$remove_errors} ){
    my @messages;
    foreach my $diagnostic ( @{$remove_errors} ){
        # Each diagnostic is a single-pair hash: path => message
        # (the path is empty for general errors)
        my ($path, $message) = %{$diagnostic};
        push @messages, length $path ? "$path: $message" : $message;
    }
    die "Could not remove $temp_dir - " . join('; ', @messages);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,102 @@ | ||
-- Audit trail for public.sequencing_projects. | ||
-- The four audit_* columns come first, followed by the audited table's | ||
-- columns; the trigger function inserts OLD.* / NEW.* positionally, so the | ||
-- column order here has to match public.sequencing_projects. | ||
-- NOTE(review): txid_current() is documented as returning bigint while | ||
-- audit_txid is INTEGER - confirm this is intended. | ||
CREATE TABLE audit.sequencing_projects ( | ||
audit_op CHAR(1) NOT NULL CHECK (audit_op IN ('D','I','U')), | ||
audit_user TEXT NOT NULL, | ||
audit_stamp TIMESTAMP NOT NULL, | ||
audit_txid INTEGER NOT NULL, | ||
id integer, | ||
name text, | ||
created_by_id integer, | ||
created_at timestamp without time zone, | ||
sub_projects integer, | ||
qc boolean, | ||
available_results boolean, | ||
abandoned boolean, | ||
is_384 boolean | ||
); | ||
-- Trigger function: writes one audit row per changed row, tagged 'D', 'U' | ||
-- or 'I' with the current user, time and transaction id. Deletes record the | ||
-- old row; updates and inserts record the new row. It returns NULL, as the | ||
-- trigger below is an AFTER row trigger. | ||
CREATE OR REPLACE FUNCTION public.process_sequencing_projects_audit() | ||
RETURNS TRIGGER AS $sequencing_projects_audit$ | ||
BEGIN | ||
IF (TG_OP = 'DELETE') THEN | ||
INSERT INTO audit.sequencing_projects SELECT 'D', user, now(), txid_current(), OLD.*; | ||
ELSIF (TG_OP = 'UPDATE') THEN | ||
INSERT INTO audit.sequencing_projects SELECT 'U', user, now(), txid_current(), NEW.*; | ||
ELSIF (TG_OP = 'INSERT') THEN | ||
INSERT INTO audit.sequencing_projects SELECT 'I', user, now(), txid_current(), NEW.*; | ||
END IF; | ||
RETURN NULL; | ||
END; | ||
$sequencing_projects_audit$ LANGUAGE plpgsql; | ||
-- Fire the audit function after every row-level insert, update or delete. | ||
CREATE TRIGGER sequencing_projects_audit | ||
AFTER INSERT OR UPDATE OR DELETE ON public.sequencing_projects | ||
FOR EACH ROW EXECUTE PROCEDURE public.process_sequencing_projects_audit(); | ||
-- Audit trail for public.sequencing_project_primers. | ||
-- The four audit_* columns come first, followed by the audited table's | ||
-- columns; the trigger function inserts OLD.* / NEW.* positionally, so the | ||
-- column order here has to match public.sequencing_project_primers. | ||
-- NOTE(review): txid_current() is documented as returning bigint while | ||
-- audit_txid is INTEGER - confirm this is intended. | ||
CREATE TABLE audit.sequencing_project_primers ( | ||
audit_op CHAR(1) NOT NULL CHECK (audit_op IN ('D','I','U')), | ||
audit_user TEXT NOT NULL, | ||
audit_stamp TIMESTAMP NOT NULL, | ||
audit_txid INTEGER NOT NULL, | ||
seq_project_id integer, | ||
primer_id text | ||
); | ||
-- Trigger function: writes one audit row per changed row, tagged 'D', 'U' | ||
-- or 'I' with the current user, time and transaction id. Deletes record the | ||
-- old row; updates and inserts record the new row. It returns NULL, as the | ||
-- trigger below is an AFTER row trigger. | ||
CREATE OR REPLACE FUNCTION public.process_sequencing_project_primers_audit() | ||
RETURNS TRIGGER AS $sequencing_project_primers_audit$ | ||
BEGIN | ||
IF (TG_OP = 'DELETE') THEN | ||
INSERT INTO audit.sequencing_project_primers SELECT 'D', user, now(), txid_current(), OLD.*; | ||
ELSIF (TG_OP = 'UPDATE') THEN | ||
INSERT INTO audit.sequencing_project_primers SELECT 'U', user, now(), txid_current(), NEW.*; | ||
ELSIF (TG_OP = 'INSERT') THEN | ||
INSERT INTO audit.sequencing_project_primers SELECT 'I', user, now(), txid_current(), NEW.*; | ||
END IF; | ||
RETURN NULL; | ||
END; | ||
$sequencing_project_primers_audit$ LANGUAGE plpgsql; | ||
-- Fire the audit function after every row-level insert, update or delete. | ||
CREATE TRIGGER sequencing_project_primers_audit | ||
AFTER INSERT OR UPDATE OR DELETE ON public.sequencing_project_primers | ||
FOR EACH ROW EXECUTE PROCEDURE public.process_sequencing_project_primers_audit(); | ||
-- Audit trail for public.sequencing_project_templates. | ||
-- The four audit_* columns come first, followed by the audited table's | ||
-- columns; the trigger function inserts OLD.* / NEW.* positionally, so the | ||
-- column order here has to match public.sequencing_project_templates. | ||
-- NOTE(review): txid_current() is documented as returning bigint while | ||
-- audit_txid is INTEGER - confirm this is intended. | ||
CREATE TABLE audit.sequencing_project_templates ( | ||
audit_op CHAR(1) NOT NULL CHECK (audit_op IN ('D','I','U')), | ||
audit_user TEXT NOT NULL, | ||
audit_stamp TIMESTAMP NOT NULL, | ||
audit_txid INTEGER NOT NULL, | ||
seq_project_id integer, | ||
qc_template_id integer | ||
); | ||
-- Trigger function: writes one audit row per changed row, tagged 'D', 'U' | ||
-- or 'I' with the current user, time and transaction id. Deletes record the | ||
-- old row; updates and inserts record the new row. It returns NULL, as the | ||
-- trigger below is an AFTER row trigger. | ||
CREATE OR REPLACE FUNCTION public.process_sequencing_project_templates_audit() | ||
RETURNS TRIGGER AS $sequencing_project_templates_audit$ | ||
BEGIN | ||
IF (TG_OP = 'DELETE') THEN | ||
INSERT INTO audit.sequencing_project_templates SELECT 'D', user, now(), txid_current(), OLD.*; | ||
ELSIF (TG_OP = 'UPDATE') THEN | ||
INSERT INTO audit.sequencing_project_templates SELECT 'U', user, now(), txid_current(), NEW.*; | ||
ELSIF (TG_OP = 'INSERT') THEN | ||
INSERT INTO audit.sequencing_project_templates SELECT 'I', user, now(), txid_current(), NEW.*; | ||
END IF; | ||
RETURN NULL; | ||
END; | ||
$sequencing_project_templates_audit$ LANGUAGE plpgsql; | ||
-- Fire the audit function after every row-level insert, update or delete. | ||
CREATE TRIGGER sequencing_project_templates_audit | ||
AFTER INSERT OR UPDATE OR DELETE ON public.sequencing_project_templates | ||
FOR EACH ROW EXECUTE PROCEDURE public.process_sequencing_project_templates_audit(); | ||
-- Audit trail for public.sequencing_primer_types. | ||
-- The four audit_* columns come first, followed by the audited table's | ||
-- single column; the trigger function inserts OLD.* / NEW.* positionally, | ||
-- so the column order here has to match public.sequencing_primer_types. | ||
-- NOTE(review): txid_current() is documented as returning bigint while | ||
-- audit_txid is INTEGER - confirm this is intended. | ||
CREATE TABLE audit.sequencing_primer_types ( | ||
audit_op CHAR(1) NOT NULL CHECK (audit_op IN ('D','I','U')), | ||
audit_user TEXT NOT NULL, | ||
audit_stamp TIMESTAMP NOT NULL, | ||
audit_txid INTEGER NOT NULL, | ||
id text | ||
); | ||
-- Trigger function: writes one audit row per changed row, tagged 'D', 'U' | ||
-- or 'I' with the current user, time and transaction id. Deletes record the | ||
-- old row; updates and inserts record the new row. It returns NULL, as the | ||
-- trigger below is an AFTER row trigger. | ||
CREATE OR REPLACE FUNCTION public.process_sequencing_primer_types_audit() | ||
RETURNS TRIGGER AS $sequencing_primer_types_audit$ | ||
BEGIN | ||
IF (TG_OP = 'DELETE') THEN | ||
INSERT INTO audit.sequencing_primer_types SELECT 'D', user, now(), txid_current(), OLD.*; | ||
ELSIF (TG_OP = 'UPDATE') THEN | ||
INSERT INTO audit.sequencing_primer_types SELECT 'U', user, now(), txid_current(), NEW.*; | ||
ELSIF (TG_OP = 'INSERT') THEN | ||
INSERT INTO audit.sequencing_primer_types SELECT 'I', user, now(), txid_current(), NEW.*; | ||
END IF; | ||
RETURN NULL; | ||
END; | ||
$sequencing_primer_types_audit$ LANGUAGE plpgsql; | ||
-- Fire the audit function after every row-level insert, update or delete. | ||
CREATE TRIGGER sequencing_primer_types_audit | ||
AFTER INSERT OR UPDATE OR DELETE ON public.sequencing_primer_types | ||
FOR EACH ROW EXECUTE PROCEDURE public.process_sequencing_primer_types_audit(); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
-- Record schema version 101 in schema_versions. | ||
INSERT INTO schema_versions(version) VALUES (101); | ||
|
||
-- Populate sequencing_primer_types with its initial set of ids. | ||
-- presumably these are the primer names selectable for a sequencing project; | ||
-- verify against the application code. | ||
INSERT INTO sequencing_primer_types(id) | ||
VALUES ('L1'),('LR'),('PNF'),('R1R'),('R2R'),('R3'),('Z1'),('Z2'),('PGO'),('FCHK'),('SP6R'),('LU6F'),('T3'),('T7gRNA_F'),('R4'),('NF'),('BACT3'),('BBR'),('EF1'), | ||
('SP6'),('PPA1'),('rox_SV40'),('Z1a'),('Z2a'),('hBac'),('IRNA5'),('IRNA3'),('LFR'),('LRR'),('bpA'),('En2'),('CRF'),('ERT'),('HPF'),('HPR'),('3ARM'),('IRES_B1'),('IRES_right'), | ||
('A2'),('A5'),('P19F'),('P19R'),('EF1aR1'),('betaR1'),('LFREV'),('p15A_Rev2'),('p15A_For2'); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
-- One row per sequencing project; created_by_id references users(id) and | ||
-- created_at defaults to the insertion time. | ||
-- NOTE(review): the four boolean flags default to false but are nullable, | ||
-- and name carries no uniqueness constraint - confirm both are intended. | ||
CREATE TABLE sequencing_projects( | ||
id serial primary key not null, | ||
name text not null, | ||
created_by_id integer references users(id) not null, | ||
created_at timestamp without time zone default now() not null, | ||
sub_projects integer not null, | ||
qc boolean default false, | ||
available_results boolean default false, | ||
abandoned boolean default false, | ||
is_384 boolean default false); | ||
|
||
-- Lookup table of primer type ids, referenced by sequencing_project_primers. | ||
CREATE TABLE sequencing_primer_types( id text primary key not null ); | ||
|
||
-- Link table between sequencing projects and primer types. | ||
-- NOTE(review): there is no primary key or unique constraint, so the same | ||
-- (seq_project_id, primer_id) pair can be stored more than once - confirm. | ||
CREATE TABLE sequencing_project_primers( | ||
seq_project_id integer references sequencing_projects(id) not null, | ||
primer_id text references sequencing_primer_types(id) not null); | ||
|
||
-- Link table between sequencing projects and qc_templates. | ||
-- NOTE(review): there is no primary key or unique constraint, so the same | ||
-- (seq_project_id, qc_template_id) pair can be stored more than once - confirm. | ||
CREATE TABLE sequencing_project_templates( | ||
seq_project_id integer references sequencing_projects(id) not null, | ||
qc_template_id integer references qc_templates(id) not null); |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.