-
Notifications
You must be signed in to change notification settings - Fork 0
/
LoadCrisprGroups.pm
150 lines (121 loc) · 4.26 KB
/
LoadCrisprGroups.pm
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
package LIMS2::Task::General::LoadCrisprGroups;
use strict;
use warnings FATAL => 'all';
=head1 NAME
LIMS2::Task::General::LoadCrisprGroups
=head1 DESCRIPTION
This command creates crispr groups in LIMS2.
Required options:
- user: Name of the user creating the crispr groups
- gene_type: Gene type id, for example HGNC or MGI
- upload_file: CSV file containing crispr group data to be uploaded
The crispr group data file is a CSV file with at least a column named gene, and 2 columns
for each crispr in the group, giving the crispr id and whether it is left_of_target or not.
The columns are as follows:
- gene
- crispr_1
- left_1
- crispr_2
- left_2
...
gene can be gene id or marker symbol.
crispr can be lims2_crispr_id or wge_crispr_id.
=cut
use Moose;
use Try::Tiny;
use Text::CSV;
use LIMS2::Model;
use MooseX::Types::Path::Class;
use namespace::autoclean;
extends 'LIMS2::Task';
# One-line summary of this command, overriding the 'abstract' method
# inherited through LIMS2::Task.
override abstract => sub {
    return 'Load Crispr Groups from CSV file';
};
# Required, read-only string option naming the gene type id.
# execute() recognises the values 'MGI' and 'HGNC'.
has gene_type => (
    traits        => [ qw( Getopt ) ],
    isa           => 'Str',
    is            => 'ro',
    required      => 1,
    documentation => 'Gene type id, for example HGNC or MGI',
);
# Required, read-only option holding the CSV file to load, exposed on the
# command line as 'upload-file'. Coercion is enabled so the supplied value
# can be turned into a Path::Class::File (presumably via the coercions from
# MooseX::Types::Path::Class - confirm).
has upload_file => (
    traits        => [ qw( Getopt ) ],
    cmd_flag      => 'upload-file',
    isa           => 'Path::Class::File',
    coerce        => 1,
    is            => 'ro',
    required      => 1,
    documentation => 'File holding crispr group data. Should contain a gene column and then pairs of columns (crispr_1, left_1; crispr_2, left_2; ...).',
);
# Required, read-only string option naming the user running the load.
# The help text previously said "creating the plate", a leftover from a
# plate-loading command; this command creates crispr groups.
has user => (
    is            => 'ro',
    isa           => 'Str',
    traits        => [ 'Getopt' ],
    documentation => 'User who is creating the crispr groups',
    required      => 1,
);
# Read the crispr group CSV file and create one crispr group per data row.
#
# The first line of the file is taken as the header; its column names are
# lower-cased. Each following row must have a 'gene' column and consecutive
# crispr_N / left_N column pairs starting at N = 1. Reading of crisprs for a
# row stops at the first crispr_N column that is missing or empty.
#
# Parameters: $opts and $args are accepted but not used here.
# Returns:    nothing.
# Dies:       on an unsupported gene_type, a missing default assembly, a
#             missing header line, a failed WGE import, or a CSV parse
#             error, as well as on any error raised by the model calls.
sub execute {
    my ( $self, $opts, $args ) = @_;

    $self->log->info( 'Uploading crispr group data from file ' . $self->upload_file );

    # derive the species from the gene type; fail early on anything else,
    # otherwise the assembly lookup below fails with an unhelpful message
    my %species_for = (
        MGI  => 'Mouse',
        HGNC => 'Human',
    );
    my $species = $species_for{ $self->gene_type }
        or die( 'Unsupported gene_type ' . $self->gene_type . ', expected MGI or HGNC' );

    # get default assembly for species
    my $default_assembly = $self->model->schema->resultset('SpeciesDefaultAssembly')->find(
        { species_id => $species }
    ) or die( 'No default assembly found for species ' . $species );
    my $assembly = $default_assembly->assembly_id;

    # read the csv file
    my $csv = Text::CSV->new();
    my $fh  = $self->upload_file->openr
        or die( 'Can not open crispr group data file ' . $self->upload_file );

    # an empty file has no header line to take the column names from
    my $header = $csv->getline( $fh )
        or die( 'No header line found in crispr group data file ' . $self->upload_file );
    $csv->column_names( map { lc $_ } @{ $header } );

    while ( my $data = $csv->getline_hr( $fh ) ) {

        # get the gene_id
        my $gene_id = $self->model->retrieve_gene(
            { species => $species, search_term => $data->{gene} }
        )->{gene_id};

        # collect the crisprs, starting on crispr 1, until a crispr_N
        # column is missing or empty
        my @crispr_group;
        for ( my $count = 1; $data->{"crispr_$count"}; $count++ ) {
            my $crispr_id = $data->{"crispr_$count"};

            # a failed lookup is deliberately swallowed: it just means the
            # crispr is not in LIMS2 yet
            my $crispr;
            try {
                $crispr = $self->model->retrieve_crispr( { id => $crispr_id } )->seq // '';
            };

            # if the crispr can't be found, import it from wge
            unless ( $crispr ) {
                $self->log->debug( 'Importing crispr ' . $crispr_id . ' from WGE' );
                my @crispr_lims = $self->model->import_wge_crisprs( [ $crispr_id ], $species, $assembly );
                my $lims2_id = @crispr_lims ? $crispr_lims[0]->{lims2_id} : undef;
                die( 'Could not import crispr ' . $crispr_id . ' from WGE' )
                    unless defined $lims2_id;
                $crispr_id = $lims2_id;
            }

            push @crispr_group, {
                crispr_id      => $crispr_id,
                left_of_target => $data->{"left_$count"},
            };
        }

        $self->log->debug( 'Creating crispr group for gene ' . $gene_id );
        $self->model->txn_do(
            sub {
                $self->model->create_crispr_group({
                    gene_id      => $gene_id,
                    gene_type_id => $self->gene_type,
                    crisprs      => \@crispr_group,
                });
                # without the commit flag the group is created and then
                # rolled back
                unless ( $self->commit ) {
                    $self->model->txn_rollback;
                }
            }
        );
    }

    # getline_hr returns undef on a parse error as well as at end of file,
    # so tell the two apart instead of silently dropping the remaining rows
    unless ( $csv->eof ) {
        die( 'Error parsing crispr group data file ' . $self->upload_file . ': ' . $csv->error_diag );
    }

    close $fh;

    return;
}
__PACKAGE__->meta->make_immutable;
1;
__END__