/
np-vocabulary-mappings-rxnorm.sql
390 lines (342 loc) · 18.3 KB
/
np-vocabulary-mappings-rxnorm.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
/*add mappings for NP names and constituents to RxNorm concepts (if any)*/
drop table scratch_sanya.np_const_to_rxnorm;
drop table scratch_sanya.np_rxnorm_substring_temp
create table if not exists scratch_sanya.np_to_rxnorm (
concept_id_rxnorm int4 not null,
concept_id_napdi int4 not null,
concept_name_rxnorm varchar,
concept_name_napdi varchar,
concept_class_id_rxnorm varchar,
concept_code varchar,
valid_start_date date,
valid_end_date date
);
create table if not exists scratch_sanya.np_const_to_rxnorm (
concept_id_rxnorm int4 not null,
concept_id_napdi int4 not null,
concept_name_rxnorm varchar,
concept_name_napdi varchar,
concept_class_id_rxnorm varchar,
concept_code varchar,
valid_start_date date,
valid_end_date date
);
create table if not exists scratch_sanya.np_rxnorm_substring_temp (
concept_id_rxnorm int4 not null,
concept_id_napdi int4 not null,
concept_name_rxnorm varchar,
concept_name_napdi varchar,
concept_class_id_rxnorm varchar,
concept_class_id_napdi varchar,
concept_code varchar,
valid_start_date date,
valid_end_date date
);
--test query to find RxNorm concepts - exact matches constituents (N=123)
with nps as (
select distinct upper(regexp_replace(c.concept_name, '\[.*\]','' )) concept_name_napdi, concept_id concept_id_napdi, concept_code,
concept_class_id np_concept_class_id
from staging_vocabulary.concept c
where c.vocabulary_id ='NAPDI'
)
insert into scratch_sanya.np_const_to_rxnorm
(concept_id_rxnorm, concept_name_rxnorm, concept_id_napdi, concept_name_napdi, concept_class_id_rxnorm,
concept_code, valid_start_date, valid_end_date)
select c2.concept_id concept_id_rxnorm, c2.concept_name concept_name_rxnorm, nps.concept_id_napdi,
nps.concept_name_napdi, c2.concept_class_id concept_class_id_rxnorm, nps.concept_code,
c2.valid_start_date, c2.valid_end_date
from staging_vocabulary.concept c2 inner join nps on upper(c2.concept_name) = nps.concept_name_napdi
where vocabulary_id ='RxNorm' and nps.np_concept_class_id = 'NaPDI NP Constituent'
;
--rxnorm np name matches (N=7)
with nps as (
select distinct upper(regexp_replace(c.concept_name, '\[.*\]','' )) concept_name_napdi, concept_id concept_id_napdi, concept_code,
concept_class_id np_concept_class_id
from staging_vocabulary.concept c
where c.vocabulary_id ='NAPDI'
)
insert into scratch_sanya.np_to_rxnorm
(concept_id_rxnorm, concept_name_rxnorm, concept_id_napdi, concept_name_napdi, concept_class_id_rxnorm,
concept_code, valid_start_date, valid_end_date)
select c2.concept_id concept_id_rxnorm, c2.concept_name concept_name_rxnorm, nps.concept_id_napdi,
nps.concept_name_napdi, c2.concept_class_id concept_class_id_rxnorm, nps.concept_code,
c2.valid_start_date, c2.valid_end_date
from staging_vocabulary.concept c2 inner join nps on upper(c2.concept_name) = nps.concept_name_napdi
where vocabulary_id ='RxNorm' and nps.np_concept_class_id != 'NaPDI NP Constituent'
;
--for all NPs - substring match
with nps as (
select distinct upper(regexp_replace(c.concept_name, '\[.*\]','' )) concept_name_napdi, concept_id concept_id_napdi, concept_code,
concept_class_id concept_class_id_napdi
from staging_vocabulary.concept c
where c.vocabulary_id ='NAPDI'
and c.concept_class_id != 'NaPDI NP Spelling Variation'
), rxns as (
select distinct c2.concept_id concept_id_rxnorm, c2.vocabulary_id, c2.standard_concept,
upper(c2.concept_name) concept_name_rxnorm, c2.concept_class_id concept_class_id_rxnorm,
c2.valid_start_date, c2.valid_end_date
from staging_vocabulary.concept c2
where c2.vocabulary_id = 'RxNorm'
)
insert into scratch_oct2022_np_vocab.np_rxnorm_substring_temp (concept_id_rxnorm,
concept_name_rxnorm, concept_id_napdi, concept_name_napdi, concept_class_id_rxnorm,
concept_class_id_napdi, concept_code, valid_start_date, valid_end_date)
select distinct rxns.concept_id_rxnorm, rxns.concept_name_rxnorm, nps.concept_id_napdi,
nps.concept_name_napdi, rxns.concept_class_id_rxnorm, nps.concept_class_id_napdi, nps.concept_code,
rxns.valid_start_date, rxns.valid_end_date
from rxns inner join nps on (
rxns.concept_name_rxnorm like concat('% ',nps.concept_name_napdi) or
rxns.concept_name_rxnorm like concat(nps.concept_name_napdi,' %') or
rxns.concept_name_rxnorm like concat('% ',nps.concept_name_napdi,' %')
)
;
--N=20,216 (includes non-exact matches, filtering done below)
insert into scratch_sanya.np_to_rxnorm (concept_id_rxnorm, concept_id_napdi, concept_name_rxnorm,
concept_name_napdi, concept_class_id_rxnorm, concept_code, valid_start_date, valid_end_date)
select npx.concept_id_rxnorm, npx.concept_id_napdi, npx.concept_name_rxnorm, npx.concept_name_napdi,
npx.concept_class_id_rxnorm, npx.concept_code, npx.valid_start_date, npx.valid_end_date
from scratch_sanya.np_rxnorm_substring_temp npx
where npx.concept_class_id_napdi != 'NaPDI NP Constituent'
--N=11,308
insert into scratch_sanya.np_const_to_rxnorm (concept_id_rxnorm, concept_id_napdi, concept_name_rxnorm,
concept_name_napdi, concept_class_id_rxnorm, concept_code, valid_start_date, valid_end_date)
select npx.concept_id_rxnorm, npx.concept_id_napdi, npx.concept_name_rxnorm, npx.concept_name_napdi,
npx.concept_class_id_rxnorm, npx.concept_code, npx.valid_start_date, npx.valid_end_date
from scratch_sanya.np_rxnorm_substring_temp npx
where npx.concept_class_id_napdi = 'NaPDI NP Constituent'
--add relationship in vocabulary - constituent_maps_to, np_maps_to
--add to vocabulary.concept
create temporary sequence if not exists napdi_concept_sequence as integer increment by -1 maxvalue -7005067;
INSERT INTO staging_vocabulary.concept (concept_id, concept_name, domain_id, vocabulary_id, concept_class_id, standard_concept, concept_code,
valid_start_date, valid_end_date, invalid_reason)
VALUES (nextval('napdi_concept_sequence'), 'napdi_np_maps_to', 'Metadata', 'Relationship', 'Relationship', '', 'OMOP generated',
'2000-01-01', '2099-02-22', '');
INSERT INTO staging_vocabulary.concept (concept_id, concept_name, domain_id, vocabulary_id, concept_class_id, standard_concept, concept_code,
valid_start_date, valid_end_date, invalid_reason)
VALUES (nextval('napdi_concept_sequence'), 'napdi_const_maps_to', 'Metadata', 'Relationship', 'Relationship', '', 'OMOP generated',
'2000-01-01', '2099-02-22', '')
;
--add to vocabulary.relationship
insert into staging_vocabulary.relationship (relationship_id, relationship_name, is_hierarchical, defines_ancestry, reverse_relationship_id, relationship_concept_id)
with rel as (select c.concept_id rel_id from staging_vocabulary.concept c where c.concept_name = 'napdi_np_maps_to')
select 'napdi_np_maps_to', 'NaPDI NP maps to', 0, 0, 0, rel_id
from rel
;
insert into staging_vocabulary.relationship (relationship_id, relationship_name, is_hierarchical, defines_ancestry, reverse_relationship_id, relationship_concept_id)
with rel as (select c.concept_id rel_id from staging_vocabulary.concept c where c.concept_name = 'napdi_np_maps_to')
select 'napdi_const_maps_to', 'NaPDI constituent maps to', 0, 0, 0, rel_id
from rel
;
--Filtering mappings before adding to vocabulary
--remove BALM, JU, MATE, RAPE, RAPE SEED after review
delete from scratch_sanya.np_to_rxnorm
where concept_id_napdi in (-7000171, -7000259, -7000470, -7000562, -7000172)
--how to include filtering done in rxnorm-mappings-string-match.ipynb?? filters out non exact matches (N=)
--also include rxnorm-faers-match queries?
--Add to concept_relationship: Exact+substring match of NP names
insert into staging_vocabulary.concept_relationship (concept_id_1, concept_id_2, relationship_id,
valid_start_date, valid_end_date, invalid_reason)
select concept_id_napdi, concept_id_rxnorm, 'napdi_np_maps_to', valid_start_date, valid_end_date, ''
from scratch_sanya.np_to_rxnorm
--Add to concept_relationship: Exact+substring match of constituents
insert into staging_vocabulary.concept_relationship (concept_id_1, concept_id_2, relationship_id,
valid_start_date, valid_end_date, invalid_reason)
select concept_id_napdi, concept_id_rxnorm, 'napdi_const_maps_to', valid_start_date, valid_end_date, ''
from scratch_sanya.np_const_to_rxnorm
/**BELOW QUERIES NOT FOR VOCAB - ASSESS MAPPINGS AFTER REVEW**/
--Generate tables for review
select distinct ntr.concept_id_napdi, ntr.concept_id_rxnorm, ntr.concept_name_napdi, ntr.concept_name_rxnorm from scratch_sanya.np_to_rxnorm ntr
select distinct ntr.concept_name_napdi, count(*) from scratch_sanya.np_to_rxnorm ntr
group by ntr.concept_name_napdi
--Query to find codes used in FAERS data
select t.drug_concept_id, count(distinct t.rid)
from (
(
select drug_concept_id, primaryid rid
from faers.standard_drug_outcome_drilldown sdod
inner join scratch_sanya.np_rxnorm_faers_match nrfm on sdod.drug_concept_id = nrfm.concept_id_rxnorm
where primaryid is not null
and isr is null
)
union
(
select drug_concept_id, isr rid
from faers.standard_drug_outcome_drilldown sdod
inner join scratch_sanya.np_rxnorm_faers_match nrfm on sdod.drug_concept_id = nrfm.concept_id_rxnorm
where primaryid is not null
and isr is not null
)
) t
group by t.drug_concept_id
--queries to get RxNorm mappings - add to example queries on GitHub
select * from staging_vocabulary.concept c
where c.concept_id = '-7001523'
select * from staging_vocabulary.concept c where concept_class_id = 'Cinnamon';
--cinnamon
select c1.concept_name, c1.concept_id, c2.concept_name, c2.concept_id
from staging_vocabulary.concept c1
inner join staging_vocabulary.concept_relationship cr on c1.concept_id = cr.concept_id_1
inner join staging_vocabulary.concept c2 on c2.concept_id = cr.concept_id_2
where cr.relationship_id = 'napdi_np_maps_to'
and c1.concept_id = -7000976
--ginger
select c1.concept_name, c1.concept_id, c2.concept_name, c2.concept_id
from staging_vocabulary.concept c1
inner join staging_vocabulary.concept_relationship cr on c1.concept_id = cr.concept_id_1
inner join staging_vocabulary.concept c2 on c2.concept_id = cr.concept_id_2
where cr.relationship_id = 'napdi_np_maps_to'
and c1.concept_id = -7001032
--all NP mappings to RxNorm
select c1.concept_name, c1.concept_id, c2.concept_name, c2.concept_id
from staging_vocabulary.concept c1
inner join staging_vocabulary.concept_relationship cr on c1.concept_id = cr.concept_id_1
inner join staging_vocabulary.concept c2 on c2.concept_id = cr.concept_id_2
where cr.relationship_id = 'napdi_np_maps_to'
--cinnamon constituents
select distinct c1.concept_name, c1.concept_id, c3.concept_name, c3.concept_id
from staging_vocabulary.concept c1
inner join staging_vocabulary.concept_relationship cr1 on c1.concept_id = cr1.concept_id_1
inner join staging_vocabulary.concept c2 on c2.concept_id = cr1.concept_id_2
inner join staging_vocabulary.concept_relationship cr2 on c2.concept_id = cr2.concept_id_1
inner join staging_vocabulary.concept c3 on c3.concept_id = cr2.concept_id_2
where cr1.relationship_id = 'napdi_is_pt_of'
and cr2.relationship_id = 'napdi_has_const'
and c1.concept_id = '-7000976'
--cinnamon constituents RxNorm mappings
with cinn_cons as (
select c1.concept_name np_name, c1.concept_id np_id, c2.concept_name np_const_name, c2.concept_id np_const_id
from staging_vocabulary.concept c1
inner join staging_vocabulary.concept_relationship cr on c1.concept_id = cr.concept_id_1
inner join staging_vocabulary.concept c2 on c2.concept_id = cr.concept_id_2
where cr.relationship_id = 'napdi_has_const'
and c1.concept_id = -7001529
)
select cinn_cons.np_id, cinn_cons.np_name, cr2.concept_id_1 napdi_const_id, cinn_cons.np_const_name,
cr2.concept_id_2 rxnorm_id, c.concept_name rxnorm_name
from staging_vocabulary.concept_relationship cr2
inner join cinn_cons on cinn_cons.np_const_id = cr2.concept_id_1
inner join staging_vocabulary.concept c on c.concept_id = cr2.concept_id_2
where cr2.relationship_id = 'napdi_const_maps_to'
--constituent mappings sample
select distinct c2.concept_name, c2.concept_id
from staging_vocabulary.concept c1
inner join staging_vocabulary.concept_relationship cr on c1.concept_id = cr.concept_id_1
inner join staging_vocabulary.concept c2 on c2.concept_id = cr.concept_id_2
where cr.relationship_id = 'napdi_pt'
with np_cons as (
select distinct c1.concept_name np_name, c1.concept_id np_id, c3.concept_name np_const_name, c3.concept_id np_const_id
from staging_vocabulary.concept c1
inner join staging_vocabulary.concept_relationship cr1 on c1.concept_id = cr1.concept_id_1
inner join staging_vocabulary.concept c2 on c2.concept_id = cr1.concept_id_2
inner join staging_vocabulary.concept_relationship cr2 on c2.concept_id = cr2.concept_id_1
inner join staging_vocabulary.concept c3 on c3.concept_id = cr2.concept_id_2
where cr1.relationship_id = 'napdi_is_pt_of'
and cr2.relationship_id = 'napdi_has_const'
and c1.concept_id in (-7000976, -7001047, -7001077, -7001021, -7000976, -7001198, -7001224,
-7001012, -7000966, -7001032, -7001092, -7001008, -7001117, -7000994, -7001153, -7001114,
-7001156, -7001087, -7001025, -7001083, -7001209, -7001094, -7001050)
)
select np_cons.np_id, np_cons.np_name, cr2.concept_id_1 np_const_id, np_cons.np_const_name,
cr2.concept_id_2 rxnorm_id, c.concept_name rxnorm_name
from staging_vocabulary.concept_relationship cr2
inner join np_cons on np_cons.np_const_id = cr2.concept_id_1
inner join staging_vocabulary.concept c on c.concept_id = cr2.concept_id_2
where cr2.relationship_id = 'napdi_const_maps_to'
---SCRATCH
--N=14,755 (Ingredient)
--N=3,151 (Precise Ingredient)
select count(*) from staging_vocabulary.concept c
where c.vocabulary_id = 'RxNorm' and c.concept_class_id = 'Precise Ingredient'
--N=18
select distinct c.concept_class_id from staging_vocabulary.concept c
where c.vocabulary_id = 'RxNorm'
--N=2,289 (total is 5061 with spelling variations)
select count(*) from staging_vocabulary.concept c
where c.vocabulary_id = 'NAPDI' and c.concept_class_id != 'NaPDI NP Spelling Variation'
--DO - find all strings from 14755+3151 with 2289 possible sub strings
--find all RxNorm NP concepts - Latin binomial, common names, constituents
--very very long with all matches
with nps as (
select distinct concept_name np_name
from staging_vocabulary.concept c
where c.vocabulary_id ='NAPDI'
)
select *
from staging_vocabulary.concept c2
inner join nps on upper(c2.concept_name) like concat('%',nps.np_name,'%')
where c2.vocabulary_id = 'RxNorm' and nps.np_name = 'GREEN TEA'
;
select * from staging_vocabulary.concept c
where c.vocabulary_id = 'RxNorm' and upper(c.concept_name) like concat('%', 'GREEN TEA', '%')
select * from staging_vocabulary.concept c
where c.concept_id = -7005067
select * from scratch_feb2022_np_vocab.lb_to_constituent ltc
where ltc.latin_binomial = 'Camellia sinensis'
select * from scratch_feb2022_np_vocab.lb_to_common_name_and_pt ltcnap
where ltcnap.pt = 'Tao'
with pt_lb_mapping as (
select distinct c1.concept_name pt, regexp_replace(c2.concept_name, '.*\[(.*)\]','\1' ) lb
from staging_vocabulary.concept c1
inner join staging_vocabulary.concept_relationship cr on c1.concept_id = cr.concept_id_1
inner join staging_vocabulary.concept c2 on c2.concept_id = cr.concept_id_2
where cr.relationship_id = 'napdi_is_pt_of'
)
select distinct pt_lb_mapping.lb, regexp_replace(c2.concept_name, '\[.*\]','' ) mappings
from staging_vocabulary.concept c1
inner join staging_vocabulary.concept_relationship cr on c1.concept_id = cr.concept_id_1
inner join staging_vocabulary.concept c2 on c2.concept_id = cr.concept_id_2
inner join pt_lb_mapping on c1.concept_name = pt_lb_mapping.pt
where cr.relationship_id = 'napdi_is_pt_of'
order by pt_lb_mapping.lb
;
select * from staging_vocabulary.concept c
where c.vocabulary_id = 'RxNorm'
select distinct upper(regexp_replace(c.concept_name, '\[.*\]','' )) concept_name_napdi, concept_id concept_id_napdi, concept_code,
concept_class_id concept_class_id_napdi
from staging_vocabulary.concept c
where c.vocabulary_id ='NAPDI'
and c.concept_class_id != 'NaPDI NP Spelling Variation' and c.concept_class_id = 'Feverfew'
--substring match to rxnorm - save to temp (no extra cost for non indexed fields)
with nps as (
select distinct upper(regexp_replace(c.concept_name, '\[.*\]','' )) concept_name_napdi, concept_id concept_id_napdi, concept_code,
concept_class_id concept_class_id_napdi
from staging_vocabulary.concept c
where c.vocabulary_id ='NAPDI'
and c.concept_class_id != 'NaPDI NP Spelling Variation'
), rxns as (
select distinct c2.concept_id concept_id_rxnorm, c2.vocabulary_id, c2.standard_concept,
upper(c2.concept_name) concept_name_rxnorm, c2.concept_class_id concept_class_id_rxnorm,
c2.valid_start_date, c2.valid_end_date
from staging_vocabulary.concept c2
where c2.vocabulary_id = 'RxNorm'
)
insert into scratch_sanya.np_rxnorm_substring_temp
(concept_id_rxnorm, concept_name_rxnorm, concept_id_napdi, concept_name_napdi, concept_class_id_rxnorm,
concept_class_id_napdi, concept_code, valid_start_date, valid_end_date)
select rxns.concept_id_rxnorm, rxns.concept_name_rxnorm, nps.concept_id_napdi,
nps.concept_name_napdi, rxns.concept_class_id_rxnorm, nps.concept_class_id_napdi, nps.concept_code,
rxns.valid_start_date, rxns.valid_end_date
from rxns inner join nps on rxns.concept_name_rxnorm like concat('%',nps.concept_name_napdi,'%')
where nps.concept_name_napdi = 'GREEN TEA'
;
with nps as (
select distinct upper(regexp_replace(c.concept_name, '\[.*\]','' )) concept_name_napdi, concept_id concept_id_napdi, concept_code,
concept_class_id concept_class_id_napdi
from staging_vocabulary.concept c
where c.vocabulary_id ='NAPDI'
and c.concept_class_id != 'NaPDI NP Spelling Variation'
), rxns as (
select distinct c2.concept_id concept_id_rxnorm, c2.vocabulary_id, c2.standard_concept,
upper(c2.concept_name) concept_name_rxnorm, c2.concept_class_id concept_class_id_rxnorm,
c2.valid_start_date, c2.valid_end_date
from staging_vocabulary.concept c2
where c2.vocabulary_id = 'RxNorm'
)
insert into scratch_sanya.np_rxnorm_substring_temp (concept_id_rxnorm,
concept_name_rxnorm, concept_id_napdi, concept_name_napdi, concept_class_id_rxnorm,
concept_class_id_napdi, concept_code, valid_start_date, valid_end_date)
select rxns.concept_id_rxnorm, rxns.concept_name_rxnorm, nps.concept_id_napdi,
nps.concept_name_napdi, rxns.concept_class_id_rxnorm, nps.concept_class_id_napdi, nps.concept_code,
rxns.valid_start_date, rxns.valid_end_date
from rxns inner join nps on rxns.concept_name_rxnorm like concat('%',nps.concept_name_napdi,'%')
;