-
Notifications
You must be signed in to change notification settings - Fork 0
/
hive.ddl
executable file
·346 lines (333 loc) · 14.3 KB
/
hive.ddl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
Create database cms;
use cms;
drop table cms.ownership;
create external table cms.ownership(
change_type string,
physician_profile_id string,
physician_first_name string,
physician_middle_name string,
physician_last_name string,
physician_name_suffix string,
recipient_primary_business_street_address_line1 string,
recipient_primary_business_street_address_line2 string,
recipient_city string,
recipient_state string,
recipient_zip_code string,
recipient_country string,
recipient_province string,
recipient_postal_code string,
physician_primary_type string,
physician_specialty string,
record_id string,
program_year string,
total_amount_invested_usdollars decimal(18,3),
value_of_interest decimal(18,3),
terms_of_interest string,
submitting_applicable_manufacturer_or_applicable_gpo_name string,
applicable_manufacturer_or_applicable_gpo_making_payment_id string,
applicable_manufacturer_or_applicable_gpo_making_payment_name string,
applicable_manufacturer_or_applicable_gpo_making_payment_state string,
applicable_manufacturer_or_applicable_gpo_making_payment_country string,
dispute_status_for_publication string,
interest_held_by_physician_or_an_immediate_family_member string,
payment_publication_date string
)
row format delimited
fields terminated by ','
stored as textfile
location '/user/frothkoetter/cms.db/ownership/'
;
select * from cms.ownership limit 3;
drop table if exists cms.researchpayments;
create external table cms.researchpayments(
change_type string,
covered_recipient_type string,
noncovered_recipient_entity_name string,
teaching_hospital_ccn string,
teaching_hospital_id string,
teaching_hospital_name string,
physician_profile_id string,
physician_first_name string,
physician_middle_name string,
physician_last_name string,
physician_name_suffix string,
recipient_primary_business_street_address_line1 string,
recipient_primary_business_street_address_line2 string,
recipient_city string,
recipient_state string,
recipient_zip_code string,
recipient_country string,
recipient_province string,
recipient_postal_code string,
physician_primary_type string,
physician_specialty string,
physician_license_state_code1 string,
physician_license_state_code2 string,
physician_license_state_code3 string,
physician_license_state_code4 string,
physician_license_state_code5 string,
principal_investigator_1_profile_id string,
principal_investigator_1_first_name string,
principal_investigator_1_middle_name string,
principal_investigator_1_last_name string,
principal_investigator_1_name_suffix string,
principal_investigator_1_business_street_address_line1 string,
principal_investigator_1_business_street_address_line2 string,
principal_investigator_1_city string,
principal_investigator_1_state string,
principal_investigator_1_zip_code string,
principal_investigator_1_country string,
principal_investigator_1_province string,
principal_investigator_1_postal_code string,
principal_investigator_1_primary_type string,
principal_investigator_1_specialty string,
principal_investigator_1_license_state_code1 string,
principal_investigator_1_license_state_code2 string,
principal_investigator_1_license_state_code3 string,
principal_investigator_1_license_state_code4 string,
principal_investigator_1_license_state_code5 string,
principal_investigator_2_profile_id string,
principal_investigator_2_first_name string,
principal_investigator_2_middle_name string,
principal_investigator_2_last_name string,
principal_investigator_2_name_suffix string,
principal_investigator_2_business_street_address_line1 string,
principal_investigator_2_business_street_address_line2 string,
principal_investigator_2_city string,
principal_investigator_2_state string,
principal_investigator_2_zip_code string,
principal_investigator_2_country string,
principal_investigator_2_province string,
principal_investigator_2_postal_code string,
principal_investigator_2_primary_type string,
principal_investigator_2_specialty string,
principal_investigator_2_license_state_code1 string,
principal_investigator_2_license_state_code2 string,
principal_investigator_2_license_state_code3 string,
principal_investigator_2_license_state_code4 string,
principal_investigator_2_license_state_code5 string,
principal_investigator_3_profile_id string,
principal_investigator_3_first_name string,
principal_investigator_3_middle_name string,
principal_investigator_3_last_name string,
principal_investigator_3_name_suffix string,
principal_investigator_3_business_street_address_line1 string,
principal_investigator_3_business_street_address_line2 string,
principal_investigator_3_city string,
principal_investigator_3_state string,
principal_investigator_3_zip_code string,
principal_investigator_3_country string,
principal_investigator_3_province string,
principal_investigator_3_postal_code string,
principal_investigator_3_primary_type string,
principal_investigator_3_specialty string,
principal_investigator_3_license_state_code1 string,
principal_investigator_3_license_state_code2 string,
principal_investigator_3_license_state_code3 string,
principal_investigator_3_license_state_code4 string,
principal_investigator_3_license_state_code5 string,
principal_investigator_4_profile_id string,
principal_investigator_4_first_name string,
principal_investigator_4_middle_name string,
principal_investigator_4_last_name string,
principal_investigator_4_name_suffix string,
principal_investigator_4_business_street_address_line1 string,
principal_investigator_4_business_street_address_line2 string,
principal_investigator_4_city string,
principal_investigator_4_state string,
principal_investigator_4_zip_code string,
principal_investigator_4_country string,
principal_investigator_4_province string,
principal_investigator_4_postal_code string,
principal_investigator_4_primary_type string,
principal_investigator_4_specialty string,
principal_investigator_4_license_state_code1 string,
principal_investigator_4_license_state_code2 string,
principal_investigator_4_license_state_code3 string,
principal_investigator_4_license_state_code4 string,
principal_investigator_4_license_state_code5 string,
principal_investigator_5_profile_id string,
principal_investigator_5_first_name string,
principal_investigator_5_middle_name string,
principal_investigator_5_last_name string,
principal_investigator_5_name_suffix string,
principal_investigator_5_business_street_address_line1 string,
principal_investigator_5_business_street_address_line2 string,
principal_investigator_5_city string,
principal_investigator_5_state string,
principal_investigator_5_zip_code string,
principal_investigator_5_country string,
principal_investigator_5_province string,
principal_investigator_5_postal_code string,
principal_investigator_5_primary_type string,
principal_investigator_5_specialty string,
principal_investigator_5_license_state_code1 string,
principal_investigator_5_license_state_code2 string,
principal_investigator_5_license_state_code3 string,
principal_investigator_5_license_state_code4 string,
principal_investigator_5_license_state_code5 string,
submitting_applicable_manufacturer_or_applicable_gpo_name string,
applicable_manufacturer_or_applicable_gpo_making_payment_id string,
applicable_manufacturer_or_applicable_gpo_making_payment_name string,
applicable_manufacturer_or_applicable_gpo_making_payment_state string,
applicable_manufacturer_or_applicable_gpo_making_payment_country string,
related_product_indicator string,
covered_or_noncovered_indicator_1 string,
indicate_drug_or_biological_or_device_or_medical_supply_1 string,
product_category_or_therapeutic_area_1 string,
name_of_drug_or_biological_or_device_or_medical_supply_1 string,
associated_drug_or_biological_ndc_1 string,
covered_or_noncovered_indicator_2 string,
indicate_drug_or_biological_or_device_or_medical_supply_2 string,
product_category_or_therapeutic_area_2 string,
name_of_drug_or_biological_or_device_or_medical_supply_2 string,
associated_drug_or_biological_ndc_2 string,
covered_or_noncovered_indicator_3 string,
indicate_drug_or_biological_or_device_or_medical_supply_3 string,
product_category_or_therapeutic_area_3 string,
name_of_drug_or_biological_or_device_or_medical_supply_3 string,
associated_drug_or_biological_ndc_3 string,
covered_or_noncovered_indicator_4 string,
indicate_drug_or_biological_or_device_or_medical_supply_4 string,
product_category_or_therapeutic_area_4 string,
name_of_drug_or_biological_or_device_or_medical_supply_4 string,
associated_drug_or_biological_ndc_4 string,
covered_or_noncovered_indicator_5 string,
indicate_drug_or_biological_or_device_or_medical_supply_5 string,
product_category_or_therapeutic_area_5 string,
name_of_drug_or_biological_or_device_or_medical_supply_5 string,
associated_drug_or_biological_ndc_5 string,
total_amount_of_payment_usdollars decimal(10,2),
date_of_payment string,
form_of_payment_or_transfer_of_value string,
expenditure_category1 string,
expenditure_category2 string,
expenditure_category3 string,
expenditure_category4 string,
expenditure_category5 string,
expenditure_category6 string,
preclinical_research_indicator string,
delay_in_publication_indicator string,
name_of_study string,
dispute_status_for_publication string,
record_id string,
program_year string,
payment_publication_date string,
clinicaltrials_gov_identifier string,
research_information_link string,
context_of_research string
)
row format delimited
fields terminated by ','
stored as textfile
location '/user/frothkoetter/cms.db/researchpayments/'
;
select * from cms.researchpayments limit 3;
use cms;
drop table cms.generalpayments;
create external table cms.generalpayments (
change_type string,
covered_recipient_type string,
teaching_hospital_ccn string,
teaching_hospital_id string,
teaching_hospital_name string,
physician_profile_id string,
physician_first_name string,
physician_middle_name string,
physician_last_name string,
physician_name_suffix string,
recipient_primary_business_street_address_line1 string,
recipient_primary_business_street_address_line2 string,
recipient_city string,
recipient_state string,
recipient_zip_code string,
recipient_country string,
recipient_province string,
recipient_postal_code string,
physician_primary_type string,
physician_specialty string,
physician_license_state_code1 string,
physician_license_state_code2 string,
physician_license_state_code3 string,
physician_license_state_code4 string,
physician_license_state_code5 string,
submitting_applicable_manufacturer_or_applicable_gpo_name string,
applicable_manufacturer_or_applicable_gpo_making_payment_id string,
applicable_manufacturer_or_applicable_gpo_making_payment_name string,
applicable_manufacturer_or_applicable_gpo_making_payment_state string,
applicable_manufacturer_or_applicable_gpo_making_payment_country string,
total_amount_of_payment_usdollars decimal ( 10,2),
date_of_payment string,
number_of_payments_included_in_total_amount int,
form_of_payment_or_transfer_of_value string,
nature_of_payment_or_transfer_of_value string,
city_of_travel string,
state_of_travel string,
country_of_travel string,
physician_ownership_indicator string,
third_party_payment_recipient_indicator string,
name_of_third_party_entity_receiving_payment_or_transfer_of_value string,
charity_indicator string,
third_party_equals_covered_recipient_indicator string,
contextual_information string,
delay_in_publication_indicator string,
record_id string,
dispute_status_for_publication string,
related_product_indicator string,
covered_or_noncovered_indicator_1 string,
indicate_drug_or_biological_or_device_or_medical_supply_1 string,
product_category_or_therapeutic_area_1 string,
name_of_drug_or_biological_or_device_or_medical_supply_1 string,
associated_drug_or_biological_ndc_1 string,
covered_or_noncovered_indicator_2 string,
indicate_drug_or_biological_or_device_or_medical_supply_2 string,
product_category_or_therapeutic_area_2 string,
name_of_drug_or_biological_or_device_or_medical_supply_2 string,
associated_drug_or_biological_ndc_2 string,
covered_or_noncovered_indicator_3 string,
indicate_drug_or_biological_or_device_or_medical_supply_3 string,
product_category_or_therapeutic_area_3 string,
name_of_drug_or_biological_or_device_or_medical_supply_3 string,
associated_drug_or_biological_ndc_3 string,
covered_or_noncovered_indicator_4 string,
indicate_drug_or_biological_or_device_or_medical_supply_4 string,
product_category_or_therapeutic_area_4 string,
name_of_drug_or_biological_or_device_or_medical_supply_4 string,
associated_drug_or_biological_ndc_4 string,
covered_or_noncovered_indicator_5 string,
indicate_drug_or_biological_or_device_or_medical_supply_5 string,
product_category_or_therapeutic_area_5 string,
name_of_drug_or_biological_or_device_or_medical_supply_5 string,
associated_drug_or_biological_ndc_5 string,
program_year string,
payment_publication_date string
)
row format delimited
fields terminated by ','
stored as textfile
location '/user/frothkoetter/cms.db/generalpayments/';
select * from generalpayments limit 3;
use cms;
drop table if exists cms.abrechnungen;
create table cms.abrechnungen (arzt DOUBLE PRECISION, fachgebiet DOUBLE PRECISION,
krankenhaus_oder_arzt DOUBLE PRECISION, medikament DOUBLE PRECISION, strittig INT,
bundesland DOUBLE PRECISION, leistungsempfaenger_plz DOUBLE PRECISION,
auszahlung_monat DOUBLE PRECISION, auszahlung_euro DOUBLE PRECISION)
STORED AS PARQUET
LOCATION '/tmp/cmsml/'
TBLPROPERTIES ('transactional'='false', 'external.table.purge'='true')
;
INSERT OVERWRITE TABLE cms.abrechnungen select
cast( abs( hash(physician_profile_id))/9999999999999999999 as DOUBLE PRECISION) arzt,
cast( abs( hash(physician_specialty))/9999999999999999999 as DOUBLE PRECISION) fachgebiet,
cast( abs( hash(covered_recipient_type))/9999999999999999999 as DOUBLE PRECISION) krankenhaus_oder_arzt,
cast( abs( hash(associated_drug_or_biological_ndc_1))/9999999999999999999 as DOUBLE PRECISION) medikament,
case when dispute_status_for_publication ='"No"' then 0 else 1 end strittig,
cast( abs( hash( recipient_state ))/9999999999999999999 as DOUBLE PRECISION) bundesland,
cast( abs( hash( substr( recipient_zip_code, 2, 5 )))/9999999999999999999 as DOUBLE PRECISION) leistungsempfaenger_plz,
cast( abs( hash( substr(date_of_payment,3)))/9999999999999999999 as DOUBLE PRECISION) auszahlung_monat,
cast (total_amount_of_payment_usdollars as float ) auszahlung_euro
from cms.generalpayments
limit 250000
;
select * from cms.abrechnungen limit 3;