In [0]:
import requests #to get the data from the s3 bucket

#Steps to complete
1. Download all necessary files
2. Analyze and parse content into respective domains (Medications and Problems)
3. Combine Clinical Data with other supplied data
4. Store the output (parquet, delta, csv)
5. Describe Pipeline
6. Any additional Details
7. Describe how this feed maps into a common data model format for consumption into a Data Warehouse / Lakehouse, with a focus on HL7/FHIR


#**Drop ref files into dbfs or volumes**
/FileStore/tables/MDE/{fileName}
 Need an unrestricted cluster to interact with DBFS

In [0]:
# Reference CSV files to ingest; each name is resolved under /FileStore/tables/MDE/
fileNames = [
    "ccda_pre_signed_urls.csv",
    "data_engineer_exam_claims_final.csv",
    "data_engineer_exam_rx_final.csv",
]

In [0]:
#create load function
def ingestRefData(fileName):
    """Load one reference CSV from DBFS and register it as a Spark temp view.

    The file is read from ``/FileStore/tables/MDE/{fileName}`` as comma-separated
    text with a header row. Schema inference is disabled, so Spark leaves every
    column as a string. The temp view is named after the file with its extension
    removed and lives only for the current Spark session (nothing is persisted).

    Args:
        fileName: Name of a file under ``/FileStore/tables/MDE/``,
            e.g. ``"ccda_pre_signed_urls.csv"``.

    Returns:
        The DataFrame backing the temp view, so callers can keep working with it
        without querying the view again.
    """
    import os  # local import: keeps this cell runnable on its own

    # File location on DBFS
    file_location = f"/FileStore/tables/MDE/{fileName}"

    # CSV reader options: header row present, comma separated, no schema inference
    df = (
        spark.read.format("csv")
        .option("inferSchema", "false")
        .option("header", "true")
        .option("sep", ",")
        .load(file_location)
    )

    # Drop the extension whatever its length instead of assuming a 4-character ".csv"
    tableName = os.path.splitext(fileName)[0]

    # Printed so the view names can be reviewed during development
    print(tableName)

    # Session-scoped temp view: the reference data does not need permanent table storage
    df.createOrReplaceTempView(tableName)
    return df

In [0]:
# Register every reference file as a temp view; ingestRefData prints each view name it creates.
for x in fileNames:
    ingestRefData(x)

ccda_pre_signed_urls
data_engineer_exam_claims_final
data_engineer_exam_rx_final


In [0]:
%sql
--Review the ingested data through the temp views; static tables can be created from the temp views if needed
--select * from ccda_pre_signed_urls
--select * from data_engineer_exam_claims_final
--select * from data_engineer_exam_rx_final

pre_signed_urls
https://mil-s3-portal-intelli-intelli-irixehr-staging-prod.s3.amazonaws.com/trove/out_sample/005ao54m-c566-7671-19c0-596o1os969x4/005ao54m-c566-7671-19c0-596o1os969x4_055d57c0a288fb38b46a4cf1431ef42345b9f60b_masked.xml?AWSAccessKeyId=ASIASIMKARLO6WRCWWLK&Signature=WnCYq6Lv5IfDjLXRG7SD2xcn9lE%3D&x-amz-security-token=IQoJb3JpZ2luX2VjELf%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FwEaCXVzLWVhc3QtMSJHMEUCIQCLFmdfK87a2HvlCNs%2BmX3aDZRWYlthJ8L45%2BAgdaZhhQIgJCsXl04MltcGNJ05qP%2B5IJQaVBpOBFrMifRNaaPgzSQqrwMIv%2F%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FARADGgwxNTU0NDUxMzYwOTMiDA1kDRECZ1GO1n3hViqDAzA9x829793Fg0hPtyrFGnJOequxhFDugA1anhqtLdAiJE0%2FztIgUzBU3ALs%2BlC%2BXkJ9vsFRRgJz30TVnKn0VXfEb76ww5NsFdFlmG0HH2JjvYWKqOcO3DX0ZBdIeTMsD0Qwm0Lpq51FPDpnNOGcHROTnNu%2FFr2GmOemSCLp3lcExd3REdwQ4jSNBP%2FxSClsIKzOWP39wAYHUchOh4p6Oeq3O7VKwl0WDQkmS2tHNeQDzesFpWbJwnq2kZV%2Bv0nJCoW7tY%2F78clclqjd9e%2BRssCmmxeuwrrOnBi3JD1jCq89kYw4UIDLnvz1RjardkSt7Zl9shRpQzOqnDvUwH35NKT3iiVaMXg2EiJC3iVoZu1Nr7A7%2BGzPvTUL22zzVtWMWWVOV5OUpdklFQqCgL6flgTE9B0rQbQ1YX%2FW57zCduKYbkBODCACUm6uuLTcAj5WoV3eF8SiAfgA9YMizPY4aoXT19GAbdkv5lH9ahiSvGfWP6SM0tFESjQDMTD97Ek7P2Uc2TCotPO8BjqmAQK7EVUYpgdSgtj8RW5k481xlI9a9jO%2FM5t2HHLzyBnQTtLUvsVosVJa4cwmUSHl4%2BP3qk9vLI3WaHkogsYWlwJ4adQpV%2FX2jq2Sv0TCSylLafAJoFzl%2BnogJbtJHYzG0IbsVe%2B8Z8sBGBoE%2BH7toNKN4bQaJ7GRMOYkNaehHWBoNk8Wb8kUg647WcHDTv1NTcAklbCTAZqpZV1pdYEyezXOSHrpwxw%3D&Expires=1738937512
https://mil-s3-portal-intelli-intelli-irixehr-staging-prod.s3.amazonaws.com/trove/out_sample/005ao54m-c566-7671-19c0-596o1os969x4/005ao54m-c566-7671-19c0-596o1os969x4_11772d5b79eb94bc68a95e55a3ec43d615cd23d9_masked.xml?AWSAccessKeyId=ASIASIMKARLO6WRCWWLK&Signature=XhEHU2Fv5AnUJQB%2BbdAAFqdL5LM%3D&x-amz-security-token=IQoJb3JpZ2luX2VjELf%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FwEaCXVzLWVhc3QtMSJHMEUCIQCLFmdfK87a2HvlCNs%2BmX3aDZRWYlthJ8L45%2BAgdaZhhQIgJCsXl04MltcGNJ05qP%2B5IJQaVBpOBFrMifRNaaPgzSQqrwMIv%2F%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FARADGgwxNTU0NDUxMzYwOTMiDA1kDRECZ1GO1n3hViqDAzA9x829793Fg0hPtyrFGnJOequxhFDugA1anhqtLdAiJE0%2FztIgUzBU3ALs%2BlC%2BXkJ9vsFRRgJz30TVnKn0VXfEb76ww5NsFdFlmG0HH2JjvYWKqOcO3DX0ZBdIeTMsD0Qwm0Lpq51FPDpnNOGcHROTnNu%2FFr2GmOemSCLp3lcExd3REdwQ4jSNBP%2FxSClsIKzOWP39wAYHUchOh4p6Oeq3O7VKwl0WDQkmS2tHNeQDzesFpWbJwnq2kZV%2Bv0nJCoW7tY%2F78clclqjd9e%2BRssCmmxeuwrrOnBi3JD1jCq89kYw4UIDLnvz1RjardkSt7Zl9shRpQzOqnDvUwH35NKT3iiVaMXg2EiJC3iVoZu1Nr7A7%2BGzPvTUL22zzVtWMWWVOV5OUpdklFQqCgL6flgTE9B0rQbQ1YX%2FW57zCduKYbkBODCACUm6uuLTcAj5WoV3eF8SiAfgA9YMizPY4aoXT19GAbdkv5lH9ahiSvGfWP6SM0tFESjQDMTD97Ek7P2Uc2TCotPO8BjqmAQK7EVUYpgdSgtj8RW5k481xlI9a9jO%2FM5t2HHLzyBnQTtLUvsVosVJa4cwmUSHl4%2BP3qk9vLI3WaHkogsYWlwJ4adQpV%2FX2jq2Sv0TCSylLafAJoFzl%2BnogJbtJHYzG0IbsVe%2B8Z8sBGBoE%2BH7toNKN4bQaJ7GRMOYkNaehHWBoNk8Wb8kUg647WcHDTv1NTcAklbCTAZqpZV1pdYEyezXOSHrpwxw%3D&Expires=1738937512
https://mil-s3-portal-intelli-intelli-irixehr-staging-prod.s3.amazonaws.com/trove/out_sample/005ao54m-c566-7671-19c0-596o1os969x4/005ao54m-c566-7671-19c0-596o1os969x4_13a46b10c76fafbfc47d20cf9a8593d10382786d_masked.xml?AWSAccessKeyId=ASIASIMKARLO6WRCWWLK&Signature=T3rfI9vQzUgltPYTF1RYCGprsSo%3D&x-amz-security-token=IQoJb3JpZ2luX2VjELf%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FwEaCXVzLWVhc3QtMSJHMEUCIQCLFmdfK87a2HvlCNs%2BmX3aDZRWYlthJ8L45%2BAgdaZhhQIgJCsXl04MltcGNJ05qP%2B5IJQaVBpOBFrMifRNaaPgzSQqrwMIv%2F%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FARADGgwxNTU0NDUxMzYwOTMiDA1kDRECZ1GO1n3hViqDAzA9x829793Fg0hPtyrFGnJOequxhFDugA1anhqtLdAiJE0%2FztIgUzBU3ALs%2BlC%2BXkJ9vsFRRgJz30TVnKn0VXfEb76ww5NsFdFlmG0HH2JjvYWKqOcO3DX0ZBdIeTMsD0Qwm0Lpq51FPDpnNOGcHROTnNu%2FFr2GmOemSCLp3lcExd3REdwQ4jSNBP%2FxSClsIKzOWP39wAYHUchOh4p6Oeq3O7VKwl0WDQkmS2tHNeQDzesFpWbJwnq2kZV%2Bv0nJCoW7tY%2F78clclqjd9e%2BRssCmmxeuwrrOnBi3JD1jCq89kYw4UIDLnvz1RjardkSt7Zl9shRpQzOqnDvUwH35NKT3iiVaMXg2EiJC3iVoZu1Nr7A7%2BGzPvTUL22zzVtWMWWVOV5OUpdklFQqCgL6flgTE9B0rQbQ1YX%2FW57zCduKYbkBODCACUm6uuLTcAj5WoV3eF8SiAfgA9YMizPY4aoXT19GAbdkv5lH9ahiSvGfWP6SM0tFESjQDMTD97Ek7P2Uc2TCotPO8BjqmAQK7EVUYpgdSgtj8RW5k481xlI9a9jO%2FM5t2HHLzyBnQTtLUvsVosVJa4cwmUSHl4%2BP3qk9vLI3WaHkogsYWlwJ4adQpV%2FX2jq2Sv0TCSylLafAJoFzl%2BnogJbtJHYzG0IbsVe%2B8Z8sBGBoE%2BH7toNKN4bQaJ7GRMOYkNaehHWBoNk8Wb8kUg647WcHDTv1NTcAklbCTAZqpZV1pdYEyezXOSHrpwxw%3D&Expires=1738937512
https://mil-s3-portal-intelli-intelli-irixehr-staging-prod.s3.amazonaws.com/trove/out_sample/005ao54m-c566-7671-19c0-596o1os969x4/005ao54m-c566-7671-19c0-596o1os969x4_1557496d7ca3c40fd7180d3b8c7b0b561cd1ee35_masked.xml?AWSAccessKeyId=ASIASIMKARLO6WRCWWLK&Signature=vtXViNkODxPEn6GJ3%2F4MG2QQUis%3D&x-amz-security-token=IQoJb3JpZ2luX2VjELf%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FwEaCXVzLWVhc3QtMSJHMEUCIQCLFmdfK87a2HvlCNs%2BmX3aDZRWYlthJ8L45%2BAgdaZhhQIgJCsXl04MltcGNJ05qP%2B5IJQaVBpOBFrMifRNaaPgzSQqrwMIv%2F%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FARADGgwxNTU0NDUxMzYwOTMiDA1kDRECZ1GO1n3hViqDAzA9x829793Fg0hPtyrFGnJOequxhFDugA1anhqtLdAiJE0%2FztIgUzBU3ALs%2BlC%2BXkJ9vsFRRgJz30TVnKn0VXfEb76ww5NsFdFlmG0HH2JjvYWKqOcO3DX0ZBdIeTMsD0Qwm0Lpq51FPDpnNOGcHROTnNu%2FFr2GmOemSCLp3lcExd3REdwQ4jSNBP%2FxSClsIKzOWP39wAYHUchOh4p6Oeq3O7VKwl0WDQkmS2tHNeQDzesFpWbJwnq2kZV%2Bv0nJCoW7tY%2F78clclqjd9e%2BRssCmmxeuwrrOnBi3JD1jCq89kYw4UIDLnvz1RjardkSt7Zl9shRpQzOqnDvUwH35NKT3iiVaMXg2EiJC3iVoZu1Nr7A7%2BGzPvTUL22zzVtWMWWVOV5OUpdklFQqCgL6flgTE9B0rQbQ1YX%2FW57zCduKYbkBODCACUm6uuLTcAj5WoV3eF8SiAfgA9YMizPY4aoXT19GAbdkv5lH9ahiSvGfWP6SM0tFESjQDMTD97Ek7P2Uc2TCotPO8BjqmAQK7EVUYpgdSgtj8RW5k481xlI9a9jO%2FM5t2HHLzyBnQTtLUvsVosVJa4cwmUSHl4%2BP3qk9vLI3WaHkogsYWlwJ4adQpV%2FX2jq2Sv0TCSylLafAJoFzl%2BnogJbtJHYzG0IbsVe%2B8Z8sBGBoE%2BH7toNKN4bQaJ7GRMOYkNaehHWBoNk8Wb8kUg647WcHDTv1NTcAklbCTAZqpZV1pdYEyezXOSHrpwxw%3D&Expires=1738937512
https://mil-s3-portal-intelli-intelli-irixehr-staging-prod.s3.amazonaws.com/trove/out_sample/005ao54m-c566-7671-19c0-596o1os969x4/005ao54m-c566-7671-19c0-596o1os969x4_16e6aea35f4478ac95bd72a77945f989a4d0f09c_masked.xml?AWSAccessKeyId=ASIASIMKARLO6WRCWWLK&Signature=Vbu91%2Ft7CZmc9WN0FWvCg0uDi5c%3D&x-amz-security-token=IQoJb3JpZ2luX2VjELf%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FwEaCXVzLWVhc3QtMSJHMEUCIQCLFmdfK87a2HvlCNs%2BmX3aDZRWYlthJ8L45%2BAgdaZhhQIgJCsXl04MltcGNJ05qP%2B5IJQaVBpOBFrMifRNaaPgzSQqrwMIv%2F%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FARADGgwxNTU0NDUxMzYwOTMiDA1kDRECZ1GO1n3hViqDAzA9x829793Fg0hPtyrFGnJOequxhFDugA1anhqtLdAiJE0%2FztIgUzBU3ALs%2BlC%2BXkJ9vsFRRgJz30TVnKn0VXfEb76ww5NsFdFlmG0HH2JjvYWKqOcO3DX0ZBdIeTMsD0Qwm0Lpq51FPDpnNOGcHROTnNu%2FFr2GmOemSCLp3lcExd3REdwQ4jSNBP%2FxSClsIKzOWP39wAYHUchOh4p6Oeq3O7VKwl0WDQkmS2tHNeQDzesFpWbJwnq2kZV%2Bv0nJCoW7tY%2F78clclqjd9e%2BRssCmmxeuwrrOnBi3JD1jCq89kYw4UIDLnvz1RjardkSt7Zl9shRpQzOqnDvUwH35NKT3iiVaMXg2EiJC3iVoZu1Nr7A7%2BGzPvTUL22zzVtWMWWVOV5OUpdklFQqCgL6flgTE9B0rQbQ1YX%2FW57zCduKYbkBODCACUm6uuLTcAj5WoV3eF8SiAfgA9YMizPY4aoXT19GAbdkv5lH9ahiSvGfWP6SM0tFESjQDMTD97Ek7P2Uc2TCotPO8BjqmAQK7EVUYpgdSgtj8RW5k481xlI9a9jO%2FM5t2HHLzyBnQTtLUvsVosVJa4cwmUSHl4%2BP3qk9vLI3WaHkogsYWlwJ4adQpV%2FX2jq2Sv0TCSylLafAJoFzl%2BnogJbtJHYzG0IbsVe%2B8Z8sBGBoE%2BH7toNKN4bQaJ7GRMOYkNaehHWBoNk8Wb8kUg647WcHDTv1NTcAklbCTAZqpZV1pdYEyezXOSHrpwxw%3D&Expires=1738937512
https://mil-s3-portal-intelli-intelli-irixehr-staging-prod.s3.amazonaws.com/trove/out_sample/005ao54m-c566-7671-19c0-596o1os969x4/005ao54m-c566-7671-19c0-596o1os969x4_1a8322b4ef50813d913c456195eb0708a2dc4aa1_masked.xml?AWSAccessKeyId=ASIASIMKARLO6WRCWWLK&Signature=lC2ppQJ501ej6SJQU6KjKbNNGk8%3D&x-amz-security-token=IQoJb3JpZ2luX2VjELf%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FwEaCXVzLWVhc3QtMSJHMEUCIQCLFmdfK87a2HvlCNs%2BmX3aDZRWYlthJ8L45%2BAgdaZhhQIgJCsXl04MltcGNJ05qP%2B5IJQaVBpOBFrMifRNaaPgzSQqrwMIv%2F%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FARADGgwxNTU0NDUxMzYwOTMiDA1kDRECZ1GO1n3hViqDAzA9x829793Fg0hPtyrFGnJOequxhFDugA1anhqtLdAiJE0%2FztIgUzBU3ALs%2BlC%2BXkJ9vsFRRgJz30TVnKn0VXfEb76ww5NsFdFlmG0HH2JjvYWKqOcO3DX0ZBdIeTMsD0Qwm0Lpq51FPDpnNOGcHROTnNu%2FFr2GmOemSCLp3lcExd3REdwQ4jSNBP%2FxSClsIKzOWP39wAYHUchOh4p6Oeq3O7VKwl0WDQkmS2tHNeQDzesFpWbJwnq2kZV%2Bv0nJCoW7tY%2F78clclqjd9e%2BRssCmmxeuwrrOnBi3JD1jCq89kYw4UIDLnvz1RjardkSt7Zl9shRpQzOqnDvUwH35NKT3iiVaMXg2EiJC3iVoZu1Nr7A7%2BGzPvTUL22zzVtWMWWVOV5OUpdklFQqCgL6flgTE9B0rQbQ1YX%2FW57zCduKYbkBODCACUm6uuLTcAj5WoV3eF8SiAfgA9YMizPY4aoXT19GAbdkv5lH9ahiSvGfWP6SM0tFESjQDMTD97Ek7P2Uc2TCotPO8BjqmAQK7EVUYpgdSgtj8RW5k481xlI9a9jO%2FM5t2HHLzyBnQTtLUvsVosVJa4cwmUSHl4%2BP3qk9vLI3WaHkogsYWlwJ4adQpV%2FX2jq2Sv0TCSylLafAJoFzl%2BnogJbtJHYzG0IbsVe%2B8Z8sBGBoE%2BH7toNKN4bQaJ7GRMOYkNaehHWBoNk8Wb8kUg647WcHDTv1NTcAklbCTAZqpZV1pdYEyezXOSHrpwxw%3D&Expires=1738937512
https://mil-s3-portal-intelli-intelli-irixehr-staging-prod.s3.amazonaws.com/trove/out_sample/005ao54m-c566-7671-19c0-596o1os969x4/005ao54m-c566-7671-19c0-596o1os969x4_1b8923023e521f1984c047d3b1e08273f2d1ccc2_masked.xml?AWSAccessKeyId=ASIASIMKARLO6WRCWWLK&Signature=Dc4N1orb3ngUVPk9gekRzmCAAMM%3D&x-amz-security-token=IQoJb3JpZ2luX2VjELf%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FwEaCXVzLWVhc3QtMSJHMEUCIQCLFmdfK87a2HvlCNs%2BmX3aDZRWYlthJ8L45%2BAgdaZhhQIgJCsXl04MltcGNJ05qP%2B5IJQaVBpOBFrMifRNaaPgzSQqrwMIv%2F%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FARADGgwxNTU0NDUxMzYwOTMiDA1kDRECZ1GO1n3hViqDAzA9x829793Fg0hPtyrFGnJOequxhFDugA1anhqtLdAiJE0%2FztIgUzBU3ALs%2BlC%2BXkJ9vsFRRgJz30TVnKn0VXfEb76ww5NsFdFlmG0HH2JjvYWKqOcO3DX0ZBdIeTMsD0Qwm0Lpq51FPDpnNOGcHROTnNu%2FFr2GmOemSCLp3lcExd3REdwQ4jSNBP%2FxSClsIKzOWP39wAYHUchOh4p6Oeq3O7VKwl0WDQkmS2tHNeQDzesFpWbJwnq2kZV%2Bv0nJCoW7tY%2F78clclqjd9e%2BRssCmmxeuwrrOnBi3JD1jCq89kYw4UIDLnvz1RjardkSt7Zl9shRpQzOqnDvUwH35NKT3iiVaMXg2EiJC3iVoZu1Nr7A7%2BGzPvTUL22zzVtWMWWVOV5OUpdklFQqCgL6flgTE9B0rQbQ1YX%2FW57zCduKYbkBODCACUm6uuLTcAj5WoV3eF8SiAfgA9YMizPY4aoXT19GAbdkv5lH9ahiSvGfWP6SM0tFESjQDMTD97Ek7P2Uc2TCotPO8BjqmAQK7EVUYpgdSgtj8RW5k481xlI9a9jO%2FM5t2HHLzyBnQTtLUvsVosVJa4cwmUSHl4%2BP3qk9vLI3WaHkogsYWlwJ4adQpV%2FX2jq2Sv0TCSylLafAJoFzl%2BnogJbtJHYzG0IbsVe%2B8Z8sBGBoE%2BH7toNKN4bQaJ7GRMOYkNaehHWBoNk8Wb8kUg647WcHDTv1NTcAklbCTAZqpZV1pdYEyezXOSHrpwxw%3D&Expires=1738937512
https://mil-s3-portal-intelli-intelli-irixehr-staging-prod.s3.amazonaws.com/trove/out_sample/005ao54m-c566-7671-19c0-596o1os969x4/005ao54m-c566-7671-19c0-596o1os969x4_3246489cc868f679aa3a688cac40554a9e42a85c_masked.xml?AWSAccessKeyId=ASIASIMKARLO6WRCWWLK&Signature=FEgOrhn9zgMnEg7oBCn879WoNqQ%3D&x-amz-security-token=IQoJb3JpZ2luX2VjELf%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FwEaCXVzLWVhc3QtMSJHMEUCIQCLFmdfK87a2HvlCNs%2BmX3aDZRWYlthJ8L45%2BAgdaZhhQIgJCsXl04MltcGNJ05qP%2B5IJQaVBpOBFrMifRNaaPgzSQqrwMIv%2F%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FARADGgwxNTU0NDUxMzYwOTMiDA1kDRECZ1GO1n3hViqDAzA9x829793Fg0hPtyrFGnJOequxhFDugA1anhqtLdAiJE0%2FztIgUzBU3ALs%2BlC%2BXkJ9vsFRRgJz30TVnKn0VXfEb76ww5NsFdFlmG0HH2JjvYWKqOcO3DX0ZBdIeTMsD0Qwm0Lpq51FPDpnNOGcHROTnNu%2FFr2GmOemSCLp3lcExd3REdwQ4jSNBP%2FxSClsIKzOWP39wAYHUchOh4p6Oeq3O7VKwl0WDQkmS2tHNeQDzesFpWbJwnq2kZV%2Bv0nJCoW7tY%2F78clclqjd9e%2BRssCmmxeuwrrOnBi3JD1jCq89kYw4UIDLnvz1RjardkSt7Zl9shRpQzOqnDvUwH35NKT3iiVaMXg2EiJC3iVoZu1Nr7A7%2BGzPvTUL22zzVtWMWWVOV5OUpdklFQqCgL6flgTE9B0rQbQ1YX%2FW57zCduKYbkBODCACUm6uuLTcAj5WoV3eF8SiAfgA9YMizPY4aoXT19GAbdkv5lH9ahiSvGfWP6SM0tFESjQDMTD97Ek7P2Uc2TCotPO8BjqmAQK7EVUYpgdSgtj8RW5k481xlI9a9jO%2FM5t2HHLzyBnQTtLUvsVosVJa4cwmUSHl4%2BP3qk9vLI3WaHkogsYWlwJ4adQpV%2FX2jq2Sv0TCSylLafAJoFzl%2BnogJbtJHYzG0IbsVe%2B8Z8sBGBoE%2BH7toNKN4bQaJ7GRMOYkNaehHWBoNk8Wb8kUg647WcHDTv1NTcAklbCTAZqpZV1pdYEyezXOSHrpwxw%3D&Expires=1738937512
https://mil-s3-portal-intelli-intelli-irixehr-staging-prod.s3.amazonaws.com/trove/out_sample/005ao54m-c566-7671-19c0-596o1os969x4/005ao54m-c566-7671-19c0-596o1os969x4_39d5bf3941cea0291522ded940c930439970cca2_masked.xml?AWSAccessKeyId=ASIASIMKARLO6WRCWWLK&Signature=acW6Cj%2Bx4XwpfyL7TsS3pZfdcLc%3D&x-amz-security-token=IQoJb3JpZ2luX2VjELf%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FwEaCXVzLWVhc3QtMSJHMEUCIQCLFmdfK87a2HvlCNs%2BmX3aDZRWYlthJ8L45%2BAgdaZhhQIgJCsXl04MltcGNJ05qP%2B5IJQaVBpOBFrMifRNaaPgzSQqrwMIv%2F%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FARADGgwxNTU0NDUxMzYwOTMiDA1kDRECZ1GO1n3hViqDAzA9x829793Fg0hPtyrFGnJOequxhFDugA1anhqtLdAiJE0%2FztIgUzBU3ALs%2BlC%2BXkJ9vsFRRgJz30TVnKn0VXfEb76ww5NsFdFlmG0HH2JjvYWKqOcO3DX0ZBdIeTMsD0Qwm0Lpq51FPDpnNOGcHROTnNu%2FFr2GmOemSCLp3lcExd3REdwQ4jSNBP%2FxSClsIKzOWP39wAYHUchOh4p6Oeq3O7VKwl0WDQkmS2tHNeQDzesFpWbJwnq2kZV%2Bv0nJCoW7tY%2F78clclqjd9e%2BRssCmmxeuwrrOnBi3JD1jCq89kYw4UIDLnvz1RjardkSt7Zl9shRpQzOqnDvUwH35NKT3iiVaMXg2EiJC3iVoZu1Nr7A7%2BGzPvTUL22zzVtWMWWVOV5OUpdklFQqCgL6flgTE9B0rQbQ1YX%2FW57zCduKYbkBODCACUm6uuLTcAj5WoV3eF8SiAfgA9YMizPY4aoXT19GAbdkv5lH9ahiSvGfWP6SM0tFESjQDMTD97Ek7P2Uc2TCotPO8BjqmAQK7EVUYpgdSgtj8RW5k481xlI9a9jO%2FM5t2HHLzyBnQTtLUvsVosVJa4cwmUSHl4%2BP3qk9vLI3WaHkogsYWlwJ4adQpV%2FX2jq2Sv0TCSylLafAJoFzl%2BnogJbtJHYzG0IbsVe%2B8Z8sBGBoE%2BH7toNKN4bQaJ7GRMOYkNaehHWBoNk8Wb8kUg647WcHDTv1NTcAklbCTAZqpZV1pdYEyezXOSHrpwxw%3D&Expires=1738937512
https://mil-s3-portal-intelli-intelli-irixehr-staging-prod.s3.amazonaws.com/trove/out_sample/005ao54m-c566-7671-19c0-596o1os969x4/005ao54m-c566-7671-19c0-596o1os969x4_4527a80bdf7692f6e654d4ba3be278970eec5307_masked.xml?AWSAccessKeyId=ASIASIMKARLO6WRCWWLK&Signature=ZkCnzGaqA5WOCgOgCJw8%2F4GmL%2Fs%3D&x-amz-security-token=IQoJb3JpZ2luX2VjELf%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FwEaCXVzLWVhc3QtMSJHMEUCIQCLFmdfK87a2HvlCNs%2BmX3aDZRWYlthJ8L45%2BAgdaZhhQIgJCsXl04MltcGNJ05qP%2B5IJQaVBpOBFrMifRNaaPgzSQqrwMIv%2F%2F%2F%2F%2F%2F%2F%2F%2F%2F%2FARADGgwxNTU0NDUxMzYwOTMiDA1kDRECZ1GO1n3hViqDAzA9x829793Fg0hPtyrFGnJOequxhFDugA1anhqtLdAiJE0%2FztIgUzBU3ALs%2BlC%2BXkJ9vsFRRgJz30TVnKn0VXfEb76ww5NsFdFlmG0HH2JjvYWKqOcO3DX0ZBdIeTMsD0Qwm0Lpq51FPDpnNOGcHROTnNu%2FFr2GmOemSCLp3lcExd3REdwQ4jSNBP%2FxSClsIKzOWP39wAYHUchOh4p6Oeq3O7VKwl0WDQkmS2tHNeQDzesFpWbJwnq2kZV%2Bv0nJCoW7tY%2F78clclqjd9e%2BRssCmmxeuwrrOnBi3JD1jCq89kYw4UIDLnvz1RjardkSt7Zl9shRpQzOqnDvUwH35NKT3iiVaMXg2EiJC3iVoZu1Nr7A7%2BGzPvTUL22zzVtWMWWVOV5OUpdklFQqCgL6flgTE9B0rQbQ1YX%2FW57zCduKYbkBODCACUm6uuLTcAj5WoV3eF8SiAfgA9YMizPY4aoXT19GAbdkv5lH9ahiSvGfWP6SM0tFESjQDMTD97Ek7P2Uc2TCotPO8BjqmAQK7EVUYpgdSgtj8RW5k481xlI9a9jO%2FM5t2HHLzyBnQTtLUvsVosVJa4cwmUSHl4%2BP3qk9vLI3WaHkogsYWlwJ4adQpV%2FX2jq2Sv0TCSylLafAJoFzl%2BnogJbtJHYzG0IbsVe%2B8Z8sBGBoE%2BH7toNKN4bQaJ7GRMOYkNaehHWBoNk8Wb8kUg647WcHDTv1NTcAklbCTAZqpZV1pdYEyezXOSHrpwxw%3D&Expires=1738937512


## For pre-signed URLs, you can ingest directly from S3 by creating a Unity Catalog location, but I'll do it manually here.

In [0]:
# Take the sample pre-signed URL from the ingested reference view instead of pasting it here.
# A pre-signed URL embeds an AWS access key id, a signature and a session token, so a literal
# copy leaks credentials into the notebook and stops working once its Expires time passes.
_url_rows = spark.table("ccda_pre_signed_urls").select("pre_signed_urls").limit(1).collect()
if not _url_rows:
    raise ValueError("ccda_pre_signed_urls contains no pre-signed URL to test with")
testUrl = _url_rows[0]["pre_signed_urls"]


In [0]:
import requests

# Download the sample document behind the pre-signed URL.
# requests applies no timeout by default, so pass one explicitly; otherwise this cell can
# block indefinitely if the endpoint never answers.
strS3Url = testUrl
res = requests.get(strS3Url, timeout=30)

In [0]:
print(res.text)
# The run recorded below did not return the CCDA XML: the body is an S3 <Error> document
# with code ExpiredToken ("The provided token has expired."), i.e. the token carried by the
# pre-signed URL is no longer valid. A freshly issued URL is needed before parsing can start.

<?xml version="1.0" encoding="UTF-8"?>
<Error><Code>ExpiredToken</Code><Message>The provided token has expired.</Message><Token-0>IQoJb3JpZ2luX2VjELf//////////wEaCXVzLWVhc3QtMSJHMEUCIQCLFmdfK87a2HvlCNs+mX3aDZRWYlthJ8L45+AgdaZhhQIgJCsXl04MltcGNJ05qP+5IJQaVBpOBFrMifRNaaPgzSQqrwMIv///////////ARADGgwxNTU0NDUxMzYwOTMiDA1kDRECZ1GO1n3hViqDAzA9x829793Fg0hPtyrFGnJOequxhFDugA1anhqtLdAiJE0/ztIgUzBU3ALs+lC+XkJ9vsFRRgJz30TVnKn0VXfEb76ww5NsFdFlmG0HH2JjvYWKqOcO3DX0ZBdIeTMsD0Qwm0Lpq51FPDpnNOGcHROTnNu/Fr2GmOemSCLp3lcExd3REdwQ4jSNBP/xSClsIKzOWP39wAYHUchOh4p6Oeq3O7VKwl0WDQkmS2tHNeQDzesFpWbJwnq2kZV+v0nJCoW7tY/78clclqjd9e+RssCmmxeuwrrOnBi3JD1jCq89kYw4UIDLnvz1RjardkSt7Zl9shRpQzOqnDvUwH35NKT3iiVaMXg2EiJC3iVoZu1Nr7A7+GzPvTUL22zzVtWMWWVOV5OUpdklFQqCgL6flgTE9B0rQbQ1YX/W57zCduKYbkBODCACUm6uuLTcAj5WoV3eF8SiAfgA9YMizPY4aoXT19GAbdkv5lH9ahiSvGfWP6SM0tFESjQDMTD97Ek7P2Uc2TCotPO8BjqmAQK7EVUYpgdSgtj8RW5k481xlI9a9jO/M5t2HHLzyBnQTtLUvsVosVJa4cwmUSHl4+P3qk9vLI3WaHkogsYWlwJ4adQpV/X2jq2Sv0TCSylLafAJoFzl+nogJbtJHYzG0IbsVe+8Z8

# Parse the resultant XML into a dataframe, pull out the Medications and Problems, and iterate into One Big Table (OBT) or dataframe

# Combine OBT and other ref tables for analysis

# Store the result of the ingestion

# Write _INIT_ using the above

# Write tests for functions used in the processing
# Write validation and QA steps into pipeline

# Document and describe pipeline requirements for orchestration

# Set up orchestration

# Set notifications and logging