### 1. Extract All the Fields from UK Biobank

In [None]:
### Getting File ready for Table Exporter in DNANexus
## This extracts all the fields into 3 files. We are mainly interested in the data.dataset.data_dictionary.csv file.
# Remember that 'data.dataset' must be replaced with the name of the dataset in your UK Biobank project folder

import dxpy
import subprocess

# Look up the dispensed dataset record in the root folder of the project.
# Replace 'data.dataset' with the name of your own dataset.
dataset_record = dxpy.find_one_data_object(
    typename='Dataset',
    name='data.dataset',
    folder='/',
    name_mode='glob',
)
dispensed_dataset_id = dataset_record['id']

# Look up the project ID
project_record = dxpy.find_one_project()
project_id = project_record["id"]

# The dataset is addressed as "<project id>:<dataset record id>"
dataset = ":".join((project_id, dispensed_dataset_id))

# Run `dx extract_dataset` with comma-delimited output; check_call raises
# CalledProcessError if the command exits with a non-zero status.
cmd = [
    "dx",
    "extract_dataset",
    dataset,
    "-ddd",
    "--delimiter",
    ",",
]
subprocess.check_call(cmd)


### 2. Filter the Fields that you need into a text file ready for Table Exporter

In [None]:
import pandas as pd

# Read the data dictionary CSV into a DataFrame
# (replace with the actual path to your file)
df = pd.read_csv('data.dataset.data_dictionary.csv')

# Simple filter: keep every row that has a value in 'units', plus the
# participant identifier row(s) where 'name' is 'eid'.
has_units = df['units'].notna()
is_participant_id = df['name'].eq('eid')
filtered_df = df.loc[has_units | is_participant_id]

# Only the field names are needed for the output file
name_column = filtered_df['name']

# Write one field name per line, with no header row and no index column
name_column.to_csv('unit_filter.txt', index=False, header=False)

### 3. Export File back to UK Biobank RAP

In [None]:
# Export Data saved on Jupyter Notebook back to RAP
import dxpy
     
# Local files to export to the RAP. Adjust this list to match the files
# you actually produced.
# 'unit_filter.txt' is the field-name list written by the filtering step;
# the three 'data.dataset.*.csv' files are the dictionaries extracted from
# the dataset (rename them if your dataset is not called 'data.dataset').
file_paths = [
    "unit_filter.txt",
    "data.dataset.codings.csv",
    "data.dataset.data_dictionary.csv",
    "data.dataset.entity_dictionary.csv",
]

# Destination folder on the RAP.
# The path must be absolute, i.e. it has to start with a forward slash "/".
destination_folder = "/MSc Project/Output"

# Upload each local file into the destination folder
for file_path in file_paths:
    uploaded_file = dxpy.upload_local_file(
        filename=file_path,
        folder=destination_folder,
    )