# Illustrate reading CSV files from Analysis containers within Dataviews in the SDK

Flywheel v16.8 or higher

This notebook shows the approach for loading CSV files into a dataview.

See the online SDK help for more details: https://flywheel-io.gitlab.io/product/backend/sdk/branches/master/python/data_views.html#files



## Install SDK and start client

In [None]:
!pip install flywheel-sdk

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting flywheel-sdk
  Downloading flywheel_sdk-16.15.2-py2.py3-none-any.whl (1.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m9.7 MB/s[0m eta [36m0:00:00[0m
Collecting requests-toolbelt
  Downloading requests_toolbelt-0.10.1-py2.py3-none-any.whl (54 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m54.5/54.5 KB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: requests-toolbelt, flywheel-sdk
Successfully installed flywheel-sdk-16.15.2 requests-toolbelt-0.10.1


In [None]:
import pandas as pd

In [None]:
import flywheel
from getpass import getpass
# Load the Flywheel API key from a local cache file; on first use, prompt for it
# and cache it for later runs.
# NOTE: the key is stored in plain text in the working directory -- keep
# api_key.txt out of version control and shared folders.
try:
    with open("api_key.txt") as file:
        # Strip surrounding whitespace: a trailing newline (e.g. added by a text
        # editor) would otherwise become part of the key handed to the client.
        api_key = file.read().strip()
except FileNotFoundError:
    # Strip here too, so stray spaces from a copy/paste are never cached.
    api_key = getpass("Enter your Flywheel API Key:").strip()
    with open("api_key.txt",'w') as file:
        file.write(api_key)

Enter your Flywheel API Key:··········


In [None]:
# Open the client connection, then report the logged-in user, the site URL and
# the version information returned by the server.
fw = flywheel.Client(api_key)
self = fw.get_current_user()
config = fw.get_config()
api_url = config['site']['api_url']
print('I am %s %s, connected to %s.' % (self.firstname, self.lastname, api_url))

version = fw.get_version()
version_fields = (version.flywheel_release, version.cli_version, version.database)
print('Flywheel release: %s\nCLI version: %s\nDatabase version: %s' % version_fields)

print("API Docs:", api_url+"/docs")

## Locate the project

In [None]:
# Look up the project container by its path.
project_path = "ml/ADNI"
project = fw.lookup(project_path)

## What columns do we want from the csv file?
The CSV files are stored in Flywheel, so the column names listed below were taken from one example file. We assume every CSV file has the same columns.

In [None]:
# Column headers to pull out of each matched CSV file.
csv_columns = [
    'Container',
    'Name',
    'Status',
    'Origin Path',
    'Export Path',
    'Found Files',
    'Created Files',
    'Failed Files',
]

## Create the dataview

In [None]:
# Metadata columns to include in the dataview alongside the CSV columns.
columns = ['subject.label', 'session.label']

In [None]:
# Specify the dataview: collect the options first, then hand them to the builder.
view_options = dict(
    label='SDK Analysis CSV Aggregation',
    columns=columns,
    container='session',  # Needed for file metadata
    # analysis_label='',
    analysis_gear_name='session-export',
    filename='*.csv',  # Needed for file metadata
    match='newest',
    process_files=True,
    include_ids=False,
    include_labels=False,
    sort=False,
)
builder = flywheel.ViewBuilder(**view_options)


In [None]:
#Use the newest file if there are dups in the match
#NOTE(review): left disabled; appears redundant with match='newest' passed to ViewBuilder above -- confirm before removing
#builder.file_match('newest')



In [None]:
# Register every CSV column name with the builder, one call per column.
for csv_column in csv_columns:
    builder.file_column(csv_column)


In [None]:
# Create the dataview specification from everything configured on the builder
# (constructor options plus the CSV file columns added above).
sdk_dataview = builder.build()

In [None]:
# Create the Dataview in Flywheel, attached to the project; the value returned
# by add_view is kept in view_id.
# NOTE(review): view_id is not used again in this notebook -- the cleanup cell
# finds the view by its label instead.
view_id = fw.add_view(project.id, sdk_dataview)

In [None]:
#Execute the dataview specification against the project and wait for the result,
#which is stored in df (the spec object is passed here, not the saved view_id)
#This may take 5-10 minutes
df = fw.read_view_dataframe(sdk_dataview, project.id)

In [None]:
# Show the result as the cell's output (the notebook truncates long frames with "...")
df

Unnamed: 0,subject.label,session.label,Container,Name,Status,Origin Path,Export Path,Found Files,Created Files,Failed Files,errors
0,002_S_0295,2006-11-02_08_16_44.0,subject,002_S_0295,created,ml/ADNI/002_S_0295,qian/export_adni_1/002_S_0295,,,,
1,002_S_0295,2006-11-02_08_16_44.0,session,2006-11-02_08_16_44.0,created,ml/ADNI/002_S_0295/2006-11-02_08_16_44.0,qian/export_adni_1/002_S_0295/2006-11-02_08_16...,,,,
2,002_S_0295,2006-11-02_08_16_44.0,acquisition,Hippocampal_Mask,created,ml/ADNI/002_S_0295/2006-11-02_08_16_44.0/Hippo...,qian/export_adni_1/002_S_0295/2006-11-02_08_16...,,('ADNI_002_S_0295_MR_Hippocampal_Mask_Hi_20080...,,
3,002_S_0295,2006-11-02_08_16_44.0,acquisition,MIDAS_Whole_Brain_Mask,created,ml/ADNI/002_S_0295/2006-11-02_08_16_44.0/MIDAS...,qian/export_adni_1/002_S_0295/2006-11-02_08_16...,,('ADNI_002_S_0295_MR_MIDAS_Whole_Brain_Mask_Br...,,
4,016_S_6381,y2,subject,016_S_6381,created,ml/ADNI/016_S_6381,nicolas/ADNI-perimeter-test/016_S_6381,,,,
...,...,...,...,...,...,...,...,...,...,...,...
15799,941_S_6580,y1,,,,,,,,,
15800,941_S_6580,y2,,,,,,,,,
15801,941_S_6581,sc,,,,,,,,,
15802,941_S_6581,y2,,,,,,,,,


In [None]:
# Display df with the pandas row limit set to 100 for this cell only;
# the previous setting is restored when the context exits.
row_limit = pd.option_context('display.max_rows',100)
with row_limit:
    display(df)

Unnamed: 0,subject.label,session.label,Container,Name,Status,Origin Path,Export Path,Found Files,Created Files,Failed Files,errors
0,002_S_0295,2006-11-02_08_16_44.0,subject,002_S_0295,created,ml/ADNI/002_S_0295,qian/export_adni_1/002_S_0295,,,,
1,002_S_0295,2006-11-02_08_16_44.0,session,2006-11-02_08_16_44.0,created,ml/ADNI/002_S_0295/2006-11-02_08_16_44.0,qian/export_adni_1/002_S_0295/2006-11-02_08_16...,,,,
2,002_S_0295,2006-11-02_08_16_44.0,acquisition,Hippocampal_Mask,created,ml/ADNI/002_S_0295/2006-11-02_08_16_44.0/Hippo...,qian/export_adni_1/002_S_0295/2006-11-02_08_16...,,('ADNI_002_S_0295_MR_Hippocampal_Mask_Hi_20080...,,
3,002_S_0295,2006-11-02_08_16_44.0,acquisition,MIDAS_Whole_Brain_Mask,created,ml/ADNI/002_S_0295/2006-11-02_08_16_44.0/MIDAS...,qian/export_adni_1/002_S_0295/2006-11-02_08_16...,,('ADNI_002_S_0295_MR_MIDAS_Whole_Brain_Mask_Br...,,
4,016_S_6381,y2,subject,016_S_6381,created,ml/ADNI/016_S_6381,nicolas/ADNI-perimeter-test/016_S_6381,,,,
...,...,...,...,...,...,...,...,...,...,...,...
15799,941_S_6580,y1,,,,,,,,,
15800,941_S_6580,y2,,,,,,,,,
15801,941_S_6581,sc,,,,,,,,,
15802,941_S_6581,y2,,,,,,,,,


## Clean up the Flywheel objects

### Delete the Dataview

In [None]:
# Walk every dataview attached to the project, print its label, and delete the
# ones whose label matches the view created by this notebook.
labels_to_delete = ['SDK Analysis CSV Aggregation']
for dv in fw.get_views(project.id):
    print(dv.label,end='---> ')
    if dv.label not in labels_to_delete:
        print('remaining')
        continue
    fw.delete_view(dv.id)
    print('deleted')

FileDV---> remaining
jen testing promoted---> remaining
Alex test data view sitewide---> remaining
jen6---> remaining
SDK Data View---> deleted
