
fix yaml to run on schedule, add more tests, & move file configuration instructions from yaml file to python files

update demo
donishadsmith committed Jul 20, 2024
1 parent ab9e248 commit 5bf10ba
Showing 8 changed files with 796 additions and 592 deletions.
40 changes: 6 additions & 34 deletions .github/workflows/testing.yaml
@@ -3,9 +3,9 @@ name: testing
 on:
   push:
     branches: [main, master, test]
-  # Run weekly on Friday
-  schedule:
-    - cron: 0 0 * * 5
+  # Run weekly on Monday
+  schedule:
+    - cron: 0 0 * * 1

 jobs:
   test-build:
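Side note on the new expression: GitHub Actions cron uses the five POSIX fields (minute, hour, day-of-month, month, day-of-week) evaluated in UTC, so `0 0 * * 1` fires at 00:00 UTC every Monday, where the old `0 0 * * 5` fired on Fridays. A quick local sanity check of the schedule, assuming the third-party croniter package:

```python
from datetime import datetime
from croniter import croniter  # third-party: pip install croniter

# Next three firings of the workflow's new schedule, starting from the commit date.
itr = croniter("0 0 * * 1", datetime(2024, 7, 20))
for _ in range(3):
    print(itr.get_next(datetime))  # 2024-07-22, 2024-07-29, 2024-08-05 -- all Mondays
```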
@@ -14,7 +14,7 @@ jobs:
       matrix:
         os: ['ubuntu-latest', 'macos-latest', 'windows-latest']
         python-ver: ['3.9', '3.10', '3.11', '3.12']
-    name: Python ${{ matrix.python-ver }} check on ${{ matrix.os }}
+    name: Python ${{ matrix.python-ver }} check on ${{ matrix.os }}
     steps:
       - uses: actions/checkout@v3
       - uses: actions/setup-python@v2
@@ -35,35 +35,7 @@ jobs:
         if: ${{ matrix.os == 'ubuntu-latest' || matrix.os == 'macos-latest' }}
         run: |
           pytest test_TimeseriesExtractor.py
+          pytest test_TimeseriesExtractor_config.py
+          pytest test_TimeseriesExtractor_modified.py
         shell: bash
         working-directory: tests
-      - name: Change file names for additional TimeseriesExtractor tests
-        if: ${{ matrix.os == 'ubuntu-latest' || matrix.os == 'macos-latest' }}
-        run: |
-          mv sub-01_ses-002_task-rest_run-001_desc-confounds_timeseries.tsv sub-01_task-rest_desc-confounds_timeseries.tsv
-          mv sub-01_ses-002_task-rest_run-001_desc-confounds_timeseries.json sub-01_task-rest_desc-confounds_timeseries.json
-          mv sub-01_ses-002_task-rest_run-001_space-MNI152NLin2009cAsym_desc-brain_mask.nii.gz sub-01_task-rest_space-MNI152NLin2009cAsym_desc-brain_mask.nii.gz
-          mv sub-01_ses-002_task-rest_run-001_space-MNI152NLin2009cAsym_desc-preproc_bold.nii.gz sub-01_task-rest_space-MNI152NLin2009cAsym_desc-preproc_bold.nii.gz
-        shell: bash
-        working-directory: tests/ds000031_R1.0.4_ses001-022/ds000031_R1.0.4/derivatives/fmriprep_1.0.0/fmriprep/sub-01/ses-002/func
-      - name: Perform additional TimeseriesExtractor test
-        if: ${{ matrix.os == 'ubuntu-latest' || matrix.os == 'macos-latest' }}
-        run: |
-          pytest test_TimeseriesExtractor_additional.py
-        shell: bash
-        working-directory: tests
-      - name: Remove mask file
-        if: ${{ matrix.os == 'ubuntu-latest' || matrix.os == 'macos-latest' }}
-        run: |
-          rm sub-01_task-rest_space-MNI152NLin2009cAsym_desc-brain_mask.nii.gz
-        shell: bash
-        working-directory: tests/ds000031_R1.0.4_ses001-022/ds000031_R1.0.4/derivatives/fmriprep_1.0.0/fmriprep/sub-01/ses-002/func
-      - name: Perform additional TimeseriesExtractor test
-        if: ${{ matrix.os == 'ubuntu-latest' || matrix.os == 'macos-latest' }}
-        run: |
-          pytest test_TimeseriesExtractor_additional.py
-        shell: bash
-        working-directory: tests



2 changes: 1 addition & 1 deletion README.md
@@ -60,7 +60,7 @@ parcel_approach = {"AAL": {"version": "SPM12"}}
 If using a "Custom" parcellation approach, ensure each region in your dataset includes both left (lh) and right (rh) hemisphere versions of nodes (bilateral nodes).

 Custom Key Structure:
-- `"maps'`: Directory path containing necessary parcellation files. Ensure files are in a supported format (e.g., .nii for NifTI files). For plotting purposes, this key is not required.
+- `"maps"`: Directory path containing necessary parcellation files. Ensure files are in a supported format (e.g., .nii for NifTI files). For plotting purposes, this key is not required.
 - `"nodes"`: List of all node labels used in your study, arranged in the exact order they correspond to indices in your parcellation files.
   Each label should match the parcellation index it represents. For example, if the parcellation label "0" corresponds to the left hemisphere
   visual cortex area 1, then "LH_Vis1" should occupy the 0th index in this list. This ensures that data extraction and analysis accurately reflect the anatomical regions intended. For timeseries extraction, this key is not required.
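To make the key structure concrete, a minimal "Custom" configuration might look like the sketch below; the map path and node labels are illustrative placeholders (the new test file later in this commit passes `{"Custom": {"maps": os.path.join(dir, "HCPex.nii.gz")}}`):

```python
import os

# Sketch of a "Custom" parcel_approach following the README's key structure;
# the path and labels are placeholder assumptions, not shipped files.
parcel_approach = {
    "Custom": {
        "maps": os.path.join("parcellations", "custom_parcellation.nii.gz"),  # not required for plotting
        # Position in this list must equal the label index in the parcellation file:
        "nodes": ["LH_Vis1", "RH_Vis1"],  # bilateral lh/rh versions of each region
    }
}
```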
1,093 changes: 585 additions & 508 deletions demo.ipynb

Large diffs are not rendered by default.

33 changes: 23 additions & 10 deletions tests/test_CAP.py
@@ -191,9 +191,8 @@ def test_groups_and_cluster_selection():
     assert cap_analysis.caps["A"]["CAP-2"].shape == (100,)
     assert cap_analysis.caps["B"]["CAP-1"].shape == (100,)
     assert cap_analysis.caps["B"]["CAP-2"].shape == (100,)
-    # Elbow sometimes does find the elbow with random data
-
+    # Elbow sometimes does find the elbow with random data, uses kneed to locate elbow
     try:
         cap_analysis = CAP(parcel_approach=extractor.parcel_approach, groups={"A": [1,2,3,5], "B": [4,6,7,8,9,10,7]})
         cap_analysis.get_caps(subject_timeseries=extractor.subject_timeseries,
@@ -292,13 +291,13 @@ def test_multiple_methods():

     cap_analysis.calculate_metrics(subject_timeseries=new_timeseries, return_df=True)

-    # No crashing
-    met1 = cap_analysis.calculate_metrics(subject_timeseries=extractor.subject_timeseries, return_df=True, runs=1)
-    met2 = cap_analysis.calculate_metrics(subject_timeseries=extractor.subject_timeseries, return_df=True, runs=1,
-                                          continuous_runs=True)
-
-    # If only one run continuous_runs should not differ
-    assert met1["persistence"].equals(met2["persistence"])
+    for i in range(1,4):
+        met1 = cap_analysis.calculate_metrics(subject_timeseries=extractor.subject_timeseries, return_df=True, runs=i)
+        met2 = cap_analysis.calculate_metrics(subject_timeseries=extractor.subject_timeseries, return_df=True, runs=i,
+                                              continuous_runs=True)
+        # If only one run continuous_runs should not differ
+        assert met1["persistence"].equals(met2["persistence"])

     met1 = cap_analysis.calculate_metrics(subject_timeseries=extractor.subject_timeseries, return_df=True)
     met2 = cap_analysis.calculate_metrics(subject_timeseries=extractor.subject_timeseries, return_df=True,continuous_runs=True)
@@ -309,6 +308,20 @@ def test_multiple_methods():
     # Continuous run should have 1/3 the number of rows since each subject in the randomized data has three runs
     assert met1["persistence"].shape[0]/3 == met2["persistence"].shape[0]

+    # Counts and Temporal; temporal_fraction is frequency converted to proportion
+    cap_analysis.get_caps(subject_timeseries=extractor.subject_timeseries,
+                          n_clusters=[2,3,4,5], cluster_selection_method="silhouette")
+    counts = cap_analysis.calculate_metrics(subject_timeseries=extractor.subject_timeseries, return_df=True, metrics="counts")["counts"]
+    temp = cap_analysis.calculate_metrics(subject_timeseries=extractor.subject_timeseries, return_df=True, metrics="temporal_fraction")["temporal_fraction"]
+
+    assert counts[["CAP-1", "CAP-2"]].map(lambda x: x/100).equals(temp[["CAP-1", "CAP-2"]])
+
+    # Check for continuous too
+    counts = cap_analysis.calculate_metrics(subject_timeseries=extractor.subject_timeseries, return_df=True, metrics="counts", continuous_runs=True)["counts"]
+    temp = cap_analysis.calculate_metrics(subject_timeseries=extractor.subject_timeseries, return_df=True, metrics="temporal_fraction", continuous_runs=True)["temporal_fraction"]
+
+    assert counts[["CAP-1", "CAP-2"]].map(lambda x: x/300).equals(temp[["CAP-1", "CAP-2"]])
+
     cap_analysis.caps2plot(subplots=True, xlabel_rotation=90, sharey=True, borderwidths=10, show_figs=False)

     cap_analysis.caps2plot(subplots=False, yticklabels_size=5, wspace = 0.1, visual_scope="nodes", xlabel_rotation=90,
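The new assertions pin down the relationship between the two metrics: each run in the randomized test data has 100 frames, so temporal_fraction is just counts divided by 100, and with `continuous_runs=True` three concatenated runs give a divisor of 300. The same arithmetic in plain NumPy, as a sketch of the invariant rather than the neurocaps API:

```python
import numpy as np

rng = np.random.default_rng(0)
frames_per_run = 100                           # matches the randomized test data
labels = rng.integers(0, 2, frames_per_run)    # frame-wise CAP assignments (0 = CAP-1, 1 = CAP-2)

counts = np.bincount(labels, minlength=2)      # "counts": frames spent in each CAP
temporal_fraction = counts / labels.size       # "temporal_fraction": counts as a proportion
assert np.allclose(temporal_fraction, counts / 100)  # what the test asserts per run

# Treating three runs as one continuous run triples the divisor:
continuous = np.concatenate([rng.integers(0, 2, frames_per_run) for _ in range(3)])
counts_cont = np.bincount(continuous, minlength=2)
assert np.allclose(counts_cont / continuous.size, counts_cont / 300)
```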
3 changes: 2 additions & 1 deletion tests/test_TimeseriesExtractor.py
@@ -128,11 +128,12 @@ def test_check_parallel_and_non_parallel():
     extractor.get_bold(bids_dir=bids_dir, session='002', runs="001",task="rest", pipeline_name=pipeline_name,
                        tr=1.2, n_cores=1)

-    parallel_timeseries = extractor.subject_timeseries["01"]["run-001"]
+    parallel_timeseries = copy.deepcopy(extractor.subject_timeseries["01"]["run-001"])

     extractor.get_bold(bids_dir=bids_dir, session='002', runs="001",task="rest", pipeline_name=pipeline_name,
                        tr=1.2, n_cores=None)

     assert extractor.subject_timeseries["01"]["run-001"].shape[0] == 40
+    assert np.array_equal(parallel_timeseries, extractor.subject_timeseries["01"]["run-001"])

 @pytest.mark.parametrize("use_confounds", [True, False])
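The `copy.deepcopy` here is an aliasing guard: a bare assignment keeps a reference into `extractor.subject_timeseries`, and if the second `get_bold` call mutates that storage in place, the new `np.array_equal` assertion would compare the array with itself. A minimal sketch of the pitfall, assuming in-place mutation (not necessarily neurocaps' internals):

```python
import copy
import numpy as np

store = {"01": {"run-001": np.zeros(3)}}
alias = store["01"]["run-001"]                    # reference, not a snapshot
snapshot = copy.deepcopy(store["01"]["run-001"])  # independent copy

store["01"]["run-001"][:] = 1.0                   # in-place overwrite, as a second extraction might do
print(np.array_equal(alias, store["01"]["run-001"]))     # True  -- vacuously compares the array with itself
print(np.array_equal(snapshot, store["01"]["run-001"]))  # False -- a real before/after comparison
```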
38 changes: 0 additions & 38 deletions tests/test_TimeseriesExtractor_additional.py

This file was deleted.

96 changes: 96 additions & 0 deletions tests/test_TimeseriesExtractor_config.py
@@ -0,0 +1,96 @@
import os, glob, shutil, pytest, numpy as np, pandas as pd
from neurocaps.extraction import TimeseriesExtractor

dir = os.path.dirname(__file__)
bids_dir = os.path.join(dir, "ds000031_R1.0.4_ses001-022/ds000031_R1.0.4/")
pipeline_name = "fmriprep_1.0.0/fmriprep"
confounds=["Cosine*", "aComp*", "Rot*"]
parcel_approach = {"Custom": {"maps": os.path.join(dir, "HCPex.nii.gz")}}

work_dir = os.path.join(bids_dir,"derivatives",pipeline_name)
# Duplicate data to create a subject 02 folder
cmd = f"mkdir -p {work_dir}/sub-02 && cp -r {work_dir}/sub-01/* {work_dir}/sub-02/"
os.system(cmd)
files = glob.glob(os.path.join(work_dir, "sub-02/ses-002/func", "*"))
[os.rename(x,x.replace("sub-01_","sub-02_" )) for x in files]

# Add another session for sub 01
cmd = f"mkdir -p {work_dir}/sub-01/ses-003 && cp -r {work_dir}/sub-01/ses-002/* {work_dir}/sub-01/ses-003"
os.system(cmd)
files = glob.glob(os.path.join(work_dir, "sub-01/ses-003/func", "*"))
[os.rename(x,x.replace("ses-002_","ses-003_" )) for x in files]

# Add second run to sub_01
files = glob.glob(os.path.join(work_dir, "sub-01/ses-002/func","*"))
[shutil.copyfile(x,x.replace("run-001","run-002")) for x in files]

# Modify confound data for run 002 of subject 01 and subject 02
confound_files = glob.glob(os.path.join(work_dir, "sub-01/ses-002/func","*run-002*confounds_timeseries.tsv")) + glob.glob(os.path.join(work_dir, "sub-02/ses-002/func","*run-001*confounds_timeseries.tsv"))
for file in confound_files:
    confound_df = pd.read_csv(file, sep="\t")
    confound_df["Cosine00"] = [x[0] for x in np.random.rand(40,1)]
    confound_df.to_csv(file, sep="\t", index=None)

# Should be able to retrieve and append data for each run and subject; Demonstrates it can retrieve subject specific file content
@pytest.mark.parametrize("n_cores", [None,1])
def test_append(n_cores):
    parcel_approach = {"Schaefer": {"yeo_networks": 7}}
    extractor = TimeseriesExtractor(parcel_approach=parcel_approach, standardize="zscore_sample",
                                    use_confounds=True, detrend=True, low_pass=0.15, high_pass=0.08,
                                    confound_names=confounds)

    extractor.get_bold(bids_dir=bids_dir, task="rest", session="002",pipeline_name=pipeline_name, tr=1.2, n_cores=n_cores)

    assert extractor.subject_timeseries["01"]["run-001"].shape == (40,400)
    assert extractor.subject_timeseries["01"]["run-002"].shape == (40,400)
    assert extractor.subject_timeseries["02"]["run-001"].shape == (40,400)

    assert ["run-001", "run-002"] == list(extractor.subject_timeseries["01"])
    assert ["run-001"] == list(extractor.subject_timeseries["02"])
    assert not np.array_equal(extractor.subject_timeseries["01"]["run-001"], extractor.subject_timeseries["01"]["run-002"])
    assert not np.array_equal(extractor.subject_timeseries["02"]["run-001"], extractor.subject_timeseries["01"]["run-002"])

@pytest.mark.parametrize("runs",["001", ["002"]])
def test_runs(runs):
    parcel_approach = {"Schaefer": {"n_rois": 400}}
    extractor = TimeseriesExtractor(parcel_approach=parcel_approach, standardize="zscore_sample",
                                    use_confounds=True, detrend=True, low_pass=0.15, high_pass=0.08,
                                    confound_names=confounds)

    extractor.get_bold(bids_dir=bids_dir, task="rest", session="002",runs=runs, pipeline_name=pipeline_name, tr=1.2)

    if runs == "001":
        assert ["01", "02"] == list(extractor.subject_timeseries)
        assert extractor.subject_timeseries["01"]["run-001"].shape == (40,400)
        assert extractor.subject_timeseries["02"]["run-001"].shape == (40,400)

        assert ["run-001"] == list(extractor.subject_timeseries["01"])
        assert ["run-001"] == list(extractor.subject_timeseries["02"])
        assert not np.array_equal(extractor.subject_timeseries["02"]["run-001"], extractor.subject_timeseries["01"]["run-001"])
    else:
        assert ["01"] == list(extractor.subject_timeseries)
        assert ["run-002"] == list(extractor.subject_timeseries["01"])

def test_session():
    parcel_approach = {"Schaefer": {"yeo_networks": 7}}
    extractor = TimeseriesExtractor(parcel_approach=parcel_approach, standardize="zscore_sample",
                                    use_confounds=True, detrend=True, low_pass=0.15, high_pass=0.08,
                                    confound_names=confounds)

    extractor.get_bold(bids_dir=bids_dir, task="rest", session="003",pipeline_name=pipeline_name, tr=1.2)

    # Only sub 01 and run-001 should be in subject_timeseries
    assert extractor.subject_timeseries["01"]["run-001"].shape == (40,400)

    assert ["run-001"] == list(extractor.subject_timeseries["01"])
    assert ["02"] not in list(extractor.subject_timeseries)

def test_session_error():
    parcel_approach = {"Schaefer": {"yeo_networks": 7}}
    extractor = TimeseriesExtractor(parcel_approach=parcel_approach, standardize="zscore_sample",
                                    use_confounds=True, detrend=True, low_pass=0.15, high_pass=0.08,
                                    confound_names=confounds)

    # Should raise value error since sub-01 will have 2 sessions detected
    with pytest.raises(ValueError):
        extractor.get_bold(bids_dir=bids_dir, task="rest",pipeline_name=pipeline_name, tr=1.2)