Data from the experiment 'Single-trial neural dynamics are dominated by richly varied movements' by Churchland et al. is available at:

https://labshare.cshl.edu/shares/library/repository/38599/

Use 

```wget -r -np -nH --cut-dirs=4 --accept-regex '.*\/SVD_Cam.*-Seg.*\.mat' --reject 'index.html' https://labshare.cshl.edu/shares/library/repository/38599/2pData/Animals/mSM49/SpatialDisc/30-Jul-2018/BehaviorVideo/ -P /home/sachinks/Data/raw/mouse-cshl```

command to download the data using terminal

The following code downloads the requested data using Python.

In [107]:
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin
import re
import pandas as pd
import os
import yaml

In [136]:
# Read the crawl/download settings from the YAML config file.
with open('../configs/download_data_2.yaml') as file:
    config = yaml.safe_load(file)

# Every setting used by the cells below lives under the top-level 'path' section.
base_url, pattern_url, file_list, output_dir = (
    config['path'][key]
    for key in ('base_url', 'pattern_url', 'file_list', 'output')
)

In [142]:
def find_paths(base_url, pattern_url, timeout=30):
    """Crawl an HTML directory listing and collect every URL matching a pattern.

    Starting at ``base_url``, each page is fetched and every ``<a>`` link on it
    is resolved against the page URL. A resolved URL that matches
    ``pattern_url`` (tested with ``re.match``, i.e. anchored at the start of the
    URL) is recorded and not descended into. Any other link whose ``href`` ends
    in ``/`` is treated as a sub-directory and crawled in turn. Each directory
    URL is fetched at most once. Pages that do not answer with HTTP 200 are
    reported on stdout and skipped.

    Args:
        base_url: URL of the directory listing where the crawl starts.
        pattern_url: Regular expression (string) applied to each resolved URL.
        timeout: Seconds to wait for each HTTP response before giving up.

    Returns:
        List of matching absolute URLs, in the order they were discovered.

    Raises:
        requests.RequestException: If a request fails at the network level
            (including a timeout).
    """
    # Compile once instead of re-parsing the pattern for every link.
    matcher = re.compile(pattern_url)
    matching_paths = []
    # Directory URLs already fetched; prevents endless recursion on self-links
    # such as "./" or "../" and avoids crawling the same listing twice.
    visited = set()

    def is_valid_path(href):
        if href is None:  # anchor without an href attribute
            return False
        if href.startswith('/shares/'):  # skip looking at parent directory
            return False
        if href.startswith('?'):  # irrelevant files
            return False
        return True

    def find_files_recursive(url):
        visited.add(url)

        # Send an HTTP GET request to the URL; the timeout keeps a stalled
        # server from hanging the whole crawl.
        response = requests.get(url, timeout=timeout)

        # Anything other than 200 is reported and this branch is abandoned.
        if response.status_code != 200:
            print("Failed to retrieve the webpage:", url)
            return

        # Parse the HTML content and walk every link on the page.
        soup = BeautifulSoup(response.text, "html.parser")
        for link in soup.find_all("a"):
            href = link.get("href")
            if not is_valid_path(href):
                continue

            # Join the link with the page URL to get the complete path.
            file_url = urljoin(url, href)

            # A match is recorded and never descended into.
            if matcher.match(file_url):
                matching_paths.append(file_url)
                print(f'{len(matching_paths)}: {file_url}')
                continue

            # A trailing slash marks a sub-directory listing.
            if href.endswith('/') and file_url not in visited:
                find_files_recursive(file_url)

    # Start the recursive search from the base URL
    find_files_recursive(base_url)

    return matching_paths


In [144]:
# Reuse the cached list of matching paths when it exists; otherwise crawl for it
if os.path.exists(file_list):
    print("Precomputed values already exist. Loading it...")
    # The cache is written without a header row, so it must be read with
    # header=None; otherwise pandas consumes the first path as the column
    # name and that path is silently dropped from the list.
    df = pd.read_csv(file_list, header=None)
    matching_paths = df.iloc[:, 0].values.tolist()
else:
    print("Precomputed values do not exist. Searching it...")
    matching_paths = find_paths(base_url, pattern_url)
    df = pd.DataFrame(matching_paths)
    if matching_paths:
        df.to_csv(file_list, index=False, header=False)
    else:
        # An empty cache file cannot be parsed back by pd.read_csv and would
        # make the next run fail, so only cache a non-empty result.
        print("No matching paths found; nothing was cached.")


Precomputed values do not exist. Searching it...
1: https://labshare.cshl.edu/shares/library/repository/38599/2pData/Animals/RS02/SpatialDisc/15-Nov-2018/
2: https://labshare.cshl.edu/shares/library/repository/38599/2pData/Animals/RS04/SpatialDisc/01-Nov-2018/
3: https://labshare.cshl.edu/shares/library/repository/38599/2pData/Animals/RS04/SpatialDisc/13-Nov-2018/
4: https://labshare.cshl.edu/shares/library/repository/38599/2pData/Animals/RS04/SpatialDisc/30-Oct-2018/
5: https://labshare.cshl.edu/shares/library/repository/38599/2pData/Animals/mSM49/SpatialDisc/03-Aug-2018/
6: https://labshare.cshl.edu/shares/library/repository/38599/2pData/Animals/mSM49/SpatialDisc/07-Aug-2018/
7: https://labshare.cshl.edu/shares/library/repository/38599/2pData/Animals/mSM49/SpatialDisc/14-Sep-2018/
8: https://labshare.cshl.edu/shares/library/repository/38599/2pData/Animals/mSM49/SpatialDisc/21-Sep-2018/
9: https://labshare.cshl.edu/shares/library/repository/38599/2pData/Animals/mSM49/SpatialDisc/30-Ju

In [102]:
import requests

# List of URLs to download (only the first two matches; widen the slice to fetch more)
urls = matching_paths[:2]

# Loop through the list of URLs and download each one
for url in urls:
    # Build the local path from selected URL components, counted from the end:
    # [-7], [-5], [-3], [-2] and the file name [-1]. For the URLs shown in this
    # notebook that is <data root>/<animal>/<date>/<folder>/<file>.
    url_parts = url.split("/")
    file_path = os.path.join(
        output_dir,
        url_parts[-7], url_parts[-5], url_parts[-3], url_parts[-2], url_parts[-1],
    )

    try:
        # Stream the body so a large file is never held fully in memory; the
        # timeout keeps a stalled server from hanging the loop forever.
        with requests.get(url, stream=True, timeout=60) as response:
            # Anything other than 200 is reported and the URL is skipped
            if response.status_code != 200:
                print(f"Failed to download: {url}")
                continue

            # Create the target directory if needed (no check-then-create race)
            os.makedirs(os.path.dirname(file_path), exist_ok=True)

            # Write to a temporary name first and rename at the end, so an
            # interrupted transfer never leaves a truncated file at file_path.
            partial_path = file_path + '.part'
            with open(partial_path, 'wb') as file:
                for chunk in response.iter_content(chunk_size=1024 * 1024):
                    file.write(chunk)
            os.replace(partial_path, file_path)
    except requests.RequestException as error:
        # A network failure on one URL should not abort the remaining downloads
        print(f"Failed to download: {url} ({error})")
        continue

    print(f"Downloaded: {url}")

https://labshare.cshl.edu/shares/library/repository/38599/2pData/Animals/RS02/SpatialDisc/15-Nov-2018/BehaviorVideo/SVD_Cam1-Seg1.mat
Downloaded: https://labshare.cshl.edu/shares/library/repository/38599/2pData/Animals/RS02/SpatialDisc/15-Nov-2018/BehaviorVideo/SVD_Cam1-Seg1.mat
https://labshare.cshl.edu/shares/library/repository/38599/2pData/Animals/RS02/SpatialDisc/15-Nov-2018/BehaviorVideo/SVD_Cam1-Seg2.mat
Downloaded: https://labshare.cshl.edu/shares/library/repository/38599/2pData/Animals/RS02/SpatialDisc/15-Nov-2018/BehaviorVideo/SVD_Cam1-Seg2.mat
