In [11]:
from bioblend.galaxy import GalaxyInstance
from bioblend.galaxy import dataset_collections
import os
import json
import time
import csv
import datetime

In [12]:
# Connection settings for the Galaxy server used throughout the notebook.
# The API key is read from the environment so that it is never stored in the
# notebook itself; a KeyError here means `my_galaxy_api` is not set.
server = 'https://usegalaxy.eu/'
api_key = os.environ['my_galaxy_api']
gi = GalaxyInstance(url=server, key=api_key)

In [3]:
# show all histories
# gi.histories.get_histories()

# show specific history with ID
# gi.histories.show_history('ecfc8c06ceae22af', contents=False)

# show dataset with id
# gi.datasets.show_dataset('4838ba20a6d86765e865eafb64b12ee6')

# show all workflows
# gi.workflows.get_workflows()

# get specific workflow with id
# wf_shotgun_paired = gi.workflows.show_workflow('dde25e07f2db3f7c')

# show workflow inputs
# wf_shotgun_paired['inputs']

# upload file locally
# gi.tools.upload_file('test.txt', 'f3c2b0f3ecac9f02')

# upload file via FTP when it is larger than 2 GB
# gi.tools.upload_from_ftp('test.txt', 'f3c2b0f3ecac9f02')

# upload data with urls
# gi.tools.put_url(content=dataset_links, history_id=history_id)

# create dataset collection
# collection_response = gi.histories.create_dataset_collection(
#     history_id='d786d448a802ae4b',
#     collection_description=dataset_collections.CollectionDescription(
#         name="MyListOfPairedDatasets",
#         type="list:paired",
#         elements=[
#             dataset_collections.CollectionElement(
#                 name="sample1",
#                 type="paired",
#                 elements=[
#                     dataset_collections.HistoryDatasetElement(name="forward", id='4838ba20a6d867655f39fcd3cfc9f5ca'),
#                     dataset_collections.HistoryDatasetElement(name="reverse", id='4838ba20a6d86765eeb232dc71866fdd'),
#                 ]
#             )
#         ]
#     )
# )

[{'model_class': 'History',
  'id': 'fbd2f0e6fce8bebc',
  'name': 'PRJNA390460',
  'deleted': False,
  'purged': False,
  'url': '/api/histories/fbd2f0e6fce8bebc',
  'published': False,
  'annotation': None,
  'tags': [],
  'update_time': '2023-02-01T09:52:03.754896'},
 {'model_class': 'History',
  'id': 'ee9cf18ea49d2fb1',
  'name': 'qiime2 test',
  'deleted': False,
  'purged': False,
  'url': '/api/histories/ee9cf18ea49d2fb1',
  'published': False,
  'annotation': None,
  'tags': [],
  'update_time': '2023-01-30T19:11:02.169097'},
 {'model_class': 'History',
  'id': 'ecfc8c06ceae22af',
  'name': 'shotgun paired test run',
  'deleted': False,
  'purged': False,
  'url': '/api/histories/ecfc8c06ceae22af',
  'published': False,
  'annotation': None,
  'tags': [],
  'update_time': '2023-01-24T17:43:51.334962'}]

In [2]:
def get_project_to_history_id_info():
    """
    Return the parsed contents of ``projects.json`` from the working directory.

    The file is the record written by ``update_project_to_history_id_info``
    (project id -> Galaxy history id).
    """
    with open('projects.json', 'r') as projects_file:
        raw_text = projects_file.read()
    return json.loads(raw_text)

def update_project_to_history_id_info(payload):
    """
    Merge ``payload`` into the JSON object stored in ``projects.json``.

    Parameters:
    payload: dict of entries to add or overwrite (e.g. {project_id: history_id})

    The file is created if it does not exist yet. An existing file that does
    not contain valid JSON still raises ``json.JSONDecodeError``.
    """
    try:
        with open('projects.json', 'r') as f:
            file_data = json.load(f)
    except FileNotFoundError:
        # first run: nothing recorded yet
        file_data = {}

    file_data.update(payload)

    # Serialise before opening for writing so that a non-serialisable payload
    # cannot leave a half-written file behind.
    serialized = json.dumps(file_data, indent=4)

    # Mode 'w' truncates the file. Rewriting in place from offset 0 without
    # truncating left stale trailing bytes whenever the new document was
    # shorter than the old one, which corrupted the JSON.
    with open('projects.json', 'w') as f:
        f.write(serialized)

def upload_dataset(gi, dataset_links, history_id):
    """
    Hand ``dataset_links`` to ``gi.tools.put_url`` for the given history.

    Parameters:
    gi: Galaxy instance
    dataset_links: value passed as ``content`` to ``put_url``
    history_id: ID of the target history

    Returns whatever ``put_url`` returns.
    """
    return gi.tools.put_url(content=dataset_links, history_id=history_id)

def create_paired_dataset_collection(dataset_list):
    """
    Build a ``list:paired`` collection description from a flat dataset list.

    Parameters:
    dataset_list: sequence of dicts that each carry an 'id' key. Consecutive
        entries are paired as (forward, reverse): items 0 and 1 become
        "sample1", items 2 and 3 become "sample2", and so on.
        NOTE(review): this relies on the caller supplying the datasets in
        forward/reverse order - confirm against the upload response.

    Returns a ``dataset_collections.CollectionDescription`` named
    "MyListOfPairedDatasets".

    Raises ValueError if the number of datasets is odd, because the last
    forward read would have no reverse partner.
    """
    if len(dataset_list) % 2 != 0:
        raise ValueError(
            f"expected an even number of datasets to build pairs, got {len(dataset_list)}"
        )

    collection_elements = []
    for sample_number, start in enumerate(range(0, len(dataset_list), 2), start=1):
        forward, reverse = dataset_list[start], dataset_list[start + 1]
        collection_elements.append(
            dataset_collections.CollectionElement(
                name="sample" + str(sample_number),
                type="paired",
                elements=[
                    dataset_collections.HistoryDatasetElement(name="forward", id=forward['id']),
                    dataset_collections.HistoryDatasetElement(name="reverse", id=reverse['id']),
                ]
            )
        )

    return dataset_collections.CollectionDescription(
        name="MyListOfPairedDatasets",
        type="list:paired",
        elements=collection_elements
    )


def monitor_workflow_execution(gi, invocation_id, max_attempts=100, sleep_time=60):
    """
    Polls a Galaxy workflow invocation and logs every check to workflow_status.csv

    One CSV row (Timestamp, Invocation ID, Attempt, Status) is appended per
    status check. A header row is written first when the file is empty.

    Parameters:
    gi: Galaxy instance
    invocation_id: ID of the workflow invocation
    max_attempts: Maximum number of status checks before giving up (default: 100)
    sleep_time: Time in seconds to wait between checks (default: 60)

    Returns the last fetched invocation dict, or None when max_attempts is 0
    and no check was made.
    """
    # Labels written to the CSV for the states this function knows about;
    # any other state is logged verbatim.
    status_labels = {
        'new': "Queued",
        'scheduled': "Running",
        'ok': "Completed",
        'error': "Error",
        'failed': "Error",
    }
    # States after which polling stops.
    # NOTE(review): 'cancelled' was added because Galaxy reports it for
    # invocations that will not progress any further - confirm against the
    # Galaxy version in use.
    terminal_states = ('ok', 'error', 'failed', 'cancelled')

    invocation = None
    timestamp = None

    # newline='' lets the csv module control line endings itself
    with open("workflow_status.csv", "a", newline='') as file:
        writer = csv.writer(file)
        # Write the header if the file is empty
        if file.tell() == 0:
            writer.writerow(["Timestamp", "Invocation ID", "Attempt", "Status"])

        for attempt in range(1, max_attempts + 1):
            # Invocations are looked up through the invocations client, which
            # only needs the invocation id.
            invocation = gi.invocations.show_invocation(invocation_id)
            state = invocation['state']
            timestamp = datetime.datetime.now().isoformat()

            # exactly one row per check (known states used to be logged twice)
            writer.writerow([timestamp, invocation_id, attempt, status_labels.get(state, state)])

            if state in terminal_states:
                return invocation

            # make the row visible to anyone tailing the file during the wait
            file.flush()
            if attempt < max_attempts:
                time.sleep(sleep_time)

        # reached max_attempts without a conclusive state
        if invocation is not None:
            writer.writerow([timestamp, invocation_id, max_attempts, "Max attempts reached"])
    return invocation


def shotgun_main(gi, dataset, workflow_id):
    """
    Runs the given workflow once per project in ``dataset``

    For every project this creates a history, records the project -> history
    id mapping, uploads the dataset links, waits for the uploads, builds a
    paired collection from them, invokes the workflow on that collection and
    monitors the invocation.

    Parameters:
    gi: Galaxy instance
    dataset: dict mapping a project id to a list of dataset URLs
    workflow_id: ID of the workflow to invoke

    Raises RuntimeError if any uploaded dataset ends up in the error state.
    """
    # The workflow is the same for every project, so look it up once. This
    # also surfaces a bad workflow id before any history is created.
    wf = gi.workflows.show_workflow(workflow_id)
    input_step_uuid = wf['inputs']['0']['uuid']

    for project_id, dataset_link_list in dataset.items():

        # create history
        print(f'creating new history for {project_id}')
        history_id = gi.histories.create_history(project_id)['id']
        print(f'new history with id {history_id} created for {project_id}')

        # keep record of project to history id
        update_project_to_history_id_info({project_id: history_id})

        # upload dataset
        print(f'uploading datasets to {project_id}')
        dataset_links = '\n'.join(dataset_link_list)
        upload_result = upload_dataset(gi, dataset_links, history_id)['outputs']
        print(upload_result)

        # wait for uploading complete
        cnt = 1
        while True:
            state_ids = gi.histories.show_history(history_id, contents=False)['state_ids']

            # A failed upload never reaches 'ok', so without this check the
            # loop would wait forever.
            failed_ids = state_ids.get('error', [])
            if failed_ids:
                raise RuntimeError(
                    f'upload to {project_id} failed for dataset(s): {failed_ids}'
                )

            if len(state_ids['ok']) == len(dataset_link_list):
                print(f'uploading datasets to {project_id} successful')
                break
            else:
                print(f'waitting for the dataset to be available {cnt}')
                cnt = cnt + 1
                time.sleep(60)

        # create dataset collection
        cd = create_paired_dataset_collection(upload_result)
        collection_response = gi.histories.create_dataset_collection(
            history_id=history_id,
            collection_description=cd
        )
        print(collection_response)

        # point the workflow input at the collection that was just created
        dataset_id = collection_response['id']
        dataset_map = {'src': 'hdca', 'id': dataset_id}

        # invoke workflow
        # (the input must be the collection reference, not the `dataset`
        # argument holding the project -> URL mapping)
        invoke_response = gi.workflows.invoke_workflow(
            wf['id'],
            inputs={input_step_uuid: dataset_map},
            history_id=history_id,
            inputs_by='step_uuid',
        )

        print(invoke_response)

        # monitor workflow execution
        invocation_id = invoke_response['id']
        monitor_workflow_execution(gi, invocation_id)


In [5]:
# Input for the run: project id -> download links of its FASTQ files
# (listed as the _1 file followed by the _2 file of the same run).
dataset = {
    'PRJNA390460': [
        'ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR589/003/SRR5890763/SRR5890763_1.fastq.gz',
        'ftp://ftp.sra.ebi.ac.uk/vol1/fastq/SRR589/003/SRR5890763/SRR5890763_2.fastq.gz',
    ],
}

shotgun_main(gi, dataset, 'dde25e07f2db3f7c')

In [1]:
import pandas as pd
from scipy import stats

# Alpha-diversity values per sample, one "_Original" and one "_Re-analysis"
# column for each index
data = {
    'Sample': ['Extra #21', 'Extra #22', 'Extra #25', 'Rubi #11', 'Rubi #13', 'Rubi #9'],
    'Chao1_Original': [48, 67.5, 98.6, 70.5, 90.2, 117.2],
    'Chao1_Re-analysis': [42, 61, 94, 65, 89, 104],
    'Shannon_Original': [2.07, 2.45, 2.53, 1.88, 2.16, 3.14],
    'Shannon_Re-analysis': [2.50, 2.94, 3.31, 3.46, 2.71, 4.16],
    'Simpson_Original': [0.83, 0.87, 0.86, 0.72, 0.77, 0.92],
    'Simpson_Re-analysis': [0.76, 0.80, 0.82, 0.82, 0.65, 0.89]
}

df = pd.DataFrame(data)

indexes = ['Chao1', 'Shannon', 'Simpson']

# Paired t-test per index: the same samples are measured in both columns
for index in indexes:
    original_values = df[f'{index}_Original']
    reanalysis_values = df[f'{index}_Re-analysis']
    t, p = stats.ttest_rel(original_values, reanalysis_values)
    print(f"{index} paired t-test:\n t = {t}\n p = {p}\n")

Chao1 paired t-test:
 t = 3.844832916010616
 p = 0.01206427941392628

Shannon paired t-test:
 t = -4.536992108267604
 p = 0.006185714676929359

Simpson paired t-test:
 t = 1.2573751347659914
 p = 0.2641501551547413

