# Member's Machine Actionable Data Management Plans

Data management plans (DMPs) are documents accompanying research proposals and project outputs. DMPs are created as free-form text and describe the data and tools employed in scientific investigations. They are often seen as an administrative exercise and not as an integral part of research practice. Machine Actionable DMPs (maDMPs) take this concept further by expressing the plan as structured, machine-readable information that software can process and link to related research outputs.

This notebook displays, in a human-friendly way, all the DMPs related to a DataCite Member. By the end of this notebook, you will be able to succinctly display all the DMPs related to a DataCite Member.


In [40]:
import json
import pandas as pd
import numpy as np
from dfply import *
import altair.vega.v5 as alt


In [154]:
# Prepare the GraphQL client
import requests
from IPython.display import display, Markdown
from gql import gql, Client
from gql.transport.requests import RequestsHTTPTransport

# HTTP transport for the DataCite GraphQL endpoint. The `api.stage` host is
# presumably the staging environment rather than production -- confirm
# before relying on the results.
_transport = RequestsHTTPTransport(
    url='https://api.stage.datacite.org/graphql',
    use_json=True,
)

# Client through which the notebook's queries are executed; it is configured
# to fetch the GraphQL schema from the endpoint via the transport.
client = Client(
    transport=_transport,
    fetch_schema_from_transport=True,
)

## Fetching Data

We obtain all the data from the DataCite GraphQL API.


In [156]:
# Build the GraphQL queries that retrieve the data management plans (DMPs) linked to an organization, a funder and a repository.
# Identifiers of the three entities whose DMPs are listed: an organization
# (ROR URL), a funder (funder DOI URL) and a repository (repository id).
query_params = {
    "rorId": "https://ror.org/00k4n6c32",
    "funderId": "https://doi.org/10.13039/501100006568",
    "repositoryId": "bl.oxdb",
}

# Selection set shared by the three queries. For every DMP it requests the
# id and first title, the referenced datasets, the hosting institutions, the
# funding references and the creators. Keeping it in one place guarantees the
# three tables are built from identically shaped responses.
_DMP_SELECTION = """\
    dataManagementPlans {
      totalCount
      nodes {
        id
        title: titles(first: 1) {
          title
        }
        datasets: references(resourceTypeId: "dataset") {
          totalCount
          nodes {
            id: doi
            name: titles(first: 1) {
              title
            }
          }
        }
        organisations: contributors(contributorType: "HostingInstitution") {
          id
          name
          affiliation {
            id
          }
        }
        funders: fundingReferences {
          id: funderIdentifier
          funderIdentifierType
          name: funderName
        }
        people: creators {
          id
          name
          affiliation {
            id
          }
        }
      }
    }
"""


def _build_dmp_query(root_field, variable):
    """Return the parsed query that lists the DMPs of one entity.

    Parameters:
    root_field (str): GraphQL root field to query, e.g. "organization"
    variable (str): Name of the ID variable the query declares, without "$"

    Returns:
    The document produced by ``gql`` for the assembled query text
    """
    return gql(
        "query getOutputs($" + variable + ": ID!)\n"
        "{\n"
        "  " + root_field + "(id: $" + variable + ") {\n"
        "    name\n"
        + _DMP_SELECTION +
        "  }\n"
        "}\n"
    )


organizationQuery = _build_dmp_query("organization", "rorId")
funderQuery = _build_dmp_query("funder", "funderId")
repositoryQuery = _build_dmp_query("repository", "repositoryId")
 

In [158]:
def get_data(type):
    """Fetch the data management plans linked to one kind of entity.

    Parameters:
    type (str): "organization" or "funder"; any other value selects the
        repository query

    Returns:
    list: The ``nodes`` list found under ``dataManagementPlans`` in the
        response for the selected entity
    """
    if type == "organization":
        root_field, query, variable = "organization", organizationQuery, "rorId"
    elif type == "funder":
        root_field, query, variable = "funder", funderQuery, "funderId"
    else:
        root_field, query, variable = "repository", repositoryQuery, "repositoryId"

    # Pass the variables as a mapping rather than a pre-serialised JSON
    # string, and send only the one variable the selected query declares.
    response = client.execute(
        query,
        variable_values={variable: query_params[variable]},
    )
    return response[root_field]["dataManagementPlans"]["nodes"]


## Data Transformation

Simple transformations are performed to convert the GraphQL response into a table that can be displayed.

In [160]:
def get_series_size(series_element):
    """Return the number of items contained in a single column cell.

    Parameters:
    series_element: Any sized value, such as a list

    Returns:
    int: The length of series_element
    """
    size = len(series_element)
    return size

In [162]:
def get_dataset_nodes(series_element):
    """Return the value stored under the 'nodes' key of a column cell.

    Parameters:
    series_element: A mapping that contains a 'nodes' key

    Returns:
    The object held under 'nodes', unchanged
    """
    nodes = series_element['nodes']
    return nodes

In [164]:
def get_title(series_element):
    """Return the title text of the first entry in a list of title records.

    Parameters:
    series_element (list): Title records, each a mapping with a 'title' key

    Returns:
    The 'title' value of the first record, or None when the list is empty
    or missing, so that one untitled record does not abort the whole table
    """
    if not series_element:
        return None
    return series_element[0]['title']

In [166]:
def transform_dmps(dataframe):
    """Flatten the nested columns of a DMP table into displayable values.

    Parameters:
    dataframe (dataframe): One row per data management plan, or None when
        there is nothing to show

    Returns:
    dataframe: An empty dataframe when the input is None; otherwise the
        result of two mutate steps in which ``title`` holds the first title
        text and ``datasets``, ``organisations``, ``funders`` and ``people``
        hold item counts

    """
    if dataframe is None:
        return pd.DataFrame()

    # Step 1: replace each datasets cell by its list of nodes so that it can
    # be counted like the other list-valued columns.
    with_dataset_nodes = dataframe >> mutate(
        datasets=X.datasets.apply(get_dataset_nodes),
    )

    # Step 2: keep the id as is, extract the title text and replace every
    # list-valued column by its length.
    return with_dataset_nodes >> mutate(
        id=X.id,
        title=X.title.apply(get_title),
        datasets=X.datasets.apply(get_series_size),
        organisations=X.organisations.apply(get_series_size),
        funders=X.funders.apply(get_series_size),
        people=X.people.apply(get_series_size),
    )
  

In [177]:
def processTable(type):
    """Fetch the DMPs of one entity type and return them as a styled table.

    Parameters:
    type (str): Entity type forwarded to get_data

    Returns:
    The ``.style`` accessor of the dataframe produced by transform_dmps
    """
    records = get_data(type)
    # With no records there is no first row to take the column names from,
    # so transform_dmps is handed None instead of a dataframe.
    frame = (
        pd.DataFrame(records, columns=records[0].keys())
        if len(records) != 0
        else None
    )
    return transform_dmps(frame).style

In [178]:
# Build one styled table per entity type; each call fetches its own data
# through get_data.
organization = processTable("organization")
funder = processTable("funder")
repository = processTable("repository")

In [179]:
# Show the organization table as the output of this cell.
organization

Unnamed: 0,id,title,datasets,organisations,funders,people
0,https://handle.test.datacite.org/10.4124/test/.879w8,UEL Data Management Plan template,0,0,1,1


In [180]:
# Show the funder table as the output of this cell.
funder

In [181]:
# Show the repository table as the output of this cell.
repository