# Create Dataframe from JSON
A notebook for creating OpenRefine procedures that change R5.1 JSONs into dataframes for use in testing.

In [1]:
from pathlib import Path

In [2]:
def add_stats_source_and_report_type_fields(statistics_source_ID, report_type):
    """Build the OpenRefine operations that add two constant-valued columns.

    Both operations are ``core/column-addition`` steps based on the ``platform``
    column and inserted at index 1, so ``statistics_source_ID`` ends up ahead of
    ``report_type``.

    Args:
        statistics_source_ID: value interpolated unquoted into the GREL expression
        report_type: value interpolated inside single quotes in the GREL expression

    Returns:
        str: two comma-terminated JSON objects (no enclosing brackets)
    """
    report_type_column = f"""
        {{
            "op": "core/column-addition",
            "engineConfig": {{
                "facets": [],
                "mode": "row-based"
            }},
            "baseColumnName": "platform",
            "expression": "grel:'{report_type}'",
            "onError": "set-to-blank",
            "newColumnName": "report_type",
            "columnInsertIndex": 1,
            "description": "Create column ``report_type`` with the report type"
        }},"""
    statistics_source_column = f"""
        {{
            "op": "core/column-addition",
            "engineConfig": {{
                "facets": [],
                "mode": "row-based"
            }},
            "baseColumnName": "platform",
            "expression": "grel:{statistics_source_ID}",
            "onError": "set-to-blank",
            "newColumnName": "statistics_source_ID",
            "columnInsertIndex": 1,
            "description": "Create column ``statistics_source_ID`` with statistics source ID"
        }},
    """
    return report_type_column + statistics_source_column

In [3]:
def rename_and_fill_down_platform():
    """Build the OpenRefine operations that produce a filled-down ``platform`` column.

    Two renames are emitted, one for each source column name that can hold the
    platform (``_ - Platform`` and ``_ - Items - _ - Platform``), followed by a
    row-based fill-down.

    Returns:
        str: three comma-terminated JSON objects (no enclosing brackets)
    """
    rename_top_level = """
        {
            "op": "core/column-rename",
            "oldColumnName": "_ - Platform",
            "newColumnName": "platform",
            "description": "Rename column _ - Platform to platform"
        },"""
    rename_item_level = """
        {
            "op": "core/column-rename",
            "oldColumnName": "_ - Items - _ - Platform",
            "newColumnName": "platform",
            "description": "Rename column _ - Items - _ - Platform to platform"
        },"""
    fill_down = """
        {
            "op": "core/fill-down",
            "engineConfig": {
                "facets": [],
                "mode": "row-based"
            },
            "columnName": "platform",
            "description": "Fill down cells in column platform"
        },
    """
    return rename_top_level + rename_item_level + fill_down

def rename_and_fill_down_publisher():
    """Build the OpenRefine operations that produce a filled-down ``publisher`` column.

    Two renames are emitted, one for each source column name that can hold the
    publisher (``_ - Publisher`` and ``_ - Items - _ - Publisher``), followed by
    a record-based fill-down.

    Returns:
        str: three comma-terminated JSON objects (no enclosing brackets)
    """
    rename_top_level = """
        {
            "op": "core/column-rename",
            "oldColumnName": "_ - Publisher",
            "newColumnName": "publisher",
            "description": "Rename column _ - Publisher to publisher"
        },"""
    rename_item_level = """
        {
            "op": "core/column-rename",
            "oldColumnName": "_ - Items - _ - Publisher",
            "newColumnName": "publisher",
            "description": "Rename column _ - Items - _ - Publisher to publisher"
        },"""
    fill_down = """
        {
            "op": "core/fill-down",
            "engineConfig": {
                "facets": [],
                "mode": "record-based"
            },
            "columnName": "publisher",
            "description": "Fill down cells in column publisher"
        },
    """
    return rename_top_level + rename_item_level + fill_down

def rename_fill_down_and_move_proprietary_ID():
    """Build the OpenRefine operations that prepare the ``proprietary_ID`` column.

    The column ``_ - Item_ID - Proprietary`` is renamed, filled down in
    record-based mode, and then moved to index 2.

    Returns:
        str: three comma-terminated JSON objects (no enclosing brackets)
    """
    rename = """
        {
            "op": "core/column-rename",
            "oldColumnName": "_ - Item_ID - Proprietary",
            "newColumnName": "proprietary_ID",
            "description": "Rename column _ - Item_ID - Proprietary to proprietary_ID"
        },"""
    fill_down = """
        {
            "op": "core/fill-down",
            "engineConfig": {
                "facets": [],
                "mode": "record-based"
            },
            "columnName": "proprietary_ID",
            "description": "Fill down cells in column proprietary_ID"
        },"""
    move = """
        {
            "op": "core/column-move",
            "columnName": "proprietary_ID",
            "index": 2,
            "description": "Move column proprietary_ID to before `Performance` fields"
        },
    """
    return rename + fill_down + move

def rename_and_fill_down_DR_resource_name():
    """Build the OpenRefine operations that produce ``resource_name`` from ``_ - Database``.

    The column is renamed and then filled down in row-based mode.

    Returns:
        str: two comma-terminated JSON objects (no enclosing brackets)
    """
    rename = """
        {
            "op": "core/column-rename",
            "oldColumnName": "_ - Database",
            "newColumnName": "resource_name",
            "description": "Rename column _ - Database to resource_name"
        },"""
    fill_down = """
        {
            "op": "core/fill-down",
            "engineConfig": {
                "facets": [],
                "mode": "row-based"
            },
            "columnName": "resource_name",
            "description": "Fill down cells in column resource_name"
        },
    """
    return rename + fill_down

def rename_fill_down_and_move_TR_fields():
    """Build the OpenRefine operations that prepare the TR-specific metadata columns.

    In order, the operations:

    * rename, fill down (record-based), and move to index 2 each of ``DOI``,
      ``ISBN``, ``print_ISSN``, and ``online_ISSN``
    * rename ``YOP`` and convert its values to numbers
    * rename ``access_type``
    * rename ``_ - Title`` to ``resource_name`` and fill it down (row-based)

    Returns:
        str: comma-terminated JSON objects (no enclosing brackets)
    """
    # The description of the ISBN rename names the column actually being renamed
    # (`_ - Item_ID - ISBN`), matching its `oldColumnName`.
    return """
        {
            "op": "core/column-rename",
            "oldColumnName": "_ - Item_ID - DOI",
            "newColumnName": "DOI",
            "description": "Rename column _ - Item_ID - DOI to DOI"
        },
        {
            "op": "core/fill-down",
            "engineConfig": {
                "facets": [],
                "mode": "record-based"
            },
            "columnName": "DOI",
            "description": "Fill down cells in column DOI"
        },
        {
            "op": "core/column-move",
            "columnName": "DOI",
            "index": 2,
            "description": "Move column DOI to before `Performance` fields"
        },
        {
            "op": "core/column-rename",
            "oldColumnName": "_ - Item_ID - ISBN",
            "newColumnName": "ISBN",
            "description": "Rename column _ - Item_ID - ISBN to ISBN"
        },
        {
            "op": "core/fill-down",
            "engineConfig": {
                "facets": [],
                "mode": "record-based"
            },
            "columnName": "ISBN",
            "description": "Fill down cells in column ISBN"
        },
        {
            "op": "core/column-move",
            "columnName": "ISBN",
            "index": 2,
            "description": "Move column ISBN to before `Performance` fields"
        },
        {
            "op": "core/column-rename",
            "oldColumnName": "_ - Item_ID - Print_ISSN",
            "newColumnName": "print_ISSN",
            "description": "Rename column _ - Item_ID - Print_ISSN to print_ISSN"
        },
        {
            "op": "core/fill-down",
            "engineConfig": {
                "facets": [],
                "mode": "record-based"
            },
            "columnName": "print_ISSN",
            "description": "Fill down cells in column print_ISSN"
        },
        {
            "op": "core/column-move",
            "columnName": "print_ISSN",
            "index": 2,
            "description": "Move column print_ISSN to before `Performance` fields"
        },
        {
            "op": "core/column-rename",
            "oldColumnName": "_ - Item_ID - Online_ISSN",
            "newColumnName": "online_ISSN",
            "description": "Rename column _ - Item_ID - Online_ISSN to online_ISSN"
        },
        {
            "op": "core/fill-down",
            "engineConfig": {
                "facets": [],
                "mode": "record-based"
            },
            "columnName": "online_ISSN",
            "description": "Fill down cells in column online_ISSN"
        },
        {
            "op": "core/column-move",
            "columnName": "online_ISSN",
            "index": 2,
            "description": "Move column online_ISSN to before `Performance` fields"
        },
        {
            "op": "core/column-rename",
            "oldColumnName": "_ - Attribute_Performance - _ - YOP",
            "newColumnName": "YOP",
            "description": "Rename column _ - Attribute_Performance - _ - YOP to YOP"
        },
        {
            "op": "core/text-transform",
            "engineConfig": {
                "facets": [],
                "mode": "row-based"
            },
            "columnName": "YOP",
            "expression": "value.toNumber()",
            "onError": "keep-original",
            "repeat": false,
            "repeatCount": 10,
            "description": "Change values in column YOP to numbers"
        },
        {
            "op": "core/column-rename",
            "oldColumnName": "_ - Attribute_Performance - _ - Access_Type",
            "newColumnName": "access_type",
            "description": "Rename column _ - Attribute_Performance - _ - Access_Type to access_type"
        },
        {
            "op": "core/column-rename",
            "oldColumnName": "_ - Title",
            "newColumnName": "resource_name",
            "description": "Rename column _ - Title to resource_name"
        },
        {
            "op": "core/fill-down",
            "engineConfig": {
                "facets": [],
                "mode": "row-based"
            },
            "columnName": "resource_name",
            "description": "Fill down cells in column resource_name"
        },
    """

def rename_fill_down_and_move_IR_fields():
    """Build the OpenRefine operations that prepare the IR-specific metadata columns.

    In order, the operations:

    * rename the item-level name and author columns to ``resource_name`` and
      ``authors``
    * copy ``resource_name`` into a ``temp`` column, join each record's author
      values with ``'; '`` in the record's first row, blank out empty results,
      remove rows whose only non-blank columns are ``platform`` and
      ``publisher``, then drop ``temp``
    * rename the remaining item-level columns (``publication_date``,
      ``article_version``, ``DOI``, ``proprietary_ID``, ``print_ISSN``, ``YOP``,
      ``access_type``), converting ``publication_date`` to dates and ``YOP`` to
      numbers
    * rename, fill down, and (for most) move to index 2 the parent-level columns
      (``parent_authors``, ``parent_data_type``, ``parent_DOI``,
      ``parent_proprietary_ID``, ``parent_ISBN``, ``parent_print_ISSN``,
      ``parent_online_ISSN``, ``parent_title``)

    Returns:
        str: comma-terminated JSON objects (no enclosing brackets); the final
        object ends with a comma like every other builder in this notebook so
        the fragment can be followed directly by another builder's output
    """
    return """
        {
            "op": "core/column-rename",
            "oldColumnName": "_ - Items - _ - Item",
            "newColumnName": "resource_name",
            "description": "Rename column _ - Items - _ - Item to resource_name"
        },
        {
            "op": "core/column-rename",
            "oldColumnName": "_ - Items - _ - Authors - _ - Name",
            "newColumnName": "authors",
            "description": "Rename column _ - Items - _ - Authors - _ - Name to authors"
        },
        {
            "op": "core/column-addition",
            "engineConfig": {
                "facets": [],
                "mode": "record-based"
            },
            "baseColumnName": "resource_name",
            "expression": "grel:value",
            "onError": "set-to-blank",
            "newColumnName": "temp",
            "columnInsertIndex": 0,
            "description": "Create column temp copying resource names at beginning"
        },
        {
            "op": "core/text-transform",
            "engineConfig": {
            "facets": [],
            "mode": "record-based"
            },
            "columnName": "authors",
            "expression": "grel:if(row.index-row.record.fromRowIndex==0,row.record.cells['authors'].value.join('; '),null)",
            "onError": "keep-original",
            "repeat": false,
            "repeatCount": 10,
            "description": "Combine all author values separated by semicolons in the first row of the record"
        },
        {
            "op": "core/text-transform",
            "engineConfig": {
            "facets": [],
            "mode": "record-based"
            },
            "columnName": "authors",
            "expression": "grel:if(isBlank(value),null,value)",
            "onError": "keep-original",
            "repeat": false,
            "repeatCount": 10,
            "description": "For records with no authors that now have empty strings, null the value"
        },
        {
            "op": "core/row-removal",
            "engineConfig": {
            "facets": [
                {
                "type": "list",
                "name": "temp",
                "expression": "grel:filter(row.columnNames,cn,isNonBlank(cells[cn].value)).join('|')=='platform|publisher'",
                "columnName": "temp",
                "invert": false,
                "omitBlank": false,
                "omitError": false,
                "selection": [
                    {
                    "v": {
                        "v": true,
                        "l": "true"
                    }
                    }
                ],
                "selectBlank": false,
                "selectError": false
                }
            ],
            "mode": "row-based"
            },
            "description": "Remove rows with only platform and publisher data"
        },
        {
            "op": "core/column-removal",
            "columnName": "temp",
            "description": "Remove column temp"
        },
        {
            "op": "core/column-rename",
            "oldColumnName": "_ - Items - _ - Publication_Date",
            "newColumnName": "publication_date",
            "description": "Rename column _ - Items - _ - Publication_Date to publication_date"
        },
        {
            "op": "core/text-transform",
            "engineConfig": {
                "facets": [],
                "mode": "row-based"
            },
            "columnName": "publication_date",
            "expression": "value.toDate()",
            "onError": "keep-original",
            "repeat": false,
            "repeatCount": 10,
            "description": "Change values in column publication_date to dates"
        },
        {
            "op": "core/column-rename",
            "oldColumnName": "_ - Items - _ - Article_Version",
            "newColumnName": "article_version",
            "description": "Rename column _ - Items - _ - Article_Version to article_version"
        },
        {
            "op": "core/column-rename",
            "oldColumnName": "_ - Items - _ - Item_ID - DOI",
            "newColumnName": "DOI",
            "description": "Rename column _ - Items - _ - Item_ID - DOI to DOI"
        },
        {
            "op": "core/column-rename",
            "oldColumnName": "_ - Items - _ - Item_ID - Proprietary",
            "newColumnName": "proprietary_ID",
            "description": "Rename column _ - Items - _ - Item_ID - Proprietary to proprietary_ID"
        },
        {
            "op": "core/column-rename",
            "oldColumnName": "_ - Items - _ - Item_ID - Print_ISSN",
            "newColumnName": "print_ISSN",
            "description": "Rename column _ - Items - _ - Item_ID - Print_ISSN to print_ISSN"
        },
        {
            "op": "core/column-rename",
            "oldColumnName": "_ - Items - _ - Attribute_Performance - _ - YOP",
            "newColumnName": "YOP",
            "description": "Rename column _ - Items - _ - Attribute_Performance - _ - YOP to YOP"
        },
        {
            "op": "core/text-transform",
            "engineConfig": {
                "facets": [],
                "mode": "row-based"
            },
            "columnName": "YOP",
            "expression": "value.toNumber()",
            "onError": "keep-original",
            "repeat": false,
            "repeatCount": 10,
            "description": "Change values in column YOP to numbers"
        },
        {
            "op": "core/column-rename",
            "oldColumnName": "_ - Items - _ - Attribute_Performance - _ - Access_Type",
            "newColumnName": "access_type",
            "description": "Rename column _ - Items - _ - Attribute_Performance - _ - Access_Type to access_type"
        },
        {
            "op": "core/column-rename",
            "oldColumnName": "_ - Authors - _ - Name",
            "newColumnName": "parent_authors",
            "description": "Rename column _ - Authors - _ - Name to parent_authors"
        },
        {
            "op": "core/text-transform",
            "engineConfig": {
            "facets": [],
            "mode": "record-based"
            },
            "columnName": "parent_authors",
            "expression": "grel:if(row.index-row.record.fromRowIndex==0,row.record.cells['parent_authors'].value.join('; '),null)",
            "onError": "keep-original",
            "repeat": false,
            "repeatCount": 10,
            "description": "Combine all parent author values separated by semicolons in the first row of the record"
        },
        {
            "op": "core/text-transform",
            "engineConfig": {
            "facets": [],
            "mode": "record-based"
            },
            "columnName": "parent_authors",
            "expression": "grel:if(isBlank(value),null,value)",
            "onError": "keep-original",
            "repeat": false,
            "repeatCount": 10,
            "description": "For records with no parent authors, null the value in the first row"
        },
        {
            "op": "core/fill-down",
            "engineConfig": {
                "facets": [],
                "mode": "record-based"
            },
            "columnName": "parent_authors",
            "description": "Fill down cells in column parent_authors"
        },
        {
            "op": "core/column-move",
            "columnName": "parent_authors",
            "index": 2,
            "description": "Move column parent_authors to before `Performance` fields"
        },
        {
            "op": "core/column-rename",
            "oldColumnName": "_ - Data_Type",
            "newColumnName": "parent_data_type",
            "description": "Rename column _ - Data_Type to parent_data_type"
        },
        {
            "op": "core/fill-down",
            "engineConfig": {
                "facets": [],
                "mode": "record-based"
            },
            "columnName": "parent_data_type",
            "description": "Fill down cells in column parent_data_type"
        },
        {
            "op": "core/column-rename",
            "oldColumnName": "_ - Item_ID - DOI",
            "newColumnName": "parent_DOI",
            "description": "Rename column _ - Item_ID - DOI to parent_DOI"
        },
        {
            "op": "core/fill-down",
            "engineConfig": {
                "facets": [],
                "mode": "record-based"
            },
            "columnName": "parent_DOI",
            "description": "Fill down cells in column parent_DOI"
        },
        {
            "op": "core/column-move",
            "columnName": "parent_DOI",
            "index": 2,
            "description": "Move column parent_DOI to before `Performance` fields"
        },
        {
            "op": "core/column-rename",
            "oldColumnName": "_ - Item_ID - Proprietary",
            "newColumnName": "parent_proprietary_ID",
            "description": "Rename column _ - Item_ID - Proprietary to parent_proprietary_ID"
        },
        {
            "op": "core/fill-down",
            "engineConfig": {
                "facets": [],
                "mode": "record-based"
            },
            "columnName": "parent_proprietary_ID",
            "description": "Fill down cells in column parent_proprietary_ID"
        },
        {
            "op": "core/column-move",
            "columnName": "parent_proprietary_ID",
            "index": 2,
            "description": "Move column parent_proprietary_ID to before `Performance` fields"
        },
        {
            "op": "core/column-rename",
            "oldColumnName": "_ - Item_ID - ISBN",
            "newColumnName": "parent_ISBN",
            "description": "Rename column _ - Item_ID - ISBN to parent_ISBN"
        },
        {
            "op": "core/fill-down",
            "engineConfig": {
                "facets": [],
                "mode": "record-based"
            },
            "columnName": "parent_ISBN",
            "description": "Fill down cells in column parent_ISBN"
        },
        {
            "op": "core/column-move",
            "columnName": "parent_ISBN",
            "index": 2,
            "description": "Move column parent_ISBN to before `Performance` fields"
        },
        {
            "op": "core/column-rename",
            "oldColumnName": "_ - Item_ID - Print_ISSN",
            "newColumnName": "parent_print_ISSN",
            "description": "Rename column _ - Item_ID - Print_ISSN to parent_print_ISSN"
        },
        {
            "op": "core/fill-down",
            "engineConfig": {
                "facets": [],
                "mode": "record-based"
            },
            "columnName": "parent_print_ISSN",
            "description": "Fill down cells in column parent_print_ISSN"
        },
        {
            "op": "core/column-move",
            "columnName": "parent_print_ISSN",
            "index": 2,
            "description": "Move column parent_print_ISSN to before `Performance` fields"
        },
        {
            "op": "core/column-rename",
            "oldColumnName": "_ - Item_ID - Online_ISSN",
            "newColumnName": "parent_online_ISSN",
            "description": "Rename column _ - Item_ID - Online_ISSN to parent_online_ISSN"
        },
        {
            "op": "core/fill-down",
            "engineConfig": {
                "facets": [],
                "mode": "record-based"
            },
            "columnName": "parent_online_ISSN",
            "description": "Fill down cells in column parent_online_ISSN"
        },
        {
            "op": "core/column-move",
            "columnName": "parent_online_ISSN",
            "index": 2,
            "description": "Move column parent_online_ISSN to before `Performance` fields"
        },
        {
            "op": "core/column-rename",
            "oldColumnName": "_ - Title",
            "newColumnName": "parent_title",
            "description": "Rename column _ - Title to parent_title"
        },
        {
            "op": "core/fill-down",
            "engineConfig": {
                "facets": [],
                "mode": "row-based"
            },
            "columnName": "parent_title",
            "description": "Fill down cells in column parent_title"
        },
    """

def rename_data_type_and_access_method():
    """Build the OpenRefine renames for the ``data_type`` and ``access_method`` columns.

    Each target column gets two renames, one for the source name without the
    ``Items`` level and one for the source name with it.

    Returns:
        str: four comma-terminated JSON objects (no enclosing brackets)
    """
    renames_without_items_level = """
        {
            "op": "core/column-rename",
            "oldColumnName": "_ - Attribute_Performance - _ - Data_Type",
            "newColumnName": "data_type",
            "description": "Rename column _ - Attribute_Performance - _ - Data_Type to data_type"
        },
        {
            "op": "core/column-rename",
            "oldColumnName": "_ - Attribute_Performance - _ - Access_Method",
            "newColumnName": "access_method",
            "description": "Rename column _ - Attribute_Performance - _ - Access_Method to access_method"
        },"""
    renames_with_items_level = """
        {
            "op": "core/column-rename",
            "oldColumnName": "_ - Items - _ - Attribute_Performance - _ - Data_Type",
            "newColumnName": "data_type",
            "description": "Rename column _ - Items - _ - Attribute_Performance - _ - Data_Type to data_type"
        },
        {
            "op": "core/column-rename",
            "oldColumnName": "_ - Items - _ - Attribute_Performance - _ - Access_Method",
            "newColumnName": "access_method",
            "description": "Rename column _ - Items - _ - Attribute_Performance - _ - Access_Method to access_method"
        },
    """
    return renames_without_items_level + renames_with_items_level

In [4]:
def pivot_performance_fields(first_field, last_field_from_split):
    """Build the OpenRefine operations that pivot the ``Performance`` columns into rows.

    The operations transpose every column from ``first_field`` onward into a
    ``Fields``/``usage_count`` pair, split ``Fields`` at ``' - '``, and rename
    the last two resulting columns to ``metric_type`` and ``usage_date``.

    Args:
        first_field: name of the first column to transpose
        last_field_from_split: number used in the name of the final column
            produced by the split; the column numbered one lower becomes
            ``metric_type``

    Returns:
        str: four comma-terminated JSON objects (no enclosing brackets)
    """
    metric_type_source = f"Fields {last_field_from_split-1}"
    usage_date_source = f"Fields {last_field_from_split}"

    transpose = f"""
        {{
            "op": "core/transpose-columns-into-rows",
            "startColumnName": "{first_field}",
            "columnCount": -1,
            "ignoreBlankCells": true,
            "fillDown": true,
            "separator": null,
            "keyColumnName": "Fields",
            "valueColumnName": "usage_count",
            "description": "Pivot `Performance` fields"
        }},"""
    # No placeholders in this operation, so a plain (non-f) string is used.
    split_fields = """
        {
            "op": "core/column-split",
            "engineConfig": {
                "facets": [],
                "mode": "record-based"
            },
            "columnName": "Fields",
            "guessCellType": true,
            "removeOriginalColumn": true,
            "mode": "separator",
            "separator": " - ",
            "regex": false,
            "maxColumns": 0,
            "description": "Split column Fields at ' - '"
        },"""
    rename_split_results = f"""
        {{
            "op": "core/column-rename",
            "oldColumnName": "{metric_type_source}",
            "newColumnName": "metric_type",
            "description": "Rename {metric_type_source} to metric_type"
        }},
        {{
            "op": "core/column-rename",
            "oldColumnName": "{usage_date_source}",
            "newColumnName": "usage_date",
            "description": "Rename {usage_date_source} to usage_date"
        }},
    """
    return transpose + split_fields + rename_split_results

def remove_null_rows():
    """Build the OpenRefine operations that drop rows without usage.

    ``usage_count`` is first converted to numbers; rows are then removed when
    the facet expression ``or(isBlank(value),value==0)`` is true.

    Returns:
        str: two comma-terminated JSON objects (no enclosing brackets)
    """
    usage_count_to_number = """
        {
            "op": "core/text-transform",
            "engineConfig": {
                "facets": [],
                "mode": "row-based"
            },
            "columnName": "usage_count",
            "expression": "value.toNumber()",
            "onError": "keep-original",
            "repeat": false,
            "repeatCount": 10,
            "description": "Make cells in column usage_count numbers"
        },"""
    remove_blank_or_zero = """
        {
            "op": "core/row-removal",
            "engineConfig": {
                "facets": [
                    {
                        "type": "list",
                        "name": "usage_count",
                        "expression": "grel:or(isBlank(value),value==0)",
                        "columnName": "usage_count",
                        "invert": false,
                        "omitBlank": false,
                        "omitError": false,
                        "selection": [
                            {
                                "v": {
                                    "v": true,
                                    "l": "true"
                                }
                            }
                        ],
                        "selectBlank": false,
                        "selectError": false
                    }
                ],
                "mode": "row-based"
            },
            "description": "Remove rows with no usage"
        },
    """
    return usage_count_to_number + remove_blank_or_zero

def make_usage_date_dates():
    """Build the OpenRefine operation that converts ``usage_date`` values to dates.

    Returns:
        str: one comma-terminated JSON object (no enclosing brackets)
    """
    usage_date_to_date = """
        {
            "op": "core/text-transform",
            "engineConfig": {
                "facets": [],
                "mode": "row-based"
            },
            "columnName": "usage_date",
            "expression": "value.toDate()",
            "onError": "keep-original",
            "repeat": false,
            "repeatCount": 10,
            "description": "Change values in column usage_date to dates"
        },
    """
    return usage_date_to_date

In [5]:
def make_final_field_adjustments():
    """Build the OpenRefine operations that finish the columns and create ``df``.

    In order, the operations:

    * replace empty-string and single-space values with null in ``DOI``,
      ``proprietary_ID``, ``online_ISSN``, and ``print_ISSN``
    * strip a newline from ``resource_name`` values
    * reinterpret ``resource_name``, ``publisher``, and ``platform`` values as
      UTF-8 and unescape HTML entities
    * reorder the columns
    * add a ``df`` column at index 0 that joins every field of the row into a
      bracketed, comma-separated list (blank values become ``None``, dates are
      formatted ``yyyy-MM-dd`` and quoted, strings are quoted with inner double
      quotes escaped, other values are rounded)

    Returns:
        str: comma-terminated JSON objects (no enclosing brackets)
    """
    # The fourth whitespace cleanup targets `print_ISSN`, the column name used by
    # the rename operations and the reorder list.
    return """
        {
            "op": "core/text-transform",
            "engineConfig": {
                "facets": [],
                "mode": "row-based"
            },
            "columnName": "DOI",
            "expression": "grel:if(or(value=='',value==' '),null,value)",
            "onError": "keep-original",
            "repeat": false,
            "repeatCount": 10,
            "description": "Change whitespace values to null in column ``DOI``"
        },
        {
            "op": "core/text-transform",
            "engineConfig": {
                "facets": [],
                "mode": "row-based"
            },
            "columnName": "proprietary_ID",
            "expression": "grel:if(or(value=='',value==' '),null,value)",
            "onError": "keep-original",
            "repeat": false,
            "repeatCount": 10,
            "description": "Change whitespace values to null in column ``proprietary_ID``"
        },
        {
            "op": "core/text-transform",
            "engineConfig": {
                "facets": [],
                "mode": "row-based"
            },
            "columnName": "online_ISSN",
            "expression": "grel:if(or(value=='',value==' '),null,value)",
            "onError": "keep-original",
            "repeat": false,
            "repeatCount": 10,
            "description": "Change whitespace values to null in column ``online_ISSN``"
        },
        {
            "op": "core/text-transform",
            "engineConfig": {
                "facets": [],
                "mode": "row-based"
            },
            "columnName": "print_ISSN",
            "expression": "grel:if(or(value=='',value==' '),null,value)",
            "onError": "keep-original",
            "repeat": false,
            "repeatCount": 10,
            "description": "Change whitespace values to null in column ``print_ISSN``"
        },
        {
            "op": "core/text-transform",
            "engineConfig": {
                "facets": [],
                "mode": "row-based"
            },
            "columnName": "resource_name",
            "expression": "grel:value.replace(/\\n/,'')",
            "onError": "keep-original",
            "repeat": false,
            "repeatCount": 10,
            "description": "Remove errant newlines from ``resource_name`` values"
        },
        {
            "op": "core/text-transform",
            "engineConfig": {
                "facets": [],
                "mode": "row-based"
            },
            "columnName": "resource_name",
            "expression": "grel:value.reinterpret('utf-8').unescape('html')",
            "onError": "keep-original",
            "repeat": false,
            "repeatCount": 10,
            "description": "Ensure values in column ``resource_name`` are encoded with UTF-8"
        },
        {
            "op": "core/text-transform",
            "engineConfig": {
                "facets": [],
                "mode": "row-based"
            },
            "columnName": "publisher",
            "expression": "grel:value.reinterpret('utf-8').unescape('html')",
            "onError": "keep-original",
            "repeat": false,
            "repeatCount": 10,
            "description": "Ensure values in column ``publisher`` are encoded with UTF-8"
        },
        {
            "op": "core/text-transform",
            "engineConfig": {
                "facets": [],
                "mode": "row-based"
            },
            "columnName": "platform",
            "expression": "grel:value.reinterpret('utf-8').unescape('html')",
            "onError": "keep-original",
            "repeat": false,
            "repeatCount": 10,
            "description": "Ensure values in column ``platform`` are encoded with UTF-8"
        },
        {
            "op": "core/column-reorder",
            "columnNames": [
                "statistics_source_ID",
                "report_type",
                "resource_name",
                "publisher",
                "publisher_ID",
                "platform",
                "authors",
                "publication_date",
                "article_version",
                "DOI",
                "proprietary_ID",
                "ISBN",
                "print_ISSN",
                "online_ISSN",
                "URI",
                "data_type",
                "section_type",
                "YOP",
                "access_type",
                "access_method",
                "parent_title",
                "parent_authors",
                "parent_publication_date",
                "parent_article_version",
                "parent_data_type",
                "parent_DOI",
                "parent_proprietary_ID",
                "parent_ISBN",
                "parent_print_ISSN",
                "parent_online_ISSN",
                "parent_URI",
                "metric_type",
                "usage_date",
                "usage_count"
            ],
            "description": "Reorder columns"
        },
        {
            "op": "core/column-addition",
            "engineConfig": {
                "facets": [],
                "mode": "row-based"
            },
            "baseColumnName": "statistics_source_ID",
            "expression": "grel:'['+forEach(row.columnNames,field,if(isBlank(cells[field].value),'None',if(cells[field].value.type()=='date','\\\"'+cells[field].value.toString('yyyy-MM-dd')+'\\\"',if(cells[field].value.type()=='string','\\\"'+cells[field].value.replace('\\\"','\\\\\\\"')+'\\\"',cells[field].value.round())))).join(', ')+'],'",
            "onError": "set-to-blank",
            "newColumnName": "df",
            "columnInsertIndex": 0,
            "description": "Create column ``df`` by combining all the other fields and formatting them for insertion into a dataframe"
        },
    """

## 1. Create Path to JSON

In [6]:
# File that collects the generated OpenRefine operations; the name has no
# placeholders, so a plain string literal is used instead of an f-string.
JSON_file_path = Path("for_OpenRefine.json")

## 2. Set Variables

In [7]:
# Value written to the `statistics_source_ID` column of every row
statistics_source_ID = 3
# Report type: selects which metadata operations are written and fills `report_type`
report_type = "IR"
# Name of the first `Performance` column; the pivot starts at this column
first_performance_field = (
    "_ - Items - _ - Attribute_Performance - _ - Performance"
    " - Total_Item_Investigations - 2024-10"
)




## 3. Rename and Fill Down Metadata Fields
**PR**: Rename and fill down `platform`. Rename `data_type` and `access_method`.

**DR**: Rename and fill down `resource_name`, `publisher`, `platform`, and `proprietary_ID`. Rename `data_type` and `access_method`.

**TR**: Rename and fill down `resource_name`, `publisher`, `platform`, `DOI`, `proprietary_ID`, `ISBN`, `print_ISSN`, and `online_ISSN`. Rename `data_type`, `YOP`, `access_type`, and `access_method`.

**IR**: Rename and fill down `parent_title`, `parent_authors`, `parent_data_type`, `parent_DOI`, `parent_proprietary_ID`, `parent_ISBN`, `parent_print_ISSN`, and `parent_online_ISSN`. Rename `resource_name`, `publisher`, `platform`, `authors`, `publication_date`, `article_version`, `DOI`, `proprietary_ID`, `print_ISSN`, `data_type`, `YOP`, `access_type`, and `access_method`.

In [8]:
# Builders to call, in order, for each report type. The first field in TR is for
# platform, so the platform operations go last there; writing them before the
# other TR operations means many metadata fields aren't filled down properly.
metadata_builders_by_report_type = {
    "PR": (
        rename_and_fill_down_platform,
    ),
    "DR": (
        rename_and_fill_down_platform,
        rename_fill_down_and_move_proprietary_ID,
        rename_and_fill_down_DR_resource_name,
    ),
    "TR": (
        rename_fill_down_and_move_proprietary_ID,
        rename_fill_down_and_move_TR_fields,
        rename_and_fill_down_platform,
    ),
    "IR": (
        rename_and_fill_down_platform,
        rename_fill_down_and_move_IR_fields,
    ),
}

# Mode 'w' starts the file fresh; the later steps append to it.
with open(JSON_file_path, 'w') as file:
    # Publisher operations are written first for every report type
    file.write(rename_and_fill_down_publisher())
    # An unrecognized report type gets no report-specific operations
    for build_operations in metadata_builders_by_report_type.get(report_type, ()):
        file.write(build_operations())
    file.write(rename_data_type_and_access_method())

## 4. Add `statistics_source_ID` and `report_type` Fields
This is after the metadata field renaming so one of those renamed fields can serve as the base field for the OpenRefine operation.

In [9]:
# Append so the metadata operations already in the file are kept.
with JSON_file_path.open('a') as file:
    file.write(add_stats_source_and_report_type_fields(statistics_source_ID, report_type))

## 5. Pivot `Performance` Fields
The split operation allows the same OpenRefine JSON to be used for all reports.

In [10]:
# Splitting at ' - ' yields one more piece than there are separators; that piece
# count is the number in the name of the last column the split creates.
number_of_fields_when_split = first_performance_field.count(' - ') + 1
with JSON_file_path.open('a') as file:
    file.writelines([
        pivot_performance_fields(first_performance_field, number_of_fields_when_split),
        remove_null_rows(),
        make_usage_date_dates(),
    ])

## 6. Create Dataframe

In [11]:
# Append the final cleanup, reorder, and `df` column operations to the file.
with JSON_file_path.open('a') as file:
    file.write(make_final_field_adjustments())