# Load description for each variable in each pair

This notebook recreates and extends the analysis from https://github.com/amit-sharma/chatgpt-causality-pairs.
It focuses on the Tübingen cause-effect pairs dataset from https://webdav.tuebingen.mpg.de/cause-effect/

In [1]:
from dotenv import load_dotenv
from typing import Dict, List, Tuple
import guidance
import os

# Load variables from a .env file (python-dotenv) into the process environment
# so the credentials below can be read from it.
load_dotenv()

# OpenAI credentials; each value is None when the variable is not set.
api_key = os.environ.get("api_key")
organization = os.environ.get("organization")

# Two guidance LLM handles sharing the same credentials; only the model differs.
gpt4 = guidance.llms.OpenAI(
    model="gpt-4",
    api_key=api_key,
    organization=organization,
)
davinci = guidance.llms.OpenAI(
    model="text-davinci-003",
    api_key=api_key,
    organization=organization,
)

In [2]:
from tuebingen_model_suggester import TuebingenModelSuggester, Strategy
# Suggester from the project-local tuebingen_model_suggester module, built
# with its default constructor arguments.
modeler = TuebingenModelSuggester()

In [3]:
# TODO download the dataset if needed and then read in the CSV
# Until then, the per-pair metadata is hard-coded below, keyed by pair id
# ('pair0001', 'pair0002', ...). Each entry is a dict with:
#   'var1', 'var2'           - variable names (stored with a leading space)
#   'ground_truth'           - direction label string (e.g. ' R')
#   'truth_ab', 'truth_ba'   - integer 0/1 flags
#   'context'                - the pair's dataset description text
#   'var1_desc', 'var2_desc' - one-sentence description of each variable
saved_pairs_info: Dict[str, Dict[str, object]] = {}

saved_pairs_info['pair0001'] = {'var1': ' Altitude', 'var2': ' Temperature', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Information for pairs0001:\n\nDWD data (Deutscher Wetterdienst)\n\ndata was taken at 349 stations\n\ntaken from\nhttp://www.dwd.de/bvbw/appmanager/bvbw/dwdwwwDesktop/?_nfpb=true&_pageLabel=_dwdwww_klima_umwelt_klimadaten_deutschland&T82002gsbDocumentPath=Navigation%2FOeffentlichkeit%2FKlima__Umwelt%2FKlimadaten%2Fkldaten__kostenfrei%2Fausgabe__mittelwerte__node.html__nnn%3Dtrue\n\nmore recent  link (Jan 2010):\nhttp://www.dwd.de/bvbw/appmanager/bvbw/dwdwwwDesktop/?_nfpb=true&_pageLabel=_dwdwww_klima_umwelt_klimadaten_deutschland&T82002gsbDocumentPath=Navigation%2FOeffentlichkeit%2FKlima__Umwelt%2FKlimadaten%2Fkldaten__kostenfrei%2Fausgabe__mittelwerte__node.html__nnn%3Dtrue\n\n\nx: altitude\n\ny: temperature (average over 1961-1990)\n\n', 'var1_desc': 'Altitude refers to the height of an object or point in relation to sea level or ground level.', 'var2_desc': 'Temperature is a measure of the average kinetic energy of the particles in a system, often used in meteorology to indicate the degree of heat or cold in the atmosphere.'}
saved_pairs_info['pair0002'] = {'var1': ' Altitude', 'var2': ' Precipitation', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Information for pairs0002:\n\nDWD data (Deutscher Wetterdienst)\n\ndata was taken at 349 stations\n\ntaken from\nhttp://www.dwd.de/bvbw/appmanager/bvbw/dwdwwwDesktop/?_nfpb=true&_pageLabel=_dwdwww_klima_umwelt_klimadaten_deutschland&T82002gsbDocumentPath=Navigation%2FOeffentlichkeit%2FKlima__Umwelt%2FKlimadaten%2Fkldaten__kostenfrei%2Fausgabe__mittelwerte__node.html__nnn%3Dtrue\n\nmore recent  link (Jan 2010):\nhttp://www.dwd.de/bvbw/appmanager/bvbw/dwdwwwDesktop/?_nfpb=true&_pageLabel=_dwdwww_klima_umwelt_klimadaten_deutschland&T82002gsbDocumentPath=Navigation%2FOeffentlichkeit%2FKlima__Umwelt%2FKlimadaten%2Fkldaten__kostenfrei%2Fausgabe__mittelwerte__node.html__nnn%3Dtrue\n\n\nx: altitude\n\ny: precipitation (yearly value averaged over 1961-1990)\n\n\n', 'var1_desc': 'Altitude is a geographical concept referring to the height of a specific location above a fixed reference point, often the mean sea level.', 'var2_desc': "Precipitation is a meteorological phenomenon that includes all forms of water, liquid or solid, falling from the atmosphere to the earth's surface, typically measured in terms of the amount of water (in millimeters) that is deposited over a specified area over a certain period of time."}
saved_pairs_info['pair0003'] = {'var1': ' Longitude', 'var2': ' Temperature', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Information for pairs0003:\n\nDWD data (Deutscher Wetterdienst)\n\ndata was taken at 349 stations\n\ntaken from\nhttp://www.dwd.de/bvbw/appmanager/bvbw/dwdwwwDesktop/?_nfpb=true&_pageLabel=_dwdwww_klima_umwelt_klimadaten_deutschland&T82002gsbDocumentPath=Navigation%2FOeffentlichkeit%2FKlima__Umwelt%2FKlimadaten%2Fkldaten__kostenfrei%2Fausgabe__mittelwerte__node.html__nnn%3Dtrue\n\nmore recent  link (Jan 2010):\nhttp://www.dwd.de/bvbw/appmanager/bvbw/dwdwwwDesktop/?_nfpb=true&_pageLabel=_dwdwww_klima_umwelt_klimadaten_deutschland&T82002gsbDocumentPath=Navigation%2FOeffentlichkeit%2FKlima__Umwelt%2FKlimadaten%2Fkldaten__kostenfrei%2Fausgabe__mittelwerte__node.html__nnn%3Dtrue\n\n\nx: longitude\n\ny: temperature (averaged over 1961-1990)\n\n', 'var1_desc': "Longitude is a geographic coordinate that specifies the east-west position of a point on the Earth's surface, measured in degrees from the prime meridian.", 'var2_desc': 'Temperature is a quantitative measure of the degree of heat present in a substance or an object, often expressed in degrees, and in this context, it refers to the averaged atmospheric temperature recorded at various stations by the Deutscher Wetterdienst from 1961-1990.'}
saved_pairs_info['pair0004'] = {'var1': ' Altitude', 'var2': ' Sunshine hours', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Information for pairs0004:\n\nDWD data (Deutscher Wetterdienst)\n\ndata was taken at 349 stations\n\ntaken from\nhttp://www.dwd.de/bvbw/appmanager/bvbw/dwdwwwDesktop/?_nfpb=true&_pageLabel=_dwdwww_klima_umwelt_klimadaten_deutschland&T82002gsbDocumentPath=Navigation%2FOeffentlichkeit%2FKlima__Umwelt%2FKlimadaten%2Fkldaten__kostenfrei%2Fausgabe__mittelwerte__node.html__nnn%3Dtrue\n\nmore recent  link (Jan 2010):\nhttp://www.dwd.de/bvbw/appmanager/bvbw/dwdwwwDesktop/?_nfpb=true&_pageLabel=_dwdwww_klima_umwelt_klimadaten_deutschland&T82002gsbDocumentPath=Navigation%2FOeffentlichkeit%2FKlima__Umwelt%2FKlimadaten%2Fkldaten__kostenfrei%2Fausgabe__mittelwerte__node.html__nnn%3Dtrue\n\nmore recent link (Oct 2012):\nhttp://www.dwd.de/bvbw/appmanager/bvbw/dwdwwwDesktop?_nfpb=true&_pageLabel=_dwdwww_klima_umwelt_klimadaten_deutschland&T82002gsbDocumentPath=Navigation%2FOeffentlichkeit%2FKlima__Umwelt%2FKlimadaten%2Fkldaten__kostenfrei%2Fausgabe__mittelwerte__akt__node.html%3F__nnn%3Dtrue\n\nx: altitude\n\ny: sunshine (yearly value averaged over 1961-1990)\n\n', 'var1_desc': 'Altitude is a geographical term referring to the height of a specific location above a fixed reference point, often the mean sea level.', 'var2_desc': 'Sunshine hours refer to the total number of hours during which the sun is visible and directly contributing to daylight in a specific location, typically measured on an annual basis.'}
saved_pairs_info['pair0005'] = {'var1': ' Age', 'var2': ' Length', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Information for pairs0005:\n\nhttps://archive.ics.uci.edu/ml/datasets/Abalone\n\n1. Title of Database: Abalone data\n\n2. Sources:\n\n   (a) Original owners of database:\n\tMarine Resources Division\n\tMarine Research Laboratories - Taroona\n\tDepartment of Primary Industry and Fisheries, Tasmania\n\tGPO Box 619F, Hobart, Tasmania 7001, Australia\n\t(contact: Warwick Nash +61 02 277277, wnash@dpi.tas.gov.au)\n\n   (b) Donor of database:\n\tSam Waugh (Sam.Waugh@cs.utas.edu.au)\n\tDepartment of Computer Science, University of Tasmania\n\tGPO Box 252C, Hobart, Tasmania 7001, Australia\n\n   (c) Date received: December 1995\n\n3. Attribute information:\n\n   Given is the attribute name, attribute type, the measurement unit and a\n   brief description.  \n\n\tName\t\tData Type\tMeas.\tDescription\n\t----\t\t---------\t-----\t-----------\nx:\tRings\t\tinteger\t\t\t+1.5 gives the age in years\ny:\tLength\t\tcontinuous\tmm\tLongest shell measurement\n\n\n', 'var1_desc': 'In the context of the Abalone dataset, the concept of "Age" is derived from the "Rings" attribute, where the age in years is calculated by adding 1.5 to the integer value of the rings.', 'var2_desc': "In the context of the Abalone dataset, 'Length' is a continuous variable measured in millimeters, representing the longest shell measurement of the abalone."}
saved_pairs_info['pair0006'] = {'var1': ' Age', 'var2': ' Shell weight', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Information for pairs0006:\n\nhttps://archive.ics.uci.edu/ml/datasets/Abalone\n\n1. Title of Database: Abalone data\n\n2. Sources:\n\n   (a) Original owners of database:\n\tMarine Resources Division\n\tMarine Research Laboratories - Taroona\n\tDepartment of Primary Industry and Fisheries, Tasmania\n\tGPO Box 619F, Hobart, Tasmania 7001, Australia\n\t(contact: Warwick Nash +61 02 277277, wnash@dpi.tas.gov.au)\n\n   (b) Donor of database:\n\tSam Waugh (Sam.Waugh@cs.utas.edu.au)\n\tDepartment of Computer Science, University of Tasmania\n\tGPO Box 252C, Hobart, Tasmania 7001, Australia\n\n   (c) Date received: December 1995\n\n3. Attribute information:\n\n   Given is the attribute name, attribute type, the measurement unit and a\n   brief description.  \n\n\tName\t\tData Type\tMeas.\tDescription\n\t----\t\t---------\t-----\t-----------\nx:\tRings\t\tinteger\t\t\t+1.5 gives the age in years\ny:\tShell weight\tcontinuous\tgrams\tafter being dried\n\n\n', 'var1_desc': "In the context of the Abalone dataset, Age is a derived attribute, calculated by adding 1.5 to the Rings attribute, which represents the number of rings in the abalone's shell, a common biological indicator of age in many species.", 'var2_desc': "Shell weight is a continuous variable measured in grams, representing the weight of an abalone's shell after it has been dried."}
saved_pairs_info['pair0007'] = {'var1': ' Age', 'var2': ' Diameter', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Information for pairs0007:\n\nhttps://archive.ics.uci.edu/ml/datasets/Abalone\n\n1. Title of Database: Abalone data\n\n2. Sources:\n\n   (a) Original owners of database:\n\tMarine Resources Division\n\tMarine Research Laboratories - Taroona\n\tDepartment of Primary Industry and Fisheries, Tasmania\n\tGPO Box 619F, Hobart, Tasmania 7001, Australia\n\t(contact: Warwick Nash +61 02 277277, wnash@dpi.tas.gov.au)\n\n   (b) Donor of database:\n\tSam Waugh (Sam.Waugh@cs.utas.edu.au)\n\tDepartment of Computer Science, University of Tasmania\n\tGPO Box 252C, Hobart, Tasmania 7001, Australia\n\n   (c) Date received: December 1995\n\n3. Attribute information:\n\n   Given is the attribute name, attribute type, the measurement unit and a\n   brief description.  \n\n\tName\t\tData Type\tMeas.\tDescription\n\t----\t\t---------\t-----\t-----------\nx:\tRings\t\tinteger\t\t\t+1.5 gives the age in years\ny:\tDiameter\tcontinuous\tmm\tperpendicular to length\n\n\n', 'var1_desc': "In the context of the Abalone dataset, the concept of Age is derived from the 'Rings' attribute, where each ring represents a year, with an additional 1.5 years added to account for the abalone's early life.", 'var2_desc': 'Diameter in the context of the Abalone dataset refers to a continuous variable measured in millimeters, representing the perpendicular length of the abalone.'}
saved_pairs_info['pair0008'] = {'var1': ' Age', 'var2': ' Height', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Information for pairs0008:\n\nhttps://archive.ics.uci.edu/ml/datasets/Abalone\n\n1. Title of Database: Abalone data\n\n2. Sources:\n\n   (a) Original owners of database:\n\tMarine Resources Division\n\tMarine Research Laboratories - Taroona\n\tDepartment of Primary Industry and Fisheries, Tasmania\n\tGPO Box 619F, Hobart, Tasmania 7001, Australia\n\t(contact: Warwick Nash +61 02 277277, wnash@dpi.tas.gov.au)\n\n   (b) Donor of database:\n\tSam Waugh (Sam.Waugh@cs.utas.edu.au)\n\tDepartment of Computer Science, University of Tasmania\n\tGPO Box 252C, Hobart, Tasmania 7001, Australia\n\n   (c) Date received: December 1995\n\n3. Attribute information:\n\n   Given is the attribute name, attribute type, the measurement unit and a\n   brief description.  \n\n\tName\t\tData Type\tMeas.\tDescription\n\t----\t\t---------\t-----\t-----------\nx:\tRings\t\tinteger\t\t\t+1.5 gives the age in years\ny:\tHeight\t\tcontinuous\tmm\twith meat in shell\n\n\n', 'var1_desc': "In the context of the Abalone dataset, the concept of 'Age' refers to the age of the abalone, which is determined by adding 1.5 to the number of rings, an integer value, the abalone has.", 'var2_desc': "The 'Height' in the Abalone dataset is a continuous variable measured in millimeters, representing the measurement of the abalone (a type of marine snail) with its meat still in the shell."}
saved_pairs_info['pair0009'] = {'var1': ' Age', 'var2': ' Whole weight', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Information for pairs0009:\n\nhttps://archive.ics.uci.edu/ml/datasets/Abalone\n\n1. Title of Database: Abalone data\n\n2. Sources:\n\n   (a) Original owners of database:\n\tMarine Resources Division\n\tMarine Research Laboratories - Taroona\n\tDepartment of Primary Industry and Fisheries, Tasmania\n\tGPO Box 619F, Hobart, Tasmania 7001, Australia\n\t(contact: Warwick Nash +61 02 277277, wnash@dpi.tas.gov.au)\n\n   (b) Donor of database:\n\tSam Waugh (Sam.Waugh@cs.utas.edu.au)\n\tDepartment of Computer Science, University of Tasmania\n\tGPO Box 252C, Hobart, Tasmania 7001, Australia\n\n   (c) Date received: December 1995\n\n3. Attribute information:\n\n   Given is the attribute name, attribute type, the measurement unit and a\n   brief description.  \n\n\tName\t\tData Type\tMeas.\tDescription\n\t----\t\t---------\t-----\t-----------\nx:\tRings\t\tinteger\t\t\t+1.5 gives the age in years\ny:\tWhole weight\tcontinuous\tgrams\twhole abalone\n\n\n', 'var1_desc': "The concept of 'Age' in the context of the Abalone dataset refers to the age of the abalone, which is determined by adding 1.5 to the integer value of the 'Rings' attribute.", 'var2_desc': 'Whole weight is a continuous variable measured in grams that represents the total weight of the abalone.'}
saved_pairs_info['pair0010'] = {'var1': ' Age', 'var2': ' Shucked weight', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Information for pairs0010:\n\nhttps://archive.ics.uci.edu/ml/datasets/Abalone\n\n1. Title of Database: Abalone data\n\n2. Sources:\n\n   (a) Original owners of database:\n\tMarine Resources Division\n\tMarine Research Laboratories - Taroona\n\tDepartment of Primary Industry and Fisheries, Tasmania\n\tGPO Box 619F, Hobart, Tasmania 7001, Australia\n\t(contact: Warwick Nash +61 02 277277, wnash@dpi.tas.gov.au)\n\n   (b) Donor of database:\n\tSam Waugh (Sam.Waugh@cs.utas.edu.au)\n\tDepartment of Computer Science, University of Tasmania\n\tGPO Box 252C, Hobart, Tasmania 7001, Australia\n\n   (c) Date received: December 1995\n\n3. Attribute information:\n\n   Given is the attribute name, attribute type, the measurement unit and a\n   brief description.  \n\n\tName\t\tData Type\tMeas.\tDescription\n\t----\t\t---------\t-----\t-----------\nx:\tRings\t\tinteger\t\t\t+1.5 gives the age in years\ny:\tShucked weight\tcontinuous\tgrams\tweight of meat\n\n\n', 'var1_desc': "In the context of the Abalone dataset, Age is a derived attribute, calculated by adding 1.5 to the number of rings in the abalone, representing the abalone's age in years.", 'var2_desc': 'Shucked weight is a continuous variable measured in grams that represents the weight of the meat from the abalone.'}
saved_pairs_info['pair0011'] = {'var1': ' Age', 'var2': ' Viscera weight', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Information for pairs0011:\n\nhttps://archive.ics.uci.edu/ml/datasets/Abalone\n\n1. Title of Database: Abalone data\n\n2. Sources:\n\n   (a) Original owners of database:\n\tMarine Resources Division\n\tMarine Research Laboratories - Taroona\n\tDepartment of Primary Industry and Fisheries, Tasmania\n\tGPO Box 619F, Hobart, Tasmania 7001, Australia\n\t(contact: Warwick Nash +61 02 277277, wnash@dpi.tas.gov.au)\n\n   (b) Donor of database:\n\tSam Waugh (Sam.Waugh@cs.utas.edu.au)\n\tDepartment of Computer Science, University of Tasmania\n\tGPO Box 252C, Hobart, Tasmania 7001, Australia\n\n   (c) Date received: December 1995\n\n3. Attribute information:\n\n   Given is the attribute name, attribute type, the measurement unit and a\n   brief description.  \n\n\tName\t\tData Type\tMeas.\tDescription\n\t----\t\t---------\t-----\t-----------\nx:\tRings\t\tinteger\t\t\t+1.5 gives the age in years\ny:\tViscera weight\tcontinuous\tgrams\tgut weight (after bleeding)\n\n\n', 'var1_desc': "In the context of the Abalone dataset, Age is a derived attribute, calculated by adding 1.5 to the number of rings an abalone has, representing the abalone's age in years.", 'var2_desc': "Viscera weight is a continuous variable measured in grams, representing the weight of an abalone's gut after bleeding."}
saved_pairs_info['pair0012'] = {'var1': ' Age', 'var2': ' Wage per hour', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Information for pairs0012:\n\nCensus Income (KDD) dataset\nhttps://archive.ics.uci.edu/ml/datasets/Census-Income+(KDD)\n\nInfo from the UCI ML webpage:\n\n"This data set contains weighted census data extracted from the 1994 and 1995\ncurrent population surveys conducted by the U.S. Census Bureau.\n\nOriginal Owner:\n\nU.S. Census Bureau\nhttp://www.census.gov/\nUnited States Department of Commerce\n\nDonor:\n\nTerran Lane and Ronny Kohavi\nData Mining and Visualization\nSilicon Graphics.\nterran \'@\' ecn.purdue.edu, ronnyk \'@\' sgi.com \n\nThe instance weight indicates the number of people in the population that each\nrecord represents due to stratified sampling. To do real analysis and derive\nconclusions, this field must be used. This attribute should *not* be used in\nthe classifiers."\n\nWe did not use the instance weight here.\n\n\nx: Age\n\ny: Wage per hour\n\n\n', 'var1_desc': 'Age, in the context of the Census Income (KDD) dataset, refers to the number of years a person has lived, which is a demographic factor used to analyze the wage per hour in the U.S. population.', 'var2_desc': 'Wage per hour is a measure of income that represents the amount of money an individual earns for each hour of work.'}
saved_pairs_info['pair0013'] = {'var1': ' Displacement', 'var2': ' Fuel consumption', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Information for pairs0013:\n\nAuto-Mpg Data\n\nhttp://archive.ics.uci.edu/ml/datasets/Auto+MPG\n\n1. Sources:\n   (a) Origin:  This dataset was taken from the StatLib library which is\n                maintained at Carnegie Mellon University. The dataset was \n                used in the 1983 American Statistical Association Exposition.\n   (c) Date: July 7, 1993\n\n2. Past Usage:\n    -  See 2b (above)\n    -  Quinlan,R. (1993). Combining Instance-Based and Model-Based Learning.\n       In Proceedings on the Tenth International Conference of Machine \n       Learning, 236-243, University of Massachusetts, Amherst. Morgan\n       Kaufmann.\n\n3. Relevant Information:\n\n   This dataset is a slightly modified version of the dataset provided in\n   the StatLib library.  In line with the use by Ross Quinlan (1993) in\n   predicting the attribute "mpg", 8 of the original instances were removed \n   because they had unknown values for the "mpg" attribute.  The original \n   dataset is available in the file "auto-mpg.data-original".\n\n   "The data concerns city-cycle fuel consumption in miles per gallon,\n    to be predicted in terms of 3 multivalued discrete and 5 continuous\n    attributes." (Quinlan, 1993)\n\nAttribute information:\n\nmpg:           continuous\ndisplacement:  continuous\n\nx: displacement\n\ny: mpg\n\n', 'var1_desc': "Displacement, in the context of the Auto-MPG dataset, is a continuous attribute representing the engine's size or capacity, typically measured in cubic centimeters (cc) or liters (L), which is used to predict the miles per gallon (mpg) of a vehicle.", 'var2_desc': 'Fuel consumption refers to the amount of fuel used by a vehicle over a certain distance, often measured in miles per gallon (mpg), and is influenced by factors such as engine displacement and driving conditions.'}
saved_pairs_info['pair0014'] = {'var1': ' Horse power', 'var2': ' Fuel consumption', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Information for pairs0014:\n\nAuto-Mpg Data\n\nhttp://archive.ics.uci.edu/ml/datasets/Auto+MPG\n\n1. Sources:\n   (a) Origin:  This dataset was taken from the StatLib library which is\n                maintained at Carnegie Mellon University. The dataset was \n                used in the 1983 American Statistical Association Exposition.\n   (c) Date: July 7, 1993\n\n2. Past Usage:\n    -  See 2b (above)\n    -  Quinlan,R. (1993). Combining Instance-Based and Model-Based Learning.\n       In Proceedings on the Tenth International Conference of Machine \n       Learning, 236-243, University of Massachusetts, Amherst. Morgan\n       Kaufmann.\n\n3. Relevant Information:\n\n   This dataset is a slightly modified version of the dataset provided in\n   the StatLib library.  In line with the use by Ross Quinlan (1993) in\n   predicting the attribute "mpg", 8 of the original instances were removed \n   because they had unknown values for the "mpg" attribute.  The original \n   dataset is available in the file "auto-mpg.data-original".\n\n   "The data concerns city-cycle fuel consumption in miles per gallon,\n    to be predicted in terms of 3 multivalued discrete and 5 continuous\n    attributes." (Quinlan, 1993)\n\nAttribute information:\n\nmpg:         continuous\nhorsepower:  continuous\n\nx: horsepower\n\ny: mpg\n\n', 'var1_desc': "Horsepower is a unit of measurement signifying the power an engine produces, directly influencing a vehicle's speed and performance.", 'var2_desc': 'Fuel consumption refers to the amount of fuel a vehicle uses to travel a certain distance, often measured in miles per gallon (mpg), and can be influenced by factors such as horsepower and driving conditions.'}
saved_pairs_info['pair0015'] = {'var1': ' Weight', 'var2': ' Fuel consumption', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Information for pairs0015:\n\nAuto-Mpg Data\n\nhttp://archive.ics.uci.edu/ml/datasets/Auto+MPG\n\n1. Sources:\n   (a) Origin:  This dataset was taken from the StatLib library which is\n                maintained at Carnegie Mellon University. The dataset was \n                used in the 1983 American Statistical Association Exposition.\n   (c) Date: July 7, 1993\n\n2. Past Usage:\n    -  See 2b (above)\n    -  Quinlan,R. (1993). Combining Instance-Based and Model-Based Learning.\n       In Proceedings on the Tenth International Conference of Machine \n       Learning, 236-243, University of Massachusetts, Amherst. Morgan\n       Kaufmann.\n\n3. Relevant Information:\n\n   This dataset is a slightly modified version of the dataset provided in\n   the StatLib library.  In line with the use by Ross Quinlan (1993) in\n   predicting the attribute "mpg", 8 of the original instances were removed \n   because they had unknown values for the "mpg" attribute.  The original \n   dataset is available in the file "auto-mpg.data-original".\n\n   "The data concerns city-cycle fuel consumption in miles per gallon,\n    to be predicted in terms of 3 multivalued discrete and 5 continuous\n    attributes." (Quinlan, 1993)\n\nAttribute information:\n\nmpg:      continuous\nweight:\t  continuous\n\nx: weight\n\ny: mpg\n\n', 'var1_desc': 'In the context of the Auto-MPG dataset, "Weight" is a continuous attribute representing the weight of a vehicle, which is used to predict the vehicle\'s fuel consumption in miles per gallon.', 'var2_desc': 'Fuel consumption refers to the amount of fuel a vehicle uses to travel a certain distance, often measured in miles per gallon (mpg), and is influenced by factors such as vehicle weight.'}
saved_pairs_info['pair0016'] = {'var1': ' Horsepower', 'var2': ' Acceleration', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Information for pairs0016:\n\nAuto-Mpg Data\n\nhttp://archive.ics.uci.edu/ml/datasets/Auto+MPG\n\n1. Sources:\n   (a) Origin:  This dataset was taken from the StatLib library which is\n                maintained at Carnegie Mellon University. The dataset was \n                used in the 1983 American Statistical Association Exposition.\n   (c) Date: July 7, 1993\n\n2. Past Usage:\n    -  See 2b (above)\n    -  Quinlan,R. (1993). Combining Instance-Based and Model-Based Learning.\n       In Proceedings on the Tenth International Conference of Machine \n       Learning, 236-243, University of Massachusetts, Amherst. Morgan\n       Kaufmann.\n\n3. Relevant Information:\n\n   This dataset is a slightly modified version of the dataset provided in\n   the StatLib library.  In line with the use by Ross Quinlan (1993) in\n   predicting the attribute "mpg", 8 of the original instances were removed \n   because they had unknown values for the "mpg" attribute.  The original \n   dataset is available in the file "auto-mpg.data-original".\n\n   "The data concerns city-cycle fuel consumption in miles per gallon,\n    to be predicted in terms of 3 multivalued discrete and 5 continuous\n    attributes." (Quinlan, 1993)\n\nAttribute information:\n\nhorsepower:       continuous\nacceleration:\t  continuous\n\nx: horsepower\n\ny: acceleration\n\n', 'var1_desc': "Horsepower is a unit of measurement used to quantify the power output of an engine, often used in the context of automotive performance to indicate the engine's capacity to perform work.", 'var2_desc': 'Acceleration, in the context of the Auto-MPG dataset, is a continuous attribute representing the rate at which a vehicle increases its speed.'}
saved_pairs_info['pair0017'] = {'var1': ' Age', 'var2': ' Dividends from stocks', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Information for pairs0017:\n\nCensus Income (KDD) dataset\nhttps://archive.ics.uci.edu/ml/datasets/Census-Income+(KDD)\n\nInfo from the UCI ML webpage:\n\n"This data set contains weighted census data extracted from the 1994 and 1995\ncurrent population surveys conducted by the U.S. Census Bureau.\n\nOriginal Owner:\n\nU.S. Census Bureau\nhttp://www.census.gov/\nUnited States Department of Commerce\n\nDonor:\n\nTerran Lane and Ronny Kohavi\nData Mining and Visualization\nSilicon Graphics.\nterran \'@\' ecn.purdue.edu, ronnyk \'@\' sgi.com \n\nThe instance weight indicates the number of people in the population that each\nrecord represents due to stratified sampling. To do real analysis and derive\nconclusions, this field must be used. This attribute should *not* be used in\nthe classifiers."\n\nWe did not use the instance weight here.\n\nx: Age \n\ny: Dividends from stock\n\n\n', 'var1_desc': 'Age, in the context of the Census Income (KDD) dataset, refers to the numerical representation of the years a person has lived, serving as a demographic variable that can influence factors such as income and dividends from stock.', 'var2_desc': "Dividends from stocks refer to the portion of a company's earnings distributed to its shareholders, typically in cash or additional shares, serving as a source of income apart from any capital gains realized from the sale of the stock."}
# pair0018: age of child vs. urinary GAG concentration. The quotation marks
# around "normal" in the context are written as \u2018/\u2019 escapes; the
# cached text previously carried them as mis-decoded UTF-8 bytes (mojibake).
saved_pairs_info['pair0018'] = {'var1': ' Age', 'var2': ' Concentration GAG', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': "Information for pairs0018:\n\nDescription:\n\nData were collected on the concentration of a chemical GAG in the urine of 314 children aged from zero to seventeen years. The aim of the study was to produce a chart to help a paediatrican to assess if a child's GAG concentration is \u2018normal\u2019.\n\n\nx:    age of child in years. \n\ny:    concentration of GAG (the units have been lost). \n\nSource\n\nMrs Susan Prosser, Paediatrics Department, University of Oxford, via Department of Statistics Consulting Service.\nReferences\n\nVenables, W. N. and Ripley, B. D. (2002) Modern Applied Statistics with S. Fourth edition. Springer. \n\n", 'var1_desc': "In this context, 'Age' refers to the number of years a child has lived, ranging from zero to seventeen, used as a variable to study its correlation with the concentration of a chemical GAG in their urine.", 'var2_desc': "Concentration of GAG refers to the quantity of a chemical known as Glycosaminoglycans (GAG) present in a child's urine, used to determine if the child's GAG levels are within the normal range."}
saved_pairs_info['pair0019'] = {'var1': ' Current duration', 'var2': ' Next interval', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Information for pairs0019:\n\nOld Faithful geyser data\n\nOld Faithful is a hydrothermal geyser in Yellowstone National Park in the state of Wyoming, USA. \nEach observation corresponds to a single erruption.\n\nx:    duration of erruption in minutes\n\ny:    time to the next erruption in minutes\n\n\nSource\nhttp://research.microsoft.com/en-us/um/people/cmbishop/PRML/webdatasets/datasets.htm\n\n\n', 'var1_desc': 'Current duration refers to the length of a single eruption of the Old Faithful geyser, measured in minutes.', 'var2_desc': 'The "Next interval" in this context refers to the time duration in minutes until the next eruption of the Old Faithful geyser after a specific eruption has occurred.'}
saved_pairs_info['pair0020'] = {'var1': ' Latitude', 'var2': ' Temperature', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Information for pairs0020:\n\nDWD data (Deutscher Wetterdienst)\n\ndata was taken at 349 stations\n\ntaken from\nhttp://www.dwd.de/bvbw/appmanager/bvbw/dwdwwwDesktop/?_nfpb=true&_pageLabel=_dwdwww_klima_umwelt_klimadaten_deutschland&T82002gsbDocumentPath=Navigation%2FOeffentlichkeit%2FKlima__Umwelt%2FKlimadaten%2Fkldaten__kostenfrei%2Fausgabe__mittelwerte__node.html__nnn%3Dtrue\n\nmore recent  link (Jan 2010):\nhttp://www.dwd.de/bvbw/appmanager/bvbw/dwdwwwDesktop/?_nfpb=true&_pageLabel=_dwdwww_klima_umwelt_klimadaten_deutschland&T82002gsbDocumentPath=Navigation%2FOeffentlichkeit%2FKlima__Umwelt%2FKlimadaten%2Fkldaten__kostenfrei%2Fausgabe__mittelwerte__node.html__nnn%3Dtrue\n\n\nx: latitude\n\ny: temperature (averaged over 1961-1990)\n\n', 'var1_desc': "Latitude is a geographical coordinate that specifies the north-south position of a point on the Earth's surface, measured in degrees from the equator, which is 0 degrees, with the poles at 90 degrees north and south.", 'var2_desc': 'Temperature is a quantitative measure of the degree of heat present in a substance or a system, often expressed in units of degrees, and in this context, it refers to the average atmospheric temperature recorded at various stations by the Deutscher Wetterdienst (DWD) between 1961-1990.'}
saved_pairs_info['pair0021'] = {'var1': ' Longitude', 'var2': ' Precipitation', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Information for pairs0021:\n\nDWD data (Deutscher Wetterdienst)\n\ndata was taken at 349 stations\n\ntaken from\nhttp://www.dwd.de/bvbw/appmanager/bvbw/dwdwwwDesktop/?_nfpb=true&_pageLabel=_dwdwww_klima_umwelt_klimadaten_deutschland&T82002gsbDocumentPath=Navigation%2FOeffentlichkeit%2FKlima__Umwelt%2FKlimadaten%2Fkldaten__kostenfrei%2Fausgabe__mittelwerte__node.html__nnn%3Dtrue\n\nmore recent  link (Jan 2010):\nhttp://www.dwd.de/bvbw/appmanager/bvbw/dwdwwwDesktop/?_nfpb=true&_pageLabel=_dwdwww_klima_umwelt_klimadaten_deutschland&T82002gsbDocumentPath=Navigation%2FOeffentlichkeit%2FKlima__Umwelt%2FKlimadaten%2Fkldaten__kostenfrei%2Fausgabe__mittelwerte__node.html__nnn%3Dtrue\n\n\nx: longitude\n\ny: precipitation (yearly value averaged over 1961-1990)\n\n', 'var1_desc': "Longitude is a geographical coordinate that represents the east-west position of a point on the Earth's surface, measured in degrees from the prime meridian.", 'var2_desc': "Precipitation is a meteorological phenomenon that includes all forms of water, liquid or solid, falling from the atmosphere to the Earth's surface, typically measured in terms of the amount of water (in millimeters) deposited over a specific period (like yearly)."}
saved_pairs_info['pair0022'] = {'var1': ' Age', 'var2': ' Height', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Information for pairs0022:\n\nhttps://archive.ics.uci.edu/ml/datasets/Arrhythmia\n\nCardiac Arrhythmia Database\n\n1. Sources:\n   (a) Original owners od Database:\n       -- 1. H. Altay Guvenir, PhD., \n             Bilkent University,\n             Department of Computer Engineering and Information Science,\n             06533 Ankara, Turkey\n             Phone: +90 (312) 266 4133\n             Email: guvenir@cs.bilkent.edu.tr\n\n       -- 2. Burak Acar, M.S.,\n             Bilkent University, \n             EE Eng. Dept. \n             06533 Ankara, Turkey\n             Email: buraka@ee.bilkent.edu.tr\n\n       -- 2. Haldun Muderrisoglu, M.D., Ph.D., \n             Baskent University, \n             School of Medicine\n             Ankara, Turkey\n\n   (b) Donor: H. Altay Guvenir\n              Bilkent University,\n              Department of Computer Engineering and Information Science,\n              06533 Ankara, Turkey\n              Phone: +90 (312) 266 4133\n              Email: guvenir@cs.bilkent.edu.tr\n\n   (c) Date: January, 1998\n\n2. Past Usage:\n   1. H. Altay Guvenir, Burak Acar, Gulsen Demiroz, Ayhan Cekin\n      "A Supervised Machine Learning Algorithm for Arrhythmia Analysis"\n      Proceedings of the Computers in Cardiology Conference, \n      Lund, Sweden, 1997.\n      \n3. Number of Instances: 452\n\n4. Attribute Information:\n\nAge: \tAge in years  \t\tlinear\nHeight: Height in centimeters \tlinear\n\n\nx: age\n\ny: height\n\n\n', 'var1_desc': "The concept of 'Age' in this context refers to the number of years a person has lived, used as a linear variable in the Cardiac Arrhythmia Database for analyzing arrhythmia patterns.", 'var2_desc': 'Height, in the context of the Cardiac Arrhythmia Database, refers to the stature of an individual measured in centimeters, and is a linear attribute used in the dataset.'}
# pair0023 (cached metadata): var1 = age (years), var2 = weight; 'context' is the description
# text of the Cardiac Arrhythmia Database (452 instances).
# The attribute line for Weight previously read "Height in centimeters" (identical to the
# Height attribute line of the age/height pair), and var2_desc repeated that error by
# defining weight as a height in centimeters. The UCI Arrhythmia attribute list gives Weight
# in kilograms, so both the context line and var2_desc are corrected here.
saved_pairs_info['pair0023'] = {'var1': ' Age', 'var2': ' Weight', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Information for pairs0023:\n\nhttps://archive.ics.uci.edu/ml/datasets/Arrhythmia\n\nCardiac Arrhythmia Database\n\n1. Sources:\n   (a) Original owners od Database:\n       -- 1. H. Altay Guvenir, PhD., \n             Bilkent University,\n             Department of Computer Engineering and Information Science,\n             06533 Ankara, Turkey\n             Phone: +90 (312) 266 4133\n             Email: guvenir@cs.bilkent.edu.tr\n\n       -- 2. Burak Acar, M.S.,\n             Bilkent University, \n             EE Eng. Dept. \n             06533 Ankara, Turkey\n             Email: buraka@ee.bilkent.edu.tr\n\n       -- 2. Haldun Muderrisoglu, M.D., Ph.D., \n             Baskent University, \n             School of Medicine\n             Ankara, Turkey\n\n   (b) Donor: H. Altay Guvenir\n              Bilkent University,\n              Department of Computer Engineering and Information Science,\n              06533 Ankara, Turkey\n              Phone: +90 (312) 266 4133\n              Email: guvenir@cs.bilkent.edu.tr\n\n   (c) Date: January, 1998\n\n2. Past Usage:\n   1. H. Altay Guvenir, Burak Acar, Gulsen Demiroz, Ayhan Cekin\n      "A Supervised Machine Learning Algorithm for Arrhythmia Analysis"\n      Proceedings of the Computers in Cardiology Conference, \n      Lund, Sweden, 1997.\n      \n3. Number of Instances: 452\n\n4. Attribute Information:\n\nAge: \tAge in years  \t\tlinear\nWeight: Weight in kilograms \tlinear\n\n\nx: age\n\ny: weight\n\n\n', 'var1_desc': "'Age' in this context refers to the numerical representation of the years a person has lived, used as a variable in the Cardiac Arrhythmia Database for analyzing arrhythmia through machine learning algorithms.", 'var2_desc': "'Weight' in this context refers to the body weight of the individuals in kilograms, a linear attribute in the Cardiac Arrhythmia Database."}
# pair0024 (cached metadata): var1 = age (years), var2 = heart rate (beats per minute);
# 'context' is the description text of the Cardiac Arrhythmia Database.
# NOTE(review): the 'context' literal is split across two physical lines right after "4. ".
# Presumably a raw line-breaking control character (possibly a form feed carried over from
# the source description) sits there - confirm, and consider writing it as an explicit escape.
saved_pairs_info['pair0024'] = {'var1': ' Age', 'var2': ' Heart rate', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Information for pairs0024:\n\nhttps://archive.ics.uci.edu/ml/datasets/Arrhythmia\n\nCardiac Arrhythmia Database\n\n1. Sources:\n   (a) Original owners od Database:\n       -- 1. H. Altay Guvenir, PhD., \n             Bilkent University,\n             Department of Computer Engineering and Information Science,\n             06533 Ankara, Turkey\n             Phone: +90 (312) 266 4133\n             Email: guvenir@cs.bilkent.edu.tr\n\n       -- 2. Burak Acar, M.S.,\n             Bilkent University, \n             EE Eng. Dept. \n             06533 Ankara, Turkey\n             Email: buraka@ee.bilkent.edu.tr\n\n       -- 2. Haldun Muderrisoglu, M.D., Ph.D., \n             Baskent University, \n             School of Medicine\n             Ankara, Turkey\n\n   (b) Donor: H. Altay Guvenir\n              Bilkent University,\n              Department of Computer Engineering and Information Science,\n              06533 Ankara, Turkey\n              Phone: +90 (312) 266 4133\n              Email: guvenir@cs.bilkent.edu.tr\n\n   (c) Date: January, 1998\n\n2. Past Usage:\n   1. H. Altay Guvenir, Burak Acar, Gulsen Demiroz, Ayhan Cekin\n      "A Supervised Machine Learning Algorithm for Arrhythmia Analysis"\n      Proceedings of the Computers in Cardiology Conference, \n      Lund, Sweden, 1997.\n      \n3. Number of Instances: 452\n\n4. 
Attribute Information:\n\nAge: \t\tAge in years \t \t\t\tlinear\nHeart rate: \tNumber of heart beats per minute \tlinear\n\n\nWe discarded one instance, because heart rate was missing.\n\n\nx: age\n\ny: heart rate\n\n\n', 'var1_desc': "'Age' in this context refers to the number of years a person has lived, used as a variable in the Cardiac Arrhythmia Database to study its potential correlation with heart rate.", 'var2_desc': 'Heart rate, in the context of the Cardiac Arrhythmia Database, refers to the number of heart beats per minute, which is a linear attribute used to analyze arrhythmia.'}
# pair0025 (cached metadata): var1 = cement (kg in a m3 mixture), var2 = concrete compressive
# strength (MPa); 'context' is the Concrete Compressive Strength description (1030 observations).
saved_pairs_info['pair0025'] = {'var1': ' Cement', 'var2': ' Compressive strength', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Information for pairs0025:\n\nConcrete Compressive Strength \n\n---------------------------------\n\nAbstract: Concrete is the most important material in civil engineering. The \nconcrete compressive strength is a highly nonlinear function of age and \ningredients. These ingredients include cement, blast furnace slag, fly ash, \nwater, superplasticizer, coarse aggregate, and fine aggregate.\n\n---------------------------------\n\nSources: \n\n  Original Owner and Donor\n  Prof. I-Cheng Yeh\n  Department of Information Management \n  Chung-Hua University, \n  Hsin Chu, Taiwan 30067, R.O.C.\n  e-mail:icyeh@chu.edu.tw\n  TEL:886-3-5186511\n\n  Date Donated: August 3, 2007\n \n---------------------------------\n\nData Characteristics:\n    \nThe actual concrete compressive strength (MPa) for a given mixture under a \nspecific age (days) was determined from laboratory. Data is in raw form (not scaled). \n\nNumber of instances (observations): 1030\n---------------------------------\n\nName  -- Description\n\nCement -- kg in a m3 mixture \nConcrete compressive strength  -- MPa \n\n\nx: cement\n\ny: compressive strength\n\n\n', 'var1_desc': 'Cement is a key ingredient in a concrete mixture, measured in kilograms per cubic meter (kg/m3), that significantly influences the compressive strength of the concrete.', 'var2_desc': "Compressive strength is a key property of concrete, indicating its ability to resist compression or withstand loads that tend to decrease its size, and it is determined as a nonlinear function of the concrete's age and its ingredients such as cement, blast furnace slag, fly ash, water, superplasticizer, coarse aggregate, and fine aggregate."}
# pair0026 (cached metadata): var1 = blast furnace slag (kg in a m3 mixture), var2 = concrete
# compressive strength (MPa); 'context' is the Concrete Compressive Strength description.
saved_pairs_info['pair0026'] = {'var1': ' Blast furnace slag', 'var2': ' Compressive strength', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Information for pairs0026:\n\nConcrete Compressive Strength \n\n---------------------------------\n\nAbstract: Concrete is the most important material in civil engineering. The \nconcrete compressive strength is a highly nonlinear function of age and \ningredients. These ingredients include cement, blast furnace slag, fly ash, \nwater, superplasticizer, coarse aggregate, and fine aggregate.\n\n---------------------------------\n\nSources: \n\n  Original Owner and Donor\n  Prof. I-Cheng Yeh\n  Department of Information Management \n  Chung-Hua University, \n  Hsin Chu, Taiwan 30067, R.O.C.\n  e-mail:icyeh@chu.edu.tw\n  TEL:886-3-5186511\n\n  Date Donated: August 3, 2007\n \n---------------------------------\n\nData Characteristics:\n    \nThe actual concrete compressive strength (MPa) for a given mixture under a \nspecific age (days) was determined from laboratory. Data is in raw form (not scaled). \n\nNumber of instances (observations): 1030\n---------------------------------\n\nName  -- Description\n\nBlast Furnace Slag -- kg in a m3 mixture \nConcrete compressive strength -- MPa \n\n\nx: blast furnace slag\n\ny: compressive strength\n\n\n', 'var1_desc': 'Blast furnace slag is a byproduct of the iron-making process, used as a supplementary cementitious material in a m3 mixture, which contributes to the compressive strength of concrete.', 'var2_desc': 'Compressive strength refers to the capacity of a material, in this case, concrete, to withstand loads that tend to reduce its size, and it is measured in Megapascals (MPa).'}
# pair0027 (cached metadata): var1 = fly ash (kg in a m3 mixture), var2 = concrete compressive
# strength (MPa); 'context' is the Concrete Compressive Strength description.
saved_pairs_info['pair0027'] = {'var1': ' Fly ash', 'var2': ' Compressive strength', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Information for pairs0027:\n\nConcrete Compressive Strength \n\n---------------------------------\n\nAbstract: Concrete is the most important material in civil engineering. The \nconcrete compressive strength is a highly nonlinear function of age and \ningredients. These ingredients include cement, blast furnace slag, fly ash, \nwater, superplasticizer, coarse aggregate, and fine aggregate.\n\n---------------------------------\n\nSources: \n\n  Original Owner and Donor\n  Prof. I-Cheng Yeh\n  Department of Information Management \n  Chung-Hua University, \n  Hsin Chu, Taiwan 30067, R.O.C.\n  e-mail:icyeh@chu.edu.tw\n  TEL:886-3-5186511\n\n  Date Donated: August 3, 2007\n \n---------------------------------\n\nData Characteristics:\n    \nThe actual concrete compressive strength (MPa) for a given mixture under a \nspecific age (days) was determined from laboratory. Data is in raw form (not scaled). \n\nNumber of instances (observations): 1030\n---------------------------------\n\nName  -- Description\n\nFly Ash -- kg in a m3 mixture \nConcrete compressive strength -- MPa \n\n\nx: fly ash\n\ny: compressive strength\n\n\n', 'var1_desc': 'Fly ash is a byproduct of coal combustion in power plants, often used as a supplementary cementitious material in the production of concrete due to its pozzolanic properties, enhancing the strength and durability of the mixture.', 'var2_desc': 'Compressive strength refers to the capacity of a material, in this case concrete, to withstand loads tending to reduce size, measured in megapascals (MPa), and is a crucial factor in determining the durability and structural integrity of the material.'}
# pair0028 (cached metadata): var1 = water (kg in a m3 mixture), var2 = concrete compressive
# strength (MPa); 'context' is the Concrete Compressive Strength description.
saved_pairs_info['pair0028'] = {'var1': ' Water', 'var2': ' Compressive strength', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Information for pairs0028:\n\nConcrete Compressive Strength \n\n---------------------------------\n\nAbstract: Concrete is the most important material in civil engineering. The \nconcrete compressive strength is a highly nonlinear function of age and \ningredients. These ingredients include cement, blast furnace slag, fly ash, \nwater, superplasticizer, coarse aggregate, and fine aggregate.\n\n---------------------------------\n\nSources: \n\n  Original Owner and Donor\n  Prof. I-Cheng Yeh\n  Department of Information Management \n  Chung-Hua University, \n  Hsin Chu, Taiwan 30067, R.O.C.\n  e-mail:icyeh@chu.edu.tw\n  TEL:886-3-5186511\n\n  Date Donated: August 3, 2007\n \n---------------------------------\n\nData Characteristics:\n    \nThe actual concrete compressive strength (MPa) for a given mixture under a \nspecific age (days) was determined from laboratory. Data is in raw form (not scaled). \n\nNumber of instances (observations): 1030\n---------------------------------\n\nName  -- Description\n\nWater -- kg in a m3 mixture \nConcrete compressive strength -- MPa \n\n\nx: water\n\ny: compressive strength\n\n\n', 'var1_desc': "In the context of concrete compressive strength, 'Water' refers to the quantity of water, measured in kilograms, used in a cubic meter mixture of concrete, which plays a crucial role in determining the strength and durability of the concrete.", 'var2_desc': 'Compressive strength is a key property of concrete, indicating its ability to resist compression forces, and is determined by the mix of ingredients and the age of the concrete, with the strength measured in megapascals (MPa).'}
# pair0029 (cached metadata): var1 = superplasticizer (kg in a m3 mixture), var2 = concrete
# compressive strength (MPa); 'context' is the Concrete Compressive Strength description.
saved_pairs_info['pair0029'] = {'var1': ' Superplasticizer', 'var2': ' Compressive strength', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Information for pairs0029:\n\nConcrete Compressive Strength \n\n---------------------------------\n\nAbstract: Concrete is the most important material in civil engineering. The \nconcrete compressive strength is a highly nonlinear function of age and \ningredients. These ingredients include cement, blast furnace slag, fly ash, \nwater, superplasticizer, coarse aggregate, and fine aggregate.\n\n---------------------------------\n\nSources: \n\n  Original Owner and Donor\n  Prof. I-Cheng Yeh\n  Department of Information Management \n  Chung-Hua University, \n  Hsin Chu, Taiwan 30067, R.O.C.\n  e-mail:icyeh@chu.edu.tw\n  TEL:886-3-5186511\n\n  Date Donated: August 3, 2007\n \n---------------------------------\n\nData Characteristics:\n    \nThe actual concrete compressive strength (MPa) for a given mixture under a \nspecific age (days) was determined from laboratory. Data is in raw form (not scaled). \n\nNumber of instances (observations): 1030\n---------------------------------\n\nName  -- Description\n\nSuperplasticizer -- kg in a m3 mixture\nConcrete compressive strength -- MPa \n\n\nx: superplasticizer\n\ny: compressive strength\n\n\n', 'var1_desc': "Superplasticizer is a component used in a concrete mixture, measured in kilograms per cubic meter, that enhances the workability of the mixture and allows for a reduction in water content without compromising the concrete's compressive strength.", 'var2_desc': 'Compressive strength refers to the capacity of a material, in this case concrete, to withstand loads tending to reduce size, measured in megapascals (MPa), and is determined by testing the material in a laboratory under specific conditions.'}
# pair0030 (cached metadata): var1 = coarse aggregate (kg in a m3 mixture), var2 = concrete
# compressive strength (MPa); 'context' is the Concrete Compressive Strength description.
saved_pairs_info['pair0030'] = {'var1': ' Coarse aggregate', 'var2': ' Compressive strength', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Information for pairs0030:\n\nConcrete Compressive Strength \n\n---------------------------------\n\nAbstract: Concrete is the most important material in civil engineering. The \nconcrete compressive strength is a highly nonlinear function of age and \ningredients. These ingredients include cement, blast furnace slag, fly ash, \nwater, superplasticizer, coarse aggregate, and fine aggregate.\n\n---------------------------------\n\nSources: \n\n  Original Owner and Donor\n  Prof. I-Cheng Yeh\n  Department of Information Management \n  Chung-Hua University, \n  Hsin Chu, Taiwan 30067, R.O.C.\n  e-mail:icyeh@chu.edu.tw\n  TEL:886-3-5186511\n\n  Date Donated: August 3, 2007\n \n---------------------------------\n\nData Characteristics:\n    \nThe actual concrete compressive strength (MPa) for a given mixture under a \nspecific age (days) was determined from laboratory. Data is in raw form (not scaled). \n\nNumber of instances (observations): 1030\n---------------------------------\n\nName  -- Description\n\nCoarse Aggregate -- kg in a m3 mixture \nConcrete compressive strength -- MPa \n\n\nx: coarse aggregate\n\ny: compressive strength\n\n\n', 'var1_desc': 'Coarse aggregate refers to the larger material used in a concrete mixture, typically measured in kilograms per cubic meter, that contributes to the overall compressive strength of the concrete.', 'var2_desc': 'Compressive strength refers to the capacity of a material, in this case concrete, to withstand loads tending to reduce size, measured in megapascals (MPa), and is a critical property in civil engineering, influenced by factors such as age and composition of the material.'}
# pair0031 (cached metadata): var1 = fine aggregate (kg in a m3 mixture), var2 = concrete
# compressive strength (MPa); 'context' is the Concrete Compressive Strength description.
saved_pairs_info['pair0031'] = {'var1': ' Fine aggregate', 'var2': ' Compressive strength', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Information for pairs0031:\n\nConcrete Compressive Strength \n\n---------------------------------\n\nAbstract: Concrete is the most important material in civil engineering. The \nconcrete compressive strength is a highly nonlinear function of age and \ningredients. These ingredients include cement, blast furnace slag, fly ash, \nwater, superplasticizer, coarse aggregate, and fine aggregate.\n\n---------------------------------\n\nSources: \n\n  Original Owner and Donor\n  Prof. I-Cheng Yeh\n  Department of Information Management \n  Chung-Hua University, \n  Hsin Chu, Taiwan 30067, R.O.C.\n  e-mail:icyeh@chu.edu.tw\n  TEL:886-3-5186511\n\n  Date Donated: August 3, 2007\n \n---------------------------------\n\nData Characteristics:\n    \nThe actual concrete compressive strength (MPa) for a given mixture under a \nspecific age (days) was determined from laboratory. Data is in raw form (not scaled). \n\nNumber of instances (observations): 1030\n---------------------------------\n\nName  -- Description\n\nFine Aggregate -- kg in a m3 mixture \nConcrete compressive strength -- MPa \n\n\nx: fine aggregate\n\ny: compressive strength\n\n\n', 'var1_desc': "Fine aggregate refers to the smaller, granular materials, often sand or crushed stone, used in a concrete mixture, measured in kilograms per cubic meter (kg/m3), that significantly influence the concrete's compressive strength.", 'var2_desc': 'Compressive strength refers to the capacity of a material, such as concrete, to withstand axial compressive loads, and it is measured in megapascals (MPa); it is a crucial property in civil engineering, as it indicates the maximum compressive stress that the material can bear without failure.'}
# pair0032 (cached metadata): var1 = age of the concrete in days (1~365), var2 = concrete
# compressive strength (MPa); 'context' is the Concrete Compressive Strength description.
saved_pairs_info['pair0032'] = {'var1': ' Age', 'var2': ' Compressive strength', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Information for pairs0032:\n\nConcrete Compressive Strength \n\n---------------------------------\n\nAbstract: Concrete is the most important material in civil engineering. The \nconcrete compressive strength is a highly nonlinear function of age and \ningredients. These ingredients include cement, blast furnace slag, fly ash, \nwater, superplasticizer, coarse aggregate, and fine aggregate.\n\n---------------------------------\n\nSources: \n\n  Original Owner and Donor\n  Prof. I-Cheng Yeh\n  Department of Information Management \n  Chung-Hua University, \n  Hsin Chu, Taiwan 30067, R.O.C.\n  e-mail:icyeh@chu.edu.tw\n  TEL:886-3-5186511\n\n  Date Donated: August 3, 2007\n \n---------------------------------\n\nData Characteristics:\n    \nThe actual concrete compressive strength (MPa) for a given mixture under a \nspecific age (days) was determined from laboratory. Data is in raw form (not scaled). \n\nNumber of instances (observations): 1030\n---------------------------------\n\nName  -- Description\n\nAge -- Day (1~365)\nConcrete compressive strength -- MPa \n\n\nx: age\n\ny: compressive strength\n\n\n', 'var1_desc': 'In the context of concrete compressive strength, "Age" refers to the number of days (ranging from 1 to 365) since the concrete mixture was made, which is a key factor influencing the strength of the concrete over time.', 'var2_desc': "Compressive strength is a key property of concrete, indicating its ability to withstand a certain amount of load or pressure without deformation, and it is a nonlinear function of the concrete's age and its ingredients such as cement, blast furnace slag, fly ash, water, superplasticizer, coarse aggregate, and fine aggregate."}
# pair0033 (cached metadata): var1 = alcohol consumption (half-pint equivalents drunk per day),
# var2 = mean corpuscular volume (mcv); 'context' is the BUPA liver disorders description.
saved_pairs_info['pair0033'] = {'var1': ' Alcohol consumption', 'var2': ' Mean corpuscular volume', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Information for pairs0033:\n\nhttps://archive.ics.uci.edu/ml/datasets/Liver+Disorders\n\nTitle: BUPA liver disorders\n\n2. Source information:\n   -- Creators: BUPA Medical Research Ltd.\n   -- Donor: Richard S. Forsyth\n             8 Grosvenor Avenue\n             Mapperley Park\n             Nottingham NG3 5DX\n             0602-621676\n   -- Date: 5/15/1990\n\n\n4. Relevant information:\n   -- The second variable is a blood test which is thought\n      to be sensitive to liver disorders that might arise from\n      excessive alcohol consumption.  Each line \n      constitutes the record of a single male individual.\n\nx:    drinks\tnumber of half-pint equivalents of alcoholic beverages\n                drunk per day\n\n\ny:    mcv\tmean corpuscular volume\n\n\n', 'var1_desc': 'Alcohol consumption refers to the intake of alcoholic beverages, quantified in this context as the number of half-pint equivalents drunk per day.', 'var2_desc': 'Mean Corpuscular Volume (MCV) is a measure of the average volume of a red blood cell, typically used in blood tests to help diagnose and monitor certain conditions, including anemia and liver disorders.'}
# pair0034 (cached metadata): var1 = alcohol consumption (half-pint equivalents drunk per day),
# var2 = alkaline phosphotase (alkphos); 'context' is the BUPA liver disorders description.
# The context header previously read "Information for pairs0035", which mislabelled this
# entry as the following pair; it now matches this entry's key.
# NOTE(review): "phosphotase" in 'var2' and 'context' is kept exactly as cached (the usual
# spelling is "phosphatase") in case the variable name is matched elsewhere - confirm.
saved_pairs_info['pair0034'] = {'var1': ' Alcohol consumption', 'var2': ' Alkaline phosphotase', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Information for pairs0034:\n\nhttps://archive.ics.uci.edu/ml/datasets/Liver+Disorders\n\nTitle: BUPA liver disorders\n\n2. Source information:\n   -- Creators: BUPA Medical Research Ltd.\n   -- Donor: Richard S. Forsyth\n             8 Grosvenor Avenue\n             Mapperley Park\n             Nottingham NG3 5DX\n             0602-621676\n   -- Date: 5/15/1990\n\n4. Relevant information:\n   -- The second variable is a blood test which is thought\n      to be sensitive to liver disorders that might arise from\n      excessive alcohol consumption.  Each line \n      constitutes the record of a single male individual.\n\nx:    drinks\tnumber of half-pint equivalents of alcoholic beverages\n                drunk per day\n\n\ny:    alkphos\talkaline phosphotase\n\n\n', 'var1_desc': 'Alcohol consumption refers to the intake of alcoholic beverages, measured in this context as the number of half-pint equivalents drunk per day, which can potentially lead to liver disorders detectable through sensitive blood tests.', 'var2_desc': 'Alkaline phosphatase (alkphos) is an enzyme found in several tissues throughout the body, with high concentrations in the liver, and its levels in the blood can increase in response to liver disorders, often caused by excessive alcohol consumption.'}
# pair0035 (cached metadata): var1 = alcohol consumption (half-pint equivalents drunk per day),
# var2 = alanine aminotransferase (sgpt); 'context' is the BUPA liver disorders description.
saved_pairs_info['pair0035'] = {'var1': ' Alcohol consumption', 'var2': ' Alanine aminotransferase', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Information for pairs0035:\n\nhttps://archive.ics.uci.edu/ml/datasets/Liver+Disorders\n\nTitle: BUPA liver disorders\n\n2. Source information:\n   -- Creators: BUPA Medical Research Ltd.\n   -- Donor: Richard S. Forsyth\n             8 Grosvenor Avenue\n             Mapperley Park\n             Nottingham NG3 5DX\n             0602-621676\n   -- Date: 5/15/1990\n\n\n4. Relevant information:\n   -- The second variable is a blood test which is thought\n      to be sensitive to liver disorders that might arise from\n      excessive alcohol consumption.  Each line \n      constitutes the record of a single male individual.\n\nx:    drinks\tnumber of half-pint equivalents of alcoholic beverages\n                drunk per day\n\n\ny:    sgpt \talanine aminotransferase\n\n\n', 'var1_desc': 'Alcohol consumption refers to the intake of alcoholic beverages, measured in this context as the number of half-pint equivalents consumed per day, and is associated with various health outcomes, including potential liver disorders detectable through blood tests like alanine aminotransferase (sgpt).', 'var2_desc': 'Alanine aminotransferase (ALT) is a key enzyme involved in protein metabolism, predominantly found in the liver and kidneys, and its elevated blood levels are often indicative of liver damage or disease.'}
# pair0036 (cached metadata): var1 = alcohol consumption (half-pint equivalents drunk per day),
# var2 = aspartate aminotransferase (sgot); 'context' is the BUPA liver disorders description.
# var2_desc previously carried a stray leading and trailing space; it is stripped here so the
# description is formatted like the other cached '*_desc' values.
saved_pairs_info['pair0036'] = {'var1': ' Alcohol consumption', 'var2': ' Aspartate aminotransferase', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Information for pairs0036:\n\nhttps://archive.ics.uci.edu/ml/datasets/Liver+Disorders\n\nTitle: BUPA liver disorders\n\n2. Source information:\n   -- Creators: BUPA Medical Research Ltd.\n   -- Donor: Richard S. Forsyth\n             8 Grosvenor Avenue\n             Mapperley Park\n             Nottingham NG3 5DX\n             0602-621676\n   -- Date: 5/15/1990\n\n\n4. Relevant information:\n   -- The second variable is a blood test which is thought\n      to be sensitive to liver disorders that might arise from\n      excessive alcohol consumption.  Each line \n      constitutes the record of a single male individual.\n\nx:    drinks\tnumber of half-pint equivalents of alcoholic beverages\n                drunk per day\n\n\ny:    sgot  \taspartate aminotransferase\n\n\n', 'var1_desc': 'Alcohol consumption refers to the intake of alcoholic beverages, quantified in this dataset by the number of half-pint equivalents drunk per day, which can potentially lead to liver disorders detectable through blood tests such as aspartate aminotransferase (sgot).', 'var2_desc': 'Aspartate aminotransferase (AST) is an enzyme mainly found in the liver and heart, often used as a blood test marker for detecting liver damage or diseases, especially those potentially caused by excessive alcohol intake.'}
# pair0037 (cached metadata): var1 = alcohol consumption (half-pint equivalents drunk per day),
# var2 = gamma-glutamyl transpeptdase (gammagt); 'context' is the BUPA liver disorders description.
# NOTE(review): "transpeptdase" in 'var2' and 'context' looks like a misspelling of
# "transpeptidase" (the spelling used in the descriptions) - confirm before normalizing.
saved_pairs_info['pair0037'] = {'var1': ' Alcohol consumption', 'var2': ' Gamma-glutamyl transpeptdase', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Information for pairs0037:\n\nhttps://archive.ics.uci.edu/ml/datasets/Liver+Disorders\n\nTitle: BUPA liver disorders\n\n2. Source information:\n   -- Creators: BUPA Medical Research Ltd.\n   -- Donor: Richard S. Forsyth\n             8 Grosvenor Avenue\n             Mapperley Park\n             Nottingham NG3 5DX\n             0602-621676\n   -- Date: 5/15/1990\n\n\n4. Relevant information:\n   -- The second variable is a blood test which is thought\n      to be sensitive to liver disorders that might arise from\n      excessive alcohol consumption.  Each line \n      constitutes the record of a single male individual.\n\nx:    drinks\tnumber of half-pint equivalents of alcoholic beverages\n                drunk per day\n\n\ny:    gammagt \tgamma-glutamyl transpeptdase\n\n\n', 'var1_desc': 'Alcohol consumption refers to the intake of alcoholic beverages, measured in this dataset by the number of half-pint equivalents consumed per day, which can potentially lead to liver disorders detectable through sensitive blood tests like gamma-glutamyl transpeptidase.', 'var2_desc': 'Gamma-glutamyl transpeptidase (GGT) is an enzyme that is primarily found in the liver and is involved in the transfer of amino acids across the cellular membrane, often used as a biomarker for liver disease and excessive alcohol consumption.'}
# pair0038 (cached metadata): var1 = age, var2 = body mass index (weight in kg/(height in m)^2);
# 'context' is the Pima Indians Diabetes Database description (768 instances).
# NOTE(review): the 'context' literal is split across two physical lines right after "4. ".
# Presumably a raw line-breaking control character (possibly a form feed carried over from
# the source description) sits there - confirm, and consider writing it as an explicit escape.
saved_pairs_info['pair0038'] = {'var1': ' Age', 'var2': ' Body mass index', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Information for pairs0038:\n\nPima Indians Diabetes Database\n\n1. Sources:\n   (a) Original owners: National Institute of Diabetes and Digestive and\n                        Kidney Diseases\n   (b) Donor of database: Vincent Sigillito (vgs@aplcen.apl.jhu.edu)\n                          Research Center, RMI Group Leader\n                          Applied Physics Laboratory\n                          The Johns Hopkins University\n                          Johns Hopkins Road\n                          Laurel, MD 20707\n                          (301) 953-6231\n   (c) Date received: 9 May 1990\n\n2. Past Usage:\n    1. Smith,~J.~W., Everhart,~J.~E., Dickson,~W.~C., Knowler,~W.~C., \\&\n       Johannes,~R.~S. (1988). Using the ADAP learning algorithm to forecast\n       the onset of diabetes mellitus.  In {\\it Proceedings of the Symposium\n       on Computer Applications and Medical Care} (pp. 261--265).  IEEE\n       Computer Society Press.\n\n       The diagnostic, binary-valued variable investigated is whether the\n       patient shows signs of diabetes according to World Health Organization\n       criteria (i.e., if the 2 hour post-load plasma glucose was at least \n       200 mg/dl at any survey  examination or if found during routine medical\n       care).   The population lives near Phoenix, Arizona, USA.\n\n3. Relevant Information:\n      Several constraints were placed on the selection of these instances from\n      a larger database.  In particular, all patients here are females at\n      least 21 years old of Pima Indian heritage.  \n\n4. 
Number of Instances: 768\n\nx: age\n\ny: body mass index (weight in kg/(height in m)^2)\n\n\n', 'var1_desc': "'Age' in this context refers to the numerical representation of the years a female participant of Pima Indian heritage, who is at least 21 years old, has lived, and it is a variable used in the Pima Indians Diabetes Database to study its correlation with diabetes and body mass index.", 'var2_desc': "Body Mass Index (BMI) is a numerical value derived from a person's weight and height, calculated by dividing the weight in kilograms by the square of height in meters, and it is commonly used to assess if a person has a healthy body weight for a given height."}
# pair0039 (cached metadata): var1 = age, var2 = 2-hour serum insulin (mu U/ml);
# 'context' is the Pima Indians Diabetes Database description (768 instances).
# NOTE(review): the 'context' literal is split across two physical lines right after "4. ".
# Presumably a raw line-breaking control character (possibly a form feed carried over from
# the source description) sits there - confirm, and consider writing it as an explicit escape.
saved_pairs_info['pair0039'] = {'var1': ' Age', 'var2': ' Serum insulin', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Information for pairs0039:\n\nPima Indians Diabetes Database\n\n1. Sources:\n   (a) Original owners: National Institute of Diabetes and Digestive and\n                        Kidney Diseases\n   (b) Donor of database: Vincent Sigillito (vgs@aplcen.apl.jhu.edu)\n                          Research Center, RMI Group Leader\n                          Applied Physics Laboratory\n                          The Johns Hopkins University\n                          Johns Hopkins Road\n                          Laurel, MD 20707\n                          (301) 953-6231\n   (c) Date received: 9 May 1990\n\n2. Past Usage:\n    1. Smith,~J.~W., Everhart,~J.~E., Dickson,~W.~C., Knowler,~W.~C., \\&\n       Johannes,~R.~S. (1988). Using the ADAP learning algorithm to forecast\n       the onset of diabetes mellitus.  In {\\it Proceedings of the Symposium\n       on Computer Applications and Medical Care} (pp. 261--265).  IEEE\n       Computer Society Press.\n\n       The diagnostic, binary-valued variable investigated is whether the\n       patient shows signs of diabetes according to World Health Organization\n       criteria (i.e., if the 2 hour post-load plasma glucose was at least \n       200 mg/dl at any survey  examination or if found during routine medical\n       care).   The population lives near Phoenix, Arizona, USA.\n\n3. Relevant Information:\n      Several constraints were placed on the selection of these instances from\n      a larger database.  In particular, all patients here are females at\n      least 21 years old of Pima Indian heritage.  \n\n4. 
Number of Instances: 768\n\nx: age\n\ny: 2-Hour serum insulin (mu U/ml)\n\n\n', 'var1_desc': 'Age, in the context of the Pima Indians Diabetes Database, refers to the number of years a female participant of Pima Indian heritage, who is at least 21 years old, has lived.', 'var2_desc': 'Serum insulin is a biochemical marker in the blood that measures the level of insulin, a hormone produced by the pancreas that regulates the amount of glucose in the body.'}
# pair0040 (cached metadata): var1 = age, var2 = diastolic blood pressure (mm Hg);
# 'context' is the Pima Indians Diabetes Database description (768 instances).
# NOTE(review): the 'context' literal is split across two physical lines right after "4. ".
# Presumably a raw line-breaking control character (possibly a form feed carried over from
# the source description) sits there - confirm, and consider writing it as an explicit escape.
saved_pairs_info['pair0040'] = {'var1': ' Age', 'var2': ' Diastolic blood pressure', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Information for pairs0040:\n\nPima Indians Diabetes Database\n\n1. Sources:\n   (a) Original owners: National Institute of Diabetes and Digestive and\n                        Kidney Diseases\n   (b) Donor of database: Vincent Sigillito (vgs@aplcen.apl.jhu.edu)\n                          Research Center, RMI Group Leader\n                          Applied Physics Laboratory\n                          The Johns Hopkins University\n                          Johns Hopkins Road\n                          Laurel, MD 20707\n                          (301) 953-6231\n   (c) Date received: 9 May 1990\n\n2. Past Usage:\n    1. Smith,~J.~W., Everhart,~J.~E., Dickson,~W.~C., Knowler,~W.~C., \\&\n       Johannes,~R.~S. (1988). Using the ADAP learning algorithm to forecast\n       the onset of diabetes mellitus.  In {\\it Proceedings of the Symposium\n       on Computer Applications and Medical Care} (pp. 261--265).  IEEE\n       Computer Society Press.\n\n       The diagnostic, binary-valued variable investigated is whether the\n       patient shows signs of diabetes according to World Health Organization\n       criteria (i.e., if the 2 hour post-load plasma glucose was at least \n       200 mg/dl at any survey  examination or if found during routine medical\n       care).   The population lives near Phoenix, Arizona, USA.\n\n3. Relevant Information:\n      Several constraints were placed on the selection of these instances from\n      a larger database.  In particular, all patients here are females at\n      least 21 years old of Pima Indian heritage.  \n\n4. 
Number of Instances: 768\n\nx: age\n\ny: diastolic blood pressure (mm Hg)\n\n\n', 'var1_desc': 'Age, in the context of the Pima Indians Diabetes Database, refers to the number of years a female patient of Pima Indian heritage, who is at least 21 years old, has lived.', 'var2_desc': 'Diastolic blood pressure is the pressure in the arteries when the heart rests between beats, providing a measure of the minimum blood pressure in the cardiovascular system.'}
saved_pairs_info['pair0041'] = {'var1': ' Age', 'var2': ' Plasma glucose concentration', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Information for pairs0041:\n\nPima Indians Diabetes Database\n\n1. Sources:\n   (a) Original owners: National Institute of Diabetes and Digestive and\n                        Kidney Diseases\n   (b) Donor of database: Vincent Sigillito (vgs@aplcen.apl.jhu.edu)\n                          Research Center, RMI Group Leader\n                          Applied Physics Laboratory\n                          The Johns Hopkins University\n                          Johns Hopkins Road\n                          Laurel, MD 20707\n                          (301) 953-6231\n   (c) Date received: 9 May 1990\n\n2. Past Usage:\n    1. Smith,~J.~W., Everhart,~J.~E., Dickson,~W.~C., Knowler,~W.~C., \\&\n       Johannes,~R.~S. (1988). Using the ADAP learning algorithm to forecast\n       the onset of diabetes mellitus.  In {\\it Proceedings of the Symposium\n       on Computer Applications and Medical Care} (pp. 261--265).  IEEE\n       Computer Society Press.\n\n       The diagnostic, binary-valued variable investigated is whether the\n       patient shows signs of diabetes according to World Health Organization\n       criteria (i.e., if the 2 hour post-load plasma glucose was at least \n       200 mg/dl at any survey  examination or if found during routine medical\n       care).   The population lives near Phoenix, Arizona, USA.\n\n3. Relevant Information:\n      Several constraints were placed on the selection of these instances from\n      a larger database.  In particular, all patients here are females at\n      least 21 years old of Pima Indian heritage.  \n\n4. 
Number of Instances: 768\n\nx: age\n\ny: Plasma glucose concentration a 2 hours in an oral glucose tolerance test\n\n\n', 'var1_desc': 'Age, in the context of the Pima Indians Diabetes Database, refers to the number of years a female participant of Pima Indian heritage, who is at least 21 years old, has lived, and it serves as an independent variable in the study of diabetes onset.', 'var2_desc': 'Plasma glucose concentration refers to the amount of glucose, a type of sugar, present in the blood, typically measured in milligrams per deciliter (mg/dL), and is often assessed in medical tests such as the oral glucose tolerance test to diagnose conditions like diabetes.'}
saved_pairs_info['pair0042'] = {'var1': ' Day of the year', 'var2': ' Temperature', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Information for pairs0042:\n\nx (int): days of the year\ny (float): mean daily temperature of Furtwangen (Schwarzwald), Germany\n\ntime range: 1/1/1979-31/1/2004\n\ndata was computed as follows:\n\n(morning temperature + midday temperature + 2 * evening temperature) / 4\n\nsource: Bernward Janzing, private archive\n\n', 'var1_desc': 'The "Day of the year" is a numerical system that assigns a unique value to each day within a year, ranging from 1 to 365 (or 366 in a leap year), used in this context to chronologically track and analyze the mean daily temperature in Furtwangen, Germany over a 25-year period.', 'var2_desc': 'Temperature is a quantitative measure of the degree of heat present in a substance or an environment, often expressed in units such as Celsius, Fahrenheit, or Kelvin.'}
saved_pairs_info['pair0043'] = {'var1': ' Temperature at t', 'var2': ' Temperature at t+1', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Information for pair0043:\n\nMean Daily Air temperature near surface (.995 sigma level) on a 144x73 grid (2.5 degree) of day 50 and day 51 of year 2000.\n\nx: year 2000, day 50\ny: year 2000, day 51\n\nunits: K\n\nData source:\nhttp://www.esrl.noaa.gov/psd/data/gridded/data.ncep.reanalysis.surface.html\n\n\n', 'var1_desc': 'Temperature at t refers to the mean daily air temperature measured near the surface level (.995 sigma level) on a specific day (t) of a particular year, represented on a 144x73 grid (2.5 degree).', 'var2_desc': "The concept of Temperature at t+1 refers to the mean daily air temperature near the earth's surface on the following day (day 51) in the year 2000, measured on a 144x73 grid (2.5 degree) and expressed in Kelvin (K)."}
saved_pairs_info['pair0044'] = {'var1': ' Pressure at t', 'var2': ' Pressure at t+1', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Information for pair0044:\n\nMean Daily pressure at surface on a 144x73 grid (2.5 degree) of day 50 and day 51 of year 2000.\n\nx: year 2000, day 50\ny: year 2000, day 51\n\nunits: Pascal\n\nData source:\nhttp://www.esrl.noaa.gov/psd/data/gridded/data.ncep.reanalysis.surface.html\n\n\n', 'var1_desc': "Pressure at t is the quantification of the atmospheric pressure at a specific time point 't', measured in Pascals, on a 144x73 grid (2.5 degree) surface, as provided by the data from the Earth System Research Laboratory.", 'var2_desc': '"Pressure at t+1" refers to the atmospheric pressure measured in Pascal at the surface level on a 144x73 grid (2.5 degree) for the day following a given day (t), in this case, day 51 of the year 2000.'}
saved_pairs_info['pair0045'] = {'var1': ' Sea level pressure at t', 'var2': ' Sea level pressure at t+1', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Information for pair0045:\n\nMean daily sea level pressure on a 144x73 grid (2.5 degree) of day 50 and day 51 of year 2000.\n\nx: year 2000, day 50\ny: year 2000, day 51\n\nunits: Pascal\n\nData source:\nhttp://www.esrl.noaa.gov/psd/data/gridded/data.ncep.reanalysis.surface.html\n\n\n', 'var1_desc': "Sea level pressure at time 't' is the atmospheric pressure measured at sea level at a specific day (t) in the year 2000, represented on a 144x73 grid with 2.5-degree intervals.", 'var2_desc': 'Sea level pressure at t+1 refers to the atmospheric pressure measured at sea level on the following day (day 51) in the year 2000, represented on a 144x73 grid with a 2.5-degree resolution, with the units expressed in Pascal.'}
saved_pairs_info['pair0046'] = {'var1': ' Relative humidity at t', 'var2': ' Relative humidity at t+1', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Information for pair0046:\n\nMean daily relative humidity near surface (.995 sigma level) on a 144x73 grid (2.5 degree) of day 50 and day 51 of year 2000.\n\nx: year 2000, day 50\ny: year 2000, day 51\n\nunits: %\n\nData source:\nhttp://www.esrl.noaa.gov/psd/data/gridded/data.ncep.reanalysis.surface.html\n\n\n', 'var1_desc': 'Relative humidity at t refers to the percentage of moisture in the air at a specific time and location, measured near the surface on a 144x73 grid (2.5 degree) for a particular day in the year 2000.', 'var2_desc': 'Relative humidity at t+1 refers to the measurement of atmospheric moisture content at a specific grid location, expressed as a percentage, on the day following a given time point (t), in this case, day 50 of the year 2000.'}
saved_pairs_info['pair0047'] = {'var1': ' Number of cars', 'var2': ' Type of day', 'ground_truth': ' L', 'truth_ab': 0, 'truth_ba': 1, 'context': 'Information for pair0047:\n\nTraffic\n\nx: number of cars per 24h at different counting stations in Oberschwaben, Germany\ny: categorical :\n\t1 \t -> Sundays + holidays\n\t2\t -> working days \n\nData source:\nhttp://www.b30-oberschwaben.de/html/tabelle.html\n\n', 'var1_desc': 'The "Number of cars" refers to the total count of vehicles recorded in a 24-hour period at various counting stations in Oberschwaben, Germany.', 'var2_desc': 'The "Type of day" is a categorical variable in the dataset, which classifies the days into two categories: \'1\' representing Sundays and holidays, and \'2\' representing working days, to analyze the traffic volume in Oberschwaben, Germany.'}
saved_pairs_info['pair0048'] = {'var1': ' Indoor temperature', 'var2': ' Outdoor temperature', 'ground_truth': ' L', 'truth_ab': 0, 'truth_ba': 1, 'context': 'Information for pairs0048:\n\nx: Indoor temperature \n\ny: Outdoor temperature\n\nsource:\nHipel and Mcleod (1994): \nTime series modelling of water resources and environmental systems\nwww.stats.uwo.ca/faculty/mcleod/epubs/mhsets/readme-mhsets.html.\n\n', 'var1_desc': 'Indoor temperature refers to the degree or intensity of heat present within a confined space like a building, which can be influenced by various factors such as outdoor temperature, insulation, and heating or cooling systems.', 'var2_desc': 'Outdoor temperature refers to the degree or intensity of heat present in the environment outside of an enclosed space, which can influence various environmental systems and water resources.'}
saved_pairs_info['pair0049'] = {'var1': ' Ozone concentration', 'var2': ' Temperature', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Information for pairs0049:\n\nDaily mean values of ozone and temperature of year 2009 in Lausanne-CÃ©sar-Roux, Switzerland\n\nx: Ozone (microgram / cubic meter)\n\ny: Temperature (degree celsius)\n\nsource:\nBundesamt fuer Umwelt (BAFU, Switzerland)\nwww.bafu.admin.ch/luft/luftbelastung/blick_zurueck/datenabfrage/index.html?lang=de\n\n\n', 'var1_desc': 'Ozone concentration, represented here as micrograms per cubic meter, refers to the amount of ozone present in a specific volume of air, which is a key indicator of air quality and can have significant impacts on human health and the environment.', 'var2_desc': "Temperature, denoted as 'y' in this context, refers to the average daily atmospheric heat measured in degrees Celsius for the year 2009 in Lausanne-César-Roux, Switzerland."}
saved_pairs_info['pair0050'] = {'var1': ' Ozone concentration', 'var2': ' Temperature', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Information for pairs0050:\n\nDaily mean values of ozone and temperature of year 2009 in Chaumont, Switzerland\n\nx: Ozone (microgram / cubic meter)\n\ny: Temperature (degree celsius)\n\nsource:\nBundesamt fuer Umwelt (BAFU, Switzerland)\nwww.bafu.admin.ch/luft/luftbelastung/blick_zurueck/datenabfrage/index.html?lang=de\n\n', 'var1_desc': 'Ozone concentration, measured in micrograms per cubic meter, refers to the amount of ozone gas present in a specific volume of air, which is a key indicator of air quality and can have significant impacts on human health and the environment.', 'var2_desc': 'Temperature, in this context, refers to the daily average atmospheric temperature in degrees Celsius recorded in Chaumont, Switzerland during the year 2009.'}
saved_pairs_info['pair0051'] = {'var1': ' Ozone concentration', 'var2': ' Temperature', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Information for pairs0051:\n\nDaily mean values of ozone and temperature of year 2009 in Davos-See, Switzerland\n\nx: Ozone (microgram / cubic meter)\n\ny: Temperature (degree celsius)\n\nsource:\nBundesamt fuer Umwelt (BAFU, Switzerland)\nwww.bafu.admin.ch/luft/luftbelastung/blick_zurueck/datenabfrage/index.html?lang=de\n\n', 'var1_desc': 'Ozone concentration refers to the amount of ozone present in a specific volume of air, typically measured in micrograms per cubic meter, which can be influenced by various environmental factors such as temperature.', 'var2_desc': 'Temperature, in this context, refers to the average daily atmospheric temperature measured in degrees Celsius in Davos-See, Switzerland during the year 2009.'}
saved_pairs_info['pair0052'] = {'var1': ' (Temp and Press and SLP and Rh)', 'var2': ' (Temp and Press and Slp and Rh)', 'ground_truth': ' L', 'truth_ab': 0, 'truth_ba': 1, 'context': 'Information for pair0052:\n\nx and y are 4-dimensional variables for day 51 resp. 50 of year 2000 on a 144x73 grid (2.5 degree).  The four dimensions are:\n\nx: year 2000, day 51\ny: year 2000, day 50\n\n1) Mean Daily Air temperature at sigma level 995 \n\nvariable type: int16\nskalefactor: 0.00999999977648258\nadd offset: 512.809997558594\nunits: degK\n\n2) Mean Daily pressure at surface \n\nvariable type: int16\nskalefactor: 10\nadd offset: 367650\nunits: Pascal\n\n3) Mean Daily sea level pressure \n\nvariable type: int16\nadd offset: 119765\nunits: Pascal\n\n4) Mean Daily relative humidity at sigma level 995 \n\nvariable type: int16\nadd offset: 302.649993896484\nscale factor: 0.00999999977648258\nunits: %\n\nData source:\nhttp://www.esrl.noaa.gov/psd/data/gridded/data.ncep.reanalysis.surface.html\n\n\n', 'var1_desc': 'Temp and Press and SLP and Rh are four-dimensional meteorological variables representing Mean Daily Air Temperature, Mean Daily Pressure at Surface, Mean Daily Sea Level Pressure, and Mean Daily Relative Humidity, respectively, for days 50 and 51 of the year 2000, measured on a 144x73 grid with specific scale factors and offsets, as provided by the Earth System Research Laboratory.', 'var2_desc': 'Temp, Press, Slp, and Rh are four-dimensional variables representing the Mean Daily Air Temperature at sigma level 995, Mean Daily Pressure at Surface, Mean Daily Sea Level Pressure, and Mean Daily Relative Humidity at sigma level 995, respectively, for days 50 and 51 of the year 2000 on a 144x73 grid, as provided by the Earth System Research Laboratory of the National Oceanic and Atmospheric Administration.'}
saved_pairs_info['pair0053'] = {'var1': ' Ozone concentration', 'var2': ' (Wind speed and Radiation and Temperature)', 'ground_truth': ' L', 'truth_ab': 0, 'truth_ba': 1, 'context': 'Information for pair0053:\n\nEnvironmental data\n\nx: \tOzon concentration (microgramm/m^3)\n\ny: \t1) wind speed (m/s)\n\t2) global radiation (W/m^2)\n\t3) temperature\n\n\nData source:\nDietrich Stoyan, Helga Stoyan, and Uwe Jansen (1997): \nUmwelstatistik: Statistische Verarbeitung und Analyse von Umweltdaten\nhttp://www.mathe.tu-freiberg.de/Stoyan/umwdat.html#Top\n\n', 'var1_desc': 'Ozone concentration, measured in micrograms per cubic meter (µg/m^3), refers to the amount of ozone present in a specific volume of air, which is a key environmental data used to assess air quality and its potential impact on climate and health.', 'var2_desc': 'Wind speed, radiation, and temperature are environmental variables that can influence the concentration of ozone in the atmosphere, with wind speed affecting the distribution of ozone, global radiation influencing ozone production, and temperature impacting both ozone formation and decay.'}
saved_pairs_info['pair0054'] = {'var1': ' (Displacement and Horsepower and Weight)', 'var2': ' (Fuel cons and ption and Acceleration)', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Information for pairs0054:\n\nAuto-Mpg Data\n\nhttp://archive.ics.uci.edu/ml/datasets/Auto+MPG\n\n1. Sources:\n   (a) Origin:  This dataset was taken from the StatLib library which is\n                maintained at Carnegie Mellon University. The dataset was \n                used in the 1983 American Statistical Association Exposition.\n   (c) Date: July 7, 1993\n\n2. Past Usage:\n    -  See 2b (above)\n    -  Quinlan,R. (1993). Combining Instance-Based and Model-Based Learning.\n       In Proceedings on the Tenth International Conference of Machine \n       Learning, 236-243, University of Massachusetts, Amherst. Morgan\n       Kaufmann.\n\n3. Relevant Information:\n\n   This dataset is a slightly modified version of the dataset provided in\n   the StatLib library.  In line with the use by Ross Quinlan (1993) in\n   predicting the attribute "mpg", 8 of the original instances were removed \n   because they had unknown values for the "mpg" attribute.  The original \n   dataset is available in the file "auto-mpg.data-original".\n\n   "The data concerns city-cycle fuel consumption in miles per gallon,\n    to be predicted in terms of 3 multivalued discrete and 5 continuous\n    attributes." 
(Quinlan, 1993)\n\nAttribute information:\n\nmpg:           continuous\ndisplacement:  continuous\nhorsepower:    continuous\nweight:        continuous\nacceleration:  continuous\n\n\nx: (displacement, horsepower, weight)\n\ny: (mpg, acceleration)\n\n', 'var1_desc': "Displacement, horsepower, and weight are continuous attributes in the Auto-MPG dataset, representing the engine's cubic capacity, the measure of the car's power, and the car's mass respectively, used to predict city-cycle fuel consumption in miles per gallon.", 'var2_desc': "Fuel consumption and acceleration are key attributes in the Auto-MPG dataset, where fuel consumption refers to the city-cycle fuel usage measured in miles per gallon, and acceleration represents the vehicle's speed increase rate, both of which are continuous variables used for predicting vehicle performance."}
saved_pairs_info['pair0055'] = {'var1': ' Ozone concentration (16-dim.)', 'var2': ' Radiation (16-dim.)', 'ground_truth': ' L', 'truth_ab': 0, 'truth_ba': 1, 'context': "Information for pairs0055:\n\nDaily mean values of ozone values and radiation in the last 83 days of 2009 at 16 different places in Switzerland.\n11 days were deleted due to missing data.\nThe different places are: \n1 Bern-Bollwerk\t\n2 Magadino-Cadenazzo\t\n3 Lausanne-C/'esar-Roux\t\n4 Payerne\t\n5 Lugano-Universita\t\n6 Taenikon\t\n7 Zuerich-Kaserne\t\n8 Laegeren\t\n9 Basel-Binningen\t\n10 Chaumont\t\n11 Duebendorf\t\n12 Rigi-Seebodenalp\t\n13 Haerkingen\t\n14 Davos-See\t\n15 Sion-A/'eroport\t\n16 Jungfraujoch\n\n\nx: Ozone (microgram / cubic meter)\n\ny: Temperature (degree celsius)\n\nsource:\nBundesamt fuer Umwelt (BAFU, Switzerland)\nwww.bafu.admin.ch/luft/luftbelastung/blick_zurueck/datenabfrage/index.html?lang=de\n\n", 'var1_desc': 'The concept of Ozone concentration (16-dim.) refers to the measurement of ozone levels, expressed in micrograms per cubic meter, at 16 different locations in Switzerland over the last 83 days of 2009, with data collected daily, except for 11 days due to missing information.', 'var2_desc': 'Radiation (16-dim.) refers to the daily mean values of solar radiation recorded at 16 different locations in Switzerland over the last 83 days of 2009, providing a multi-dimensional perspective on the radiation levels across various geographical points.'}
saved_pairs_info['pair0056'] = {'var1': ' Female life expectancy 2000-2005', 'var2': ' Latitude', 'ground_truth': ' L', 'truth_ab': 0, 'truth_ba': 1, 'context': "Information for pair0056:\n\nUNdata from http://data.un.org\n\n\nx:\tlife expectancy at birth for different countries, female, 2000-2005\n\ny:\tlatitude of the country's capital\n\nChina, Russia and Canada were taken out.\n\nsource:\nhttp://data.un.org/Data.aspx?d=GenderStat&f=inID%3a37\n\n", 'var1_desc': 'The concept of Female life expectancy 2000-2005 refers to the average number of years a newborn female is expected to live, assuming that current mortality rates remain constant throughout her life, specifically for the years between 2000 and 2005, excluding data from China, Russia, and Canada.', 'var2_desc': "Latitude is a geographical coordinate that specifies the north-south position of a point on the Earth's surface, measured in degrees from the equator, which is 0 degrees, with the poles at 90 degrees North and South."}
saved_pairs_info['pair0057'] = {'var1': ' Female life expectancy 1995-2000', 'var2': ' Latitude', 'ground_truth': ' L', 'truth_ab': 0, 'truth_ba': 1, 'context': "Information for pair0057:\n\nUNdata from http://data.un.org\n\n\nx:\tlife expectancy at birth for different countries, female, 1995-2000\n\ny:\tlatitude of the country's capital\n\nChina, Russia and Canada were taken out.\n\nsource:\nhttp://data.un.org/Data.aspx?d=GenderStat&f=inID%3a37\n\n", 'var1_desc': 'The concept of Female life expectancy 1995-2000 refers to the average number of years a newborn female is expected to live, assuming that current mortality rates remain constant throughout her life, specifically for the years between 1995 and 2000, excluding data from China, Russia, and Canada.', 'var2_desc': "Latitude is a geographical coordinate that specifies the North-South position of a point on the Earth's surface, measured in degrees from the equator, which is 0 degrees, with the poles at 90 degrees North and South."}
saved_pairs_info['pair0058'] = {'var1': ' Female life expectancy 1990-1995', 'var2': ' Latitude', 'ground_truth': ' L', 'truth_ab': 0, 'truth_ba': 1, 'context': "Information for pair0057:\n\nUNdata from http://data.un.org\n\n\nx:\tlife expectancy at birth for different countries, female, 1990-1995\n\ny:\tlatitude of the country's capital\n\nChina, Russia and Canada were taken out.\n\nsource:\nhttp://data.un.org/Data.aspx?d=GenderStat&f=inID%3a37\n\n", 'var1_desc': 'The concept of Female life expectancy 1990-1995 refers to the average number of years a newborn female could expect to live, assuming that current mortality rates remain constant throughout her life, across different countries during the period from 1990 to 1995, excluding China, Russia, and Canada.', 'var2_desc': "Latitude is a geographical coordinate that specifies the north-south position of a point on the Earth's surface, measured in degrees from the equator, which has a latitude of 0 degrees, with the poles at a latitude of 90 degrees north and south."}
saved_pairs_info['pair0059'] = {'var1': ' Female life expectancy 1985-1990', 'var2': ' Latitude', 'ground_truth': ' L', 'truth_ab': 0, 'truth_ba': 1, 'context': "Information for pair0059:\n\nUNdata from http://data.un.org\n\n\nx:\tlife expectancy at birth for different countries, female, 1985-1990\n\ny:\tlatitude of the country's capital\n\nChina, Russia and Canada were taken out.\n\nsource:\nhttp://data.un.org/Data.aspx?d=GenderStat&f=inID%3a37\n\n", 'var1_desc': 'Female life expectancy 1985-1990 refers to the average number of years a newborn female could expect to live, assuming that current mortality rates remain constant throughout her life, during the period from 1985 to 1990, based on data collected from various countries excluding China, Russia, and Canada.', 'var2_desc': "Latitude is a geographical coordinate that specifies the north-south position of a point on the Earth's surface, measured in degrees from the equator, which is 0 degrees, with the poles at 90 degrees north and south."}
saved_pairs_info['pair0060'] = {'var1': ' Male life expectancy 2000-2005', 'var2': ' Latitude', 'ground_truth': ' L', 'truth_ab': 0, 'truth_ba': 1, 'context': "Information for pair0060:\n\nUNdata from http://data.un.org\n\n\nx:\tlife expectancy at birth for different countries, male, 2000-2005\n\ny:\tlatitude of the country's capital\n\nChina, Russia and Canada were taken out.\n\nsource:\nhttp://data.un.org/Data.aspx?d=GenderStat&f=inID%3a37\n\n", 'var1_desc': 'Male life expectancy from 2000-2005 refers to the average number of years a newborn male could expect to live, assuming that current mortality rates remain constant throughout his life, during the period of 2000-2005, for various countries excluding China, Russia, and Canada.', 'var2_desc': "Latitude is a geographical coordinate that specifies the north-south position of a point on the Earth's surface, measured in degrees from the equator, with positive values indicating locations north of the equator and negative values indicating locations south of the equator."}
saved_pairs_info['pair0061'] = {'var1': ' Male life expectancy 1995-2000', 'var2': ' Latitude', 'ground_truth': ' L', 'truth_ab': 0, 'truth_ba': 1, 'context': "Information for pair0061:\n\nUNdata from http://data.un.org\n\n\nx:\tlife expectancy at birth for different countries, male, 1995-2000\n\ny:\tlatitude of the country's capital\n\nChina, Russia and Canada were taken out.\n\nsource:\nhttp://data.un.org/Data.aspx?d=GenderStat&f=inID%3a37\n\n", 'var1_desc': 'The concept of Male life expectancy 1995-2000 refers to the average number of years a newborn male could expect to live, assuming that current mortality rates remain constant throughout his life, across different countries during the period from 1995 to 2000, excluding China, Russia, and Canada.', 'var2_desc': "Latitude is a geographical coordinate that specifies the north-south position of a point on the Earth's surface, measured in degrees from the equator, which has a latitude of 0 degrees."}
saved_pairs_info['pair0062'] = {'var1': ' Male life expectancy 1990-1995', 'var2': ' Latitude', 'ground_truth': ' L', 'truth_ab': 0, 'truth_ba': 1, 'context': "Information for pair0062:\n\nUNdata from http://data.un.org\n\n\nx:\tlife expectancy at birth for different countries, male, 1990-1995\n\ny:\tlatitude of the country's capital\n\nChina, Russia and Canada were taken out.\n\nsource:\nhttp://data.un.org/Data.aspx?d=GenderStat&f=inID%3a37\n\n", 'var1_desc': 'Male life expectancy 1990-1995 refers to the average number of years a newborn male could expect to live, assuming that current mortality rates remain constant throughout his life, during the period from 1990 to 1995, for different countries excluding China, Russia, and Canada.', 'var2_desc': "Latitude is a geographical coordinate that specifies the north-south position of a point on the Earth's surface, measured in degrees from the equator, with positive values indicating locations north of the equator and negative values indicating locations south of the equator."}
saved_pairs_info['pair0063'] = {'var1': ' Male life expectancy 1985-1990', 'var2': ' Latitude', 'ground_truth': ' L', 'truth_ab': 0, 'truth_ba': 1, 'context': "Information for pair0063:\n\nUNdata from http://data.un.org\n\n\nx:\tlife expectancy at birth for different countries, male, 1985-1990\n\ny:\tlatitude of the country's capital\n\nChina, Russia and Canada were taken out.\n\nsource:\nhttp://data.un.org/Data.aspx?d=GenderStat&f=inID%3a37\n\n", 'var1_desc': 'Male life expectancy from 1985-1990 refers to the average number of years a newborn male could expect to live, assuming that current mortality rates remain constant throughout his life, during the period of 1985-1990, for different countries excluding China, Russia, and Canada.', 'var2_desc': "Latitude is a geographic coordinate that specifies the north-south position of a point on the Earth's surface, measured in degrees from the equator, which has a latitude of 0 degrees, with the poles being at 90 degrees north and south."}
saved_pairs_info['pair0064'] = {'var1': ' Drinking water access', 'var2': ' Infant mortality', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Information for pair0064:\n\nUNdata from http://data.un.org\n\n\nx:\tPopulation with sustainable access to improved drinking water sources (%) total, 2006\n\ny:\tInfant mortality rate (per 1 000 live births) both sexes, 2006\n\nsource:\nhttp://data.un.org/Data.aspx?d=WHO&f=inID%3aMBD10\nhttp://data.un.org/Data.aspx?d=WHO&f=inID%3aRF03\n\n', 'var1_desc': 'Drinking water access refers to the percentage of a population that has a reliable and safe supply of water suitable for consumption and domestic use, as measured by UNdata in 2006.', 'var2_desc': "Infant mortality refers to the death of infants under one year of age, typically expressed as a rate per 1,000 live births, and is often used as an indicator of a country's health status and quality of healthcare services."}
saved_pairs_info['pair0065'] = {'var1': ' Stock return of Hang Seng Bank', 'var2': ' Stock return of HSBC Hldgs', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Information for pair0065:\n\nFinancial data:\n\nx:\tstock returns of Hang Seng Bank (0011.HK) \n\ny:\tstock return of HSBC Hldgs (0005.HK) \n\nfrom Jan. 4, 2000 to Jun. 17, 2005. Data was obtained from the Yahoo finance database. HSBC owns about 60% of Hang Seng Bank.\n\nRemark: We calculated the above returns from the raw data in the following way.\n1. Extract the dividend/split adjusted closing price data from Yahoo Finance http://finance.yahoo.com/.\n2. For the few days when the price is not available, we use simple linear interpolation to estimate the price.  Consequently the two time series are aligned.\n3. For each stock, denote the closing price on day t by P_t, and the corresponding return is calculated by X_t = (P_t-P_{t-1 }) / P_{t-1 }.\n\n', 'var1_desc': "The stock return of Hang Seng Bank (0011.HK) refers to the percentage change in the dividend/split adjusted closing price of the bank's stock from one trading day to the next, between January 4, 2000 and June 17, 2005, as obtained from the Yahoo Finance database, with missing price data estimated via simple linear interpolation.", 'var2_desc': "The stock return of HSBC Holdings (0005.HK) refers to the percentage change in the dividend/split adjusted closing price of HSBC's shares from one trading day to the next, between January 4, 2000 and June 17, 2005, as obtained from the Yahoo Finance database, with missing data points estimated through simple linear interpolation."}
saved_pairs_info['pair0066'] = {'var1': ' Stock return of Hutchison', 'var2': ' Stock return of Cheung kong', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Information for pair0066:\n\nFinancial data\n\nx:\tstock returns of Hutchison (0013.HK)\n\ny:\tstock return of Cheung kong (0001.HK)\n\nfrom Jan. 4, 2000 to Jun. 17, 2005. Data was obtained from the Yahoo finance database. Cheung kong owns about 50% of Hutchison.\n\nRemark: We calculated the above returns from the raw data in the following way.\n1. Extract the dividend/split adjusted closing price data from Yahoo Finance http://finance.yahoo.com/.\n2. For the few days when the price is not available, we use simple linear interpolation to estimate the price.  Consequently the two time series are aligned.\n3. For each stock, denote the closing price on day t by P_t, and the corresponding return is calculated by X_t = (P_t-P_{t-1 }) / P_{t-1 }.\n\n', 'var1_desc': "The stock return of Hutchison (0013.HK) refers to the percentage change in the company's dividend/split adjusted closing price over a specific period, in this case from January 4, 2000 to June 17, 2005, calculated using data extracted from Yahoo Finance and adjusted for any missing values through simple linear interpolation.", 'var2_desc': "The stock return of Cheung Kong (0001.HK) refers to the percentage change in the company's stock price, from one trading day to the next, between January 4, 2000, and June 17, 2005, as calculated from the dividend/split adjusted closing price data obtained from Yahoo Finance, with missing price data estimated through simple linear interpolation."}
saved_pairs_info['pair0067'] = {'var1': ' Stock return of Cheung kong', 'var2': ' Stock return of Sun Hung Kai Prop.', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Information for pair0067:\n\nFinancial data\n\nx:\tstock returns of Cheung kong (0001.HK)\n\ny:\tstock return of Sun Hung Kai Prop. (0016.HK)\n\nfrom Jan. 4, 2000 to Jun. 17, 2005. Data was obtained from the Yahoo finance database. Sun Hung Kai Prop. is a typical stock in the Hang Seng Property subindex, and is believed to depend on other majoy stocks.\n\nRemark: We calculated the above returns from the raw data in the following way.\n1. Extract the dividend/split adjusted closing price data from Yahoo Finance http://finance.yahoo.com/.\n2. For the few days when the price is not available, we use simple linear interpolation to estimate the price.  Consequently the two time series are aligned.\n3. For each stock, denote the closing price on day t by P_t, and the corresponding return is calculated by X_t = (P_t-P_{t-1 }) / P_{t-1 }.\n\n\n', 'var1_desc': 'The stock return of Cheung Kong (0001.HK) refers to the percentage change in its dividend/split adjusted closing price over a specific period, from January 4, 2000 to June 17, 2005, as calculated from raw data obtained from Yahoo Finance, with missing price data estimated through simple linear interpolation.', 'var2_desc': 'The stock return of Sun Hung Kai Properties (0016.HK) refers to the percentage change in its dividend/split adjusted closing price over a given period, from January 4, 2000 to June 17, 2005, as calculated from data obtained from Yahoo Finance and adjusted for instances when price data was not available.'}
saved_pairs_info['pair0068'] = {'var1': ' Bytes sent', 'var2': ' Open http connections', 'ground_truth': ' L', 'truth_ab': 0, 'truth_ba': 1, 'context': 'Information for pair0068:\n\nTimeseries data - Internet connections and traffic at the MPI for Intelligent Systems.\n\nX(t) - bytes sent at minute t.\nY(t) - open http connections during that minute \n\nMeasurements are taken every 20 minutes. \n\n', 'var1_desc': "In the context of timeseries data for Internet connections and traffic at the MPI for Intelligent Systems, 'Bytes sent' refers to the quantity of digital information, measured in bytes, transmitted from the system over the Internet in a specific minute.", 'var2_desc': 'Open HTTP connections refer to the active network links established between a client and server over the HTTP protocol during a specific time period, which in this context, is measured every minute.'}
saved_pairs_info['pair0069'] = {'var1': ' Inside temperature', 'var2': ' Outside temperature', 'ground_truth': ' L', 'truth_ab': 0, 'truth_ba': 1, 'context': 'Information for pair0069:\n\nTimeseries data - Inside (room) and outside temperature\n\nX(t) - inside room temperature in degrees Celsius\nY(t) - outside temperature in degrees Celsius\n\nMeasurements were taken every 5 minutes.\n\nData provided by Joris M. Mooij.\n\n', 'var1_desc': 'The inside temperature, denoted as X(t), refers to the measurement of the thermal condition within a room, expressed in degrees Celsius, and recorded every 5 minutes as part of a timeseries data set provided by Joris M. Mooij.', 'var2_desc': 'The outside temperature, denoted as Y(t) in this context, refers to the ambient atmospheric temperature measured in degrees Celsius, recorded every 5 minutes as part of a time series data set provided by Joris M. Mooij.'}
saved_pairs_info['pair0070'] = {'var1': ' Parameter', 'var2': ' Answer', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': "Information for pair0070:\n\ndata taken from\nR. Armann and I.Buelthoff, 2010,\nin preparation\nMPI for Biological Cybernetics, Tuebingen, Germany\n \n\nDiscrete data:\n\nArtificial faces were shown and varied between male and female.\nPeople had to say if it is male or female.\n\nThe variables are:\n\nX = par (between 0 and 14, 0 -> very female, 14 -> very male)\n\nY = sex_guess (0: female or 1: male, the subject's guess)\n\n", 'var1_desc': 'A parameter, in the context of this study, refers to a quantifiable characteristic or feature of a system or experiment, such as the perceived gender of an artificial face, which can be manipulated or measured to observe its effect on the outcome.', 'var2_desc': "The concept of 'Answer' in this context refers to the participant's response or guess about the gender of the shown artificial face, which is represented as '0' for female and '1' for male."}
saved_pairs_info['pair0071'] = {'var1': ' Symptoms (6-dim.)', 'var2': ' Classification of disease (2-dim.)', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': "Information for pair0071:\n\nData was taken from the UCI repository\nhttp://archive.ics.uci.edu/ml/datasets/Acute+Inflammations\n\nJacek Czerniak, Ph.D., Assistant Professor\nSystems Research Institute\nPolish Academy of Sciences\nLaboratory of Intelligent Systems\nul. Newelska 6, Room 218\n01-447 Warszawa, Poland\ne-mail: jacek.czerniak '@' ibspan.waw.pl or jczerniak '@' ukw.edu.pl \n \nAcute inflammation of urinary bladder is characterised\nby sudden occurrence of pains in the abdomen region and the urination in form of\nconstant urine pushing, micturition pains and sometimes lack of urine keeping.\nTemperature of the body is rising, however most often not above 38C. The excreted\nurine is turbid and sometimes bloody. At proper treatment, symptoms decay usually\nwithin several days. However, there is inclination to returns. At persons with acute\ninflammation of urinary bladder, we should expect that the illness will turn into\nprotracted form.\n\nAcute nephritis of renal pelvis origin occurs considerably more often at women than at\nmen. It begins with sudden fever, which reaches, and sometimes exceeds 40C. The fever\nis accompanied by shivers and one- or both-side lumbar pains, which are sometimes very\nstrong. Symptoms of acute inflammation of urinary bladder appear very often. Quite not\ninfrequently there are nausea and vomiting and spread pains of whole abdomen.\n\nThe data was created by a medical expert as a data set to test the expert system, which\nwill perform the presumptive diagnosis of two diseases of urinary system. \nEach instance represents an potential patient. 
\n\nDiscrete and logical data:\n1 = yes, 2 = no\n\nX is 6-dimensional\n1 \tTemperature of patient { 35C-42C } \n2 \tOccurrence of nausea \n3 \tLumbar pain\n4 \tUrine pushing (continuous need for urination)\n5 \tMicturition pains\n6 \tBurning of urethra, itch, swelling of urethra outlet\n\nY is 2-dimensional\n7\tdecision: Inflammation of urinary bladder\n8 \tdecision: Nephritis of renal pelvis origin \n\n\n", 'var1_desc': "The 6-dimensional symptoms in this context refer to the six key indicators used in the data set to diagnose urinary system diseases, namely: patient's temperature, occurrence of nausea, lumbar pain, continuous need for urination (urine pushing), micturition pains, and burning, itching, or swelling of the urethra outlet.", 'var2_desc': 'The concept of Classification of disease (2-dim.) in this context refers to the process of categorizing potential patients into two specific urinary system diseases, namely Inflammation of urinary bladder and Nephritis of renal pelvis origin, based on a set of six symptoms and conditions.'}
saved_pairs_info['pair0072'] = {'var1': ' Sunspots', 'var2': ' Global mean temperature', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Information for pair0072:\n\nsunspot data - timeseries with monthly values\nfrom 5/1874 - 4/2010 = 1632 values\n\n\ntemperature data taken from \nhttp://www.cru.uea.ac.uk/cru/data/temperature/\n\nsunspot data taken from\nhttp://solarscience.msfc.nasa.gov/SunspotCycle.shtml\n\nX = sunspot area\n\nY = global mean temperature anomalies (deviations from 1961-1990) in °C\n\n', 'var1_desc': "Sunspots are temporary phenomena on the Sun's photosphere that appear as spots darker than the surrounding areas due to lower temperatures, often used in research for their correlation with global temperature anomalies.", 'var2_desc': "The Global mean temperature is a measure of the average temperature of the Earth's surface, calculated from temperature readings taken worldwide and averaged over time, often used to assess climate trends and anomalies."}
saved_pairs_info['pair0073'] = {'var1': ' CO2 emissions', 'var2': ' Energy use', 'ground_truth': ' L', 'truth_ab': 0, 'truth_ba': 1, 'context': 'Information for pair0073:\n\nEnergy - emission data from 152 countries  between 1960 and 2005\n\nfrom UNdata (http://data.un.org)\n\n\nx: CO2 emissions for different countries in different years\n\ny: Energy use (kg of oil equivalent per capita) for different countries in different years\n\n\n\n', 'var1_desc': 'CO2 emissions refer to the release of carbon dioxide, a greenhouse gas, into the atmosphere, primarily through the burning of fossil fuels for energy, which is quantified on a country-by-country basis and tracked over time to assess environmental impact and energy efficiency.', 'var2_desc': 'Energy use, in this context, refers to the amount of energy consumed per person in a given country, measured in kilograms of oil equivalent per year.'}
saved_pairs_info['pair0074'] = {'var1': ' GNI per capita', 'var2': ' Life expectancy', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Information for pair0074:\n\nUNdata from http://data.un.org\n\nGross national income (GNI) and life expectancy. \nOne can see the GNI per capita as an index how rich a country is. \nThe wealth of a country influences the qualtity of the health care system and thus indirectly the life expectancy of its citizens.\n\nx: GNI (Gross national income) per capita for different countries (in US$)\n\ny: life expectancy at birth for different countries\n\n\n', 'var1_desc': "GNI per capita is a measure of a country's economic output per person, calculated by dividing the gross national income by the total population, and is often used as an indicator of a nation's wealth and living standards.", 'var2_desc': 'Life expectancy is a statistical measure indicating the average number of years a newborn is expected to live, given current age-specific mortality rates in a particular country.'}
saved_pairs_info['pair0075'] = {'var1': ' Under-5 mortality rate', 'var2': ' GNI per capita', 'ground_truth': ' L', 'truth_ab': 0, 'truth_ba': 1, 'context': 'Information for pair0075:\n\nUNdata from http://data.un.org\n\nGross national income (GNI) and life expectancy. \nOne can see the GNI per capita as an index how rich a country is. \nThe wealth of a country influences the qualtity of the health care system and thus indirectly the mortality rate of its children.\n\n\nx: under 5 mortality rate for different countries (deaths per 1000 live births)\n\ny: GNI (Gross national income) per capita for different countries (in US$)\n\n', 'var1_desc': "The Under-5 mortality rate is a statistical measure representing the number of deaths of children under five years of age per 1000 live births, often used as an indicator of a country's overall health and well-being.", 'var2_desc': "GNI per capita is a measure of a country's economic wealth per person, calculated by dividing the gross national income by the total population."}
saved_pairs_info['pair0076'] = {'var1': ' Population growth', 'var2': ' Food consumption growth', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Information for pair0076\n \npopulation growth and food consumption \n \ndata for 174 countries or areas, during the period from 1990-92 to 1995-97 (former 174 data points) and that from 1995-97 to 2000-02 (latter 174 points).  \n \nThe data were taken from food security statistics provided by Food and Agriculture Organization of the United Nations (http://www.docstoc.com/docs/102679223/Food-consumption-and-population-growth---FAO);\nsee http://www.fao.org/economic/ess/ess-fs/en/ .\n \nx (first column): the average annual rate of change of population;\ny (second column): the average annual rate of change of total dietary consumption for total population (kcal/day).\n\nTheir difference (y-x) can be interpreted as the average annual rate of change of dietary energy consumption (kcal/person/day).\n\n', 'var1_desc': 'Population growth refers to the increase in the number of individuals in a population over a specific period of time, often expressed as an average annual rate of change.', 'var2_desc': 'Food consumption growth refers to the average annual rate of change in total dietary energy intake for the entire population, measured in kilocalories per day, and it reflects changes in dietary habits and food availability over time.'}
saved_pairs_info['pair0077'] = {'var1': ' Temperature', 'var2': ' Solar radiation', 'ground_truth': ' L', 'truth_ab': 0, 'truth_ba': 1, 'context': 'Information for pair0077:\n\nNote: X and Y are actually time series with time step 1 day \n\nY is the solar radiation in W/m^2 measured in Furtwangen, Black Forest, Germany, by Bernward Janzing.\nEach value is the daily average of one day between January 1, 1985 and December 31, 2008.\n\nX is the daily average temperature of the air measured at the same location and the same days.\n\nThe data are taken from a larger data set containing solar radiation averages from November 24, 1985 to December 31, 2008\nand temperature values from January 1, 1979 to  December 31, 2008.\nThe original files have been provided by Bernward Janzing and processed by Dominik Janzing to extract the common time interval.\n\n', 'var1_desc': 'Temperature is a quantitative measure of the degree of heat present in a substance or object, often measured in degrees Celsius, Fahrenheit, or Kelvin, and in this context, it refers to the daily average air temperature recorded in Furtwangen, Black Forest, Germany, from January 1, 1985, to December 31, 2008.', 'var2_desc': 'Solar radiation refers to the electromagnetic energy emitted by the sun, measured in watts per square meter (W/m^2), which is crucial for various natural processes on Earth, including weather patterns and photosynthesis.'}
# pair0078: total PPFD (x) -> NEP (y). Degree signs and the flux unit in the
# context were lost to '?' during a lossy decode; they are restored here.
saved_pairs_info['pair0078'] = {'var1': ' PPFD', 'var2': ' Net Ecosystem Productivity', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Information for pair0078:\n\nLight Response Data\n\nTaken at a flux tower at site DE-Hai\nlatitude: 51.08°N\nlongitude: 10.45°E\n\nThe filtered version of the data is taken from:\n\nMoffat A.M. (2012): Assessing competing semi-empirical equations:\nLight response curve (Chapter 7). In: /A new methodology to interpret\nhigh resolution measurements of net carbon fluxes between terrestrial\necosystems and the atmosphere/. pp. 68-80. Doctoral thesis, Friedrich\nSchiller University, Jena.\n\n- Response of NEP on total PPFD\n\nFirst column (x): PPFD (Photosynthetic Photon Flux Density)\n- a measure of light intensity in terms of photons, that are available for photosynthesis\n- i.e. the number of photons falling on a 1 meter square area per second\n- only the photons with a wavelength of 400-700nm (visible light) are available for photosynthesis\n- unit [µmol/(m²s)]\nPPFD(total) = PPFDdif + PPFDdir\n\n\nSecond column (y): NEP (Net Ecosystem Productivity)\n- a measure of the carbon flux\n- calculated by photosynthetic uptake MINUS release by respiration\n- is known to be driven by PPFD\n- unit: [µmol/(m²s)]\n\n\n', 'var1_desc': 'PPFD, or Photosynthetic Photon Flux Density, is a measure of the intensity of light, specifically the number of photons within the visible light spectrum (400-700nm) that fall on a one square meter area per second, which are available for photosynthesis.', 'var2_desc': 'Net Ecosystem Productivity (NEP) is a measure of the carbon flux in an ecosystem, calculated by subtracting the carbon released by respiration from the carbon absorbed through photosynthesis, and is influenced by the Photosynthetic Photon Flux Density (PPFD).'}
# pair0079: NEP (x) <- diffuse PPFD (y). Degree signs and the flux unit in the
# context were lost to '?' during a lossy decode; they are restored here.
saved_pairs_info['pair0079'] = {'var1': ' Net Ecosystem Productivity', 'var2': ' Diffuse PPFDdif', 'ground_truth': ' L', 'truth_ab': 0, 'truth_ba': 1, 'context': 'Information for pair0079:\n\nLight Response Data\n\nTaken at a flux tower at site DE-Hai\nlatitude: 51.08°N\nlongitude: 10.45°E\n\nThe filtered version of the data is taken from:\n\nMoffat A.M. (2012) Assessing competing semi-empirical equations:\nLight response curve (Chapter 7). In: /A new methodology to interpret\nhigh resolution measurements of net carbon fluxes between terrestrial\necosystems and the atmosphere/. pp. 68-80. Doctoral thesis, Friedrich\nSchiller University, Jena.\n\n- Response of NEP on PPFDdif\n\nFirst column (x): NEP (Net Ecosystem Productivity)\n- a measure of the carbon flux\n- calculated by photosynthetic uptake MINUS release by respiration\n- is known to be driven by PPFDdif and PPFDdir\n- unit: [µmol/(m²s)]\n\nSecond column (y): PPFDdif (Photosynthetic Photon Flux Density, diffusive)\n- a measure of light intensity in terms of diffusive photons, that are available for photosynthesis\n- i.e. the number of diffusive photons falling on a 1 meter square area per second\n- only the photons with a wavelength of 400-700nm (visible light) are available for photosynthesis\n- unit [µmol/(m²s)]\nPPFD(total) = PPFDdif + PPFDdir\n\n\n', 'var1_desc': 'Net Ecosystem Productivity (NEP) is a measure of the carbon flux within an ecosystem, calculated by subtracting the carbon released through respiration from the carbon absorbed through photosynthesis, which is influenced by factors such as light intensity.', 'var2_desc': 'Diffuse PPFDdif refers to the measure of light intensity in terms of diffusive photons, specifically those with a wavelength of 400-700nm (visible light), that are available for photosynthesis, quantified as the number of these photons falling on a one square meter area per second.'}
# pair0080: NEP (x) <- direct PPFD (y). Degree signs and the flux unit in the
# context were lost to '?' during a lossy decode; they are restored here.
saved_pairs_info['pair0080'] = {'var1': ' Net Ecosystem Productivity', 'var2': ' Direct PPFDdir', 'ground_truth': ' L', 'truth_ab': 0, 'truth_ba': 1, 'context': 'Information for pair0080:\n\nLight Response Data\n\nTaken at a flux tower at site DE-Hai\nlatitude: 51.08°N\nlongitude: 10.45°E\n\nThe filtered version of the data is taken from:\n\nMoffat A.M. (2012) Assessing competing semi-empirical equations:\nLight response curve (Chapter 7). In: /A new methodology to interpret\nhigh resolution measurements of net carbon fluxes between terrestrial\necosystems and the atmosphere/. pp. 68-80. Doctoral thesis, Friedrich\nSchiller University, Jena.\n\n- Response of NEP on PPFDdir\n\nFirst column (x): NEP (Net Ecosystem Productivity)\n- a measure of the carbon flux\n- calculated by photosynthetic uptake MINUS release by respiration\n- is known to be driven by PPFDdif and PPFDdir\n- unit: [µmol/(m²s)]\n\nSecond column (y): PPFDdir (Photosynthetic Photon Flux Density, direct)\n- a measure of direct solar light intensity in terms of photons, that are available for photosynthesis\n- i.e. the number of direct photons falling on a 1 meter square area per second\n- only the photons with a wavelength of 400-700nm (visible light) are available for photosynthesis\n- unit [µmol/(m²s)]\nPPFD(total) = PPFDdif + PPFDdir\n\n\n', 'var1_desc': 'Net Ecosystem Productivity (NEP) is a measure of the carbon flux within an ecosystem, calculated by subtracting the carbon release due to respiration from the carbon uptake through photosynthesis.', 'var2_desc': 'Direct PPFDdir refers to the measure of direct solar light intensity in terms of photons, specifically those within the 400-700nm wavelength (visible light) that are available for photosynthesis, falling on a one-meter square area per second.'}
saved_pairs_info['pair0081'] = {'var1': ' Temperature', 'var2': ' Local CO2 flux', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Information for pair0081:\n\nFluxnet data\n\nThe folder contains flux data where fluxes are net CO2 exchanges aggregated\nover night, and the temperature accordingly (from http://fluxnet.ornl.gov). \n\nThe site id is BE-Bra\nBelgium, Brasschaat\n\nYou have 365 values per site (one year).\n\nThe night time CO2 is "respiration" the flux from the ecosystem to the\natmosphere but no assumptions on flux partitioning!!! The price is high noise\nlevels.\n\nx: Temperature in degree Celsius\n\ny: CO2 flux at night\n\nPleases note that column 3 contains either NaN  if the data were filled,\nor 1 for credible values. \n\n\n', 'var1_desc': "Temperature, represented as 'x' in the dataset, refers to the atmospheric condition measured in degrees Celsius, which is used in the study of net CO2 exchanges during nighttime.", 'var2_desc': 'Local CO2 flux refers to the net exchange of carbon dioxide between a specific ecosystem and the atmosphere, typically measured at night to capture ecosystem respiration, with data often subject to high noise levels due to environmental variability.'}
saved_pairs_info['pair0082'] = {'var1': ' Temperature', 'var2': ' Local CO2 flux', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Information for pair0082:\n\nFluxnet data\n\nThe folder contains flux data where fluxes are net CO2 exchanges aggregated\nover night, and the temperature accordingly (from http://fluxnet.ornl.gov). \n\nThe site id is DE-Har\nGermany, Hartheim\n\nYou have 365 values per site (one year).\n\nThe night time CO2 is "respiration" the flux from the ecosystem to the\natmosphere but no assumptions on flux partitioning!!! The price is high noise\nlevels.\n\nx: Temperature in degree Celsius\n\ny: CO2 flux at night\n\nPleases note that column 3 contains either NaN  if the data were filled,\nor 1 for credible values. \n\n\n', 'var1_desc': 'Temperature, in the given context, refers to the ambient atmospheric temperature measured in degrees Celsius, which is used in correlation with the CO2 flux at night to study net CO2 exchanges in the ecosystem.', 'var2_desc': 'Local CO2 flux refers to the net exchange of carbon dioxide between a specific ecosystem and the atmosphere, typically measured over a specific period such as nightly, and can be influenced by factors such as temperature.'}
saved_pairs_info['pair0083'] = {'var1': ' Temperature', 'var2': ' Local CO2 flux', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Information for pair0083:\n\nFluxnet data\n\nThe folder contains flux data where fluxes are net CO2 exchanges aggregated\nover night, and the temperature accordingly (from http://fluxnet.ornl.gov). \n\nThe site id is US-PFa\nUS, Park Falls\n\nYou have 365 values per site (one year).\n\nThe night time CO2 is "respiration" the flux from the ecosystem to the\natmosphere but no assumptions on flux partitioning!!! The price is high noise\nlevels.\n\nx: Temperature in degree Celsius\n\ny: CO2 flux at night\n\nPleases note that column 3 contains either NaN  if the data were filled,\nor 1 for credible values. \n\n\n', 'var1_desc': 'Temperature, in this context, refers to the ambient atmospheric temperature measured in degrees Celsius, which is used to study its correlation with the net CO2 flux at night from the ecosystem to the atmosphere.', 'var2_desc': "Local CO2 flux refers to the net exchange of carbon dioxide between a specific ecosystem and the atmosphere, typically measured at night to capture the ecosystem's respiration without the influence of photosynthesis."}
saved_pairs_info['pair0084'] = {'var1': ' Employment', 'var2': ' Population', 'ground_truth': ' L', 'truth_ab': 0, 'truth_ba': 1, 'context': 'Information for pair 98\n \nnatural logarithm of employment and natural logarithm of population \n \ndata for 3102 counties in US in 1980.  \n \nThe data were taken from US county-level growth data set from Journal of Applied Econometrics web site.  Currently available at\nhttp://www.spatial-econometrics.com/data/contents.html .\n \nx (first column): the natural logarithm of employment in 1980 in 3102 counties in US;\ny (second column): the natural logorithm of the corresponding population.\n\nIt seems reanable that the total population causes the employment, not vice versa. The difference (x-y) can be considered as the natural logarithm of the propotion of the emloyment.\n\n', 'var1_desc': 'Employment refers to the state of having a paid job or occupation, and in the context of this data, it is represented by the natural logarithm of the total number of individuals employed in 3102 counties in the US in 1980.', 'var2_desc': 'Population refers to the total number of inhabitants residing in a specific geographical area, such as a county, at a given time, in this case, 1980.'}
saved_pairs_info['pair0085'] = {'var1': ' Time of measurement', 'var2': ' Protein content of milk', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Description of pair0085:\n\nThis data set is extracted from that for the milk protein trial used by Verbyla and Cullis (1990).  The original data set consists of assayed protein content of milk samples taken weekly from each of 79 cows.  The cows were randomly allocated to one of three diets: barley, mixed barley-lupins, and lupins, with 25, 27, and 27 cows in the three groups, respectively.  Measurements were taken for up to 19 weeks, but there were 38 drop-outs from week 15 onwards, corresponding to cows who stopped producing milk before the end of the experiment.\n\nWe removed the missing values (drop-outs) in the data set: we did not consider the measurements from week 15 onwards, which contain many drop-outs, and we discarded the cows with drop-outs before week 15.  Finally, the data set contains 71 cows and 14 weeks.  Furthermore, we re-organized the data set to see the relationship between the milk protein and the time to take the measurement:\n\nX : time to take weekly measurements (from 1 to 14).\n\nY : protein content of the milk produced by each cow at time X.\n\n\nGroup truth: \n\nX -> Y.\n\n\nRemark:\n\nHere we do not consider the effect of the diets on the protein content.  Note that rigorously speaking, X, together with the diets, causes Y.\n\nThe original data set is available at\nhttp://www.maths.lancs.ac.uk/Software/Oswald/\n\n\nReference:\nA. P. Verbyla and B. R. CullisSource, "Modelling in Repeated Measures Experiments", Journal of the Royal Statistical Society. Series C (Applied Statistics), Vol. 39, No. 3(1990), pp. 
341-356.\n', 'var1_desc': 'The "Time of Measurement" refers to the specific point or period during the study when data is collected, in this case, the weekly intervals at which the protein content of milk produced by each cow was measured over a span of 14 weeks.', 'var2_desc': "The protein content of milk refers to the concentration of essential amino acids present in the milk, which is a key factor in determining its nutritional value and quality, and can be influenced by various factors such as the cow's diet and lactation period."}
saved_pairs_info['pair0086'] = {'var1': ' Size of apartment', 'var2': ' Monthly rent', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'First column:  size in m^2 of appartment/room\nSecond column: monthly rent in EUR\n\nData from kamernet.nl, gathered by Joris Mooij in 2007\n\nUPDATE v1.0: We removed two instances, with respectively size == 0 and rent == 1\n', 'var1_desc': "The size of an apartment, measured in square meters (m^2), refers to the total floor area within the apartment's interior perimeter, including all rooms and interior spaces.", 'var2_desc': 'Monthly rent refers to the regular payment, expressed in Euros, made by a tenant to a landlord for the use of a specific apartment or room, as per the data collected from kamernet.nl by Joris Mooij in 2007.'}
# pair0087: temperature (x) -> total snow (y). The labels previously said ' L'
# (y causes x), contradicting the record's own context ("X causes Y ... X-->Y");
# they now match it. NOTE(review): confirm against the dataset's pairmeta.
# The mis-decoded degree signs ('Â°') in the coordinates are repaired as well.
saved_pairs_info['pair0087'] = {'var1': ' Temperature', 'var2': ' Total snow', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Whistler Daily Snowfall (from http://www.mldata.org/repository/data/viewslug/whistler-daily-snowfall/)\n\nHistorical daily snowfall data in Whistler, BC, Canada over the period July 1 1972 to December 31 2009. Measured at top of Whistler Gondola: Latitude: 50°04\'04.000" N Longitude: 122°56\'50.000" W Elevation: 1835.00 m \n\nTwo attributes were selected: \nX = MeanTemp (deg Celsius)\nY = TotalSnow (cm)\n\nCommon sense tells us that X causes Y (with maybe very small feedback of Y on X). Confounders are present (e.g., day of the year).\n\nX-->Y\n', 'var1_desc': 'Temperature is a physical quantity that expresses the degree of heat or cold in a body or environment, often measured in degrees Celsius, Fahrenheit, or Kelvin.', 'var2_desc': 'Total Snow is the cumulative measurement of snowfall in centimeters recorded at the top of Whistler Gondola, Whistler, BC, Canada, during a specified time period.'}
saved_pairs_info['pair0088'] = {'var1': ' Age', 'var2': ' Relative spinal bone mineral density', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'x = age\ny = Relative Spinal bone mineral density\n\nSource:\n\n"bone" data set from http://cran.r-project.org/web/packages/ElemStatLearn/ElemStatLearn.pdf\n\nWe obtained and preprocessed the data using the following R commands:\n\ninstall.packages(\'ElemStatLearn\')\nlibrary(ElemStatLearn)\nboneuniq = bone[1,]\nfor( i in 2:dim(bone)[1] ) { \n  if( bone[i,1] != bone[i-1,1] ) { \n    boneuniq <- rbind(boneuniq,bone[i,])\n  } \n}\nwrite.table(boneuniq[,c(2,4)],file=\'/tmp/bone.csv\',row.names=FALSE,col.names=FALSE)\nremove.packages(\'ElemStatLearn\')\n\nFor the preprocessing, we only take the first measurement for each child, and \nselected only the columns "age" and "spnbmd" (Relative Change in Spinal BMD).\n\n', 'var1_desc': 'Age is a numerical variable representing the length of time a person has lived, often measured in years, and is commonly used in biomedical research to study its effects on various health parameters, such as bone mineral density.', 'var2_desc': 'Relative spinal bone mineral density (spnbmd) is a measure of the amount of mineral content in the spinal bone relative to its size, which can indicate bone strength and the risk of osteoporosis or fractures.'}
# pair0089: x = mass loss in October 2012 (1 year in), y = mass loss in April
# 2012 (6 months in); ground truth is y -> x. var2 / var2_desc used to be
# verbatim copies of var1 / var1_desc, so the pair compared a variable with
# itself; var2 now names the April measurement. var1_desc also said "six
# months", which is the April timing, not October's.
# NOTE(review): the context is kept verbatim; it says "forests" and labels both
# timing sentences "y" — check against the dataset's description file.
saved_pairs_info['pair0089'] = {'var1': ' root decomposition Oct (grassl)', 'var2': ' root decomposition Apr (grassl)', 'ground_truth': ' L', 'truth_ab': 0, 'truth_ba': 1, 'context': 'x = Mass loss OCTOBER 2012 in %\ny = Mass loss APRIL 2012 in %\n\nFine root decomposition data. 150 Litterbags containing fine roots were put underground to\nstudy decomposition rates in different ecosystems. This data set contains all experiments done in forests.\ny denotes the mass loss 6 months after the start of the experiment.\ny denotes the mass loss 1 year after the start of the experiment.\nPlots with missing data were excluded.\n\nData taken from\nSolly et al (2014). Factors controlling decomposition rates of fine root litter in temperate forests and grasslands. Plant and Soil, 382(1-2), 203-218.\n\nCommon sense tells us that at a certain point in time the total material decomposed depends on the amount decomposed at an earlier point in time.\n\ny --> x\n', 'var1_desc': 'Root decomposition Oct (grassl) refers to the percentage of mass loss in fine root litter observed in grassland ecosystems in October 2012, one year after the start of the decomposition experiment.', 'var2_desc': 'Root decomposition Apr (grassl) refers to the percentage of mass loss in fine root litter observed in grassland ecosystems in April 2012, six months after the start of the decomposition experiment.'}
# pair0090: x = mass loss in October 2012 (1 year in), y = mass loss in April
# 2012 (6 months in); ground truth is y -> x. var2 / var2_desc used to be
# verbatim copies of var1 / var1_desc, so the pair compared a variable with
# itself; var2 now names the April measurement.
# NOTE(review): the context is kept verbatim; it says "grasslands" and labels
# both timing sentences "y" — check against the dataset's description file.
saved_pairs_info['pair0090'] = {'var1': ' root decomposition Oct (forest)', 'var2': ' root decomposition Apr (forest)', 'ground_truth': ' L', 'truth_ab': 0, 'truth_ba': 1, 'context': 'x = Mass loss OCTOBER 2012 in %\ny = Mass loss APRIL 2012 in %\n\nFine root decomposition data. 150 Litterbags containing fine roots were put underground to\nstudy decomposition rates in different ecosystems. This data set contains all experiments done in grasslands.\ny denotes the mass loss 6 months after the start of the experiment.\ny denotes the mass loss 1 year after the start of the experiment.\nPlots with missing data were excluded.\n\nData taken from\nSolly et al (2014). Factors controlling decomposition rates of fine root litter in temperate forests and grasslands. Plant and Soil, 382(1-2), 203-218.\n\nCommon sense tells us that at a certain point in time the total material decomposed depends on the amount decomposed at an earlier point in time.\n\ny --> x\n', 'var1_desc': 'Root decomposition in October (forest) refers to the percentage of mass loss from fine root litter in forest ecosystems, measured one year after the start of the decomposition experiment.', 'var2_desc': 'Root decomposition in April (forest) refers to the percentage of mass loss from fine root litter in forest ecosystems, measured six months after the start of the decomposition experiment.'}
saved_pairs_info['pair0091'] = {'var1': ' clay cont. in soil (forest)', 'var2': ' soil moisture', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'x = Clay content in soil (in gram per g/kg)\ny = Soil moisture at 10cm depth (in %)\n\nData taken from 150 forest plots. Plots with missing data were excluded.\n\nThe clay content in a soil is one determinant of how much water a soil can hold.\n\nReference:\nSolly et al (2014). Factors controlling decomposition rates of fine root litter in temperate forests and grasslands. Plant and Soil, 382(1-2), 203-218.\n\nx --> y\n', 'var1_desc': "The clay content in forest soil, quantified in grams per kilogram, is a key factor influencing the soil's moisture retention capacity at a depth of 10cm, as evidenced in a study of 150 forest plots.", 'var2_desc': 'Soil moisture refers to the percentage of water contained within the soil, typically measured at a specific depth, which in this case is 10cm, and is influenced by various factors including the clay content of the soil.'}
saved_pairs_info['pair0092'] = {'var1': ' organic carbon in soil (forest)', 'var2': ' clay cont. in soil (forest)', 'ground_truth': ' L', 'truth_ab': 0, 'truth_ba': 1, 'context': 'x = Organic C content in soil (in g Carbon/kg)\ny = Clay content (in g/kg)\n\nData taken from 150 forest plots. Plots with missing data were excluded.\n\nThe clay content in a soil is one determinant of how much carbon can be accumulated in a soil.\n\nReference:\nSolly et al (2014). Factors controlling decomposition rates of fine root litter in temperate forests and grasslands. Plant and Soil, 382(1-2), 203-218.\n\ny --> x\n', 'var1_desc': 'Organic carbon in forest soil is the carbon component of soil organic matter, influenced by various factors including clay content, and is integral to processes like decomposition and nutrient cycling.', 'var2_desc': "Clay content in forest soil is a quantifiable measure, expressed in grams per kilogram (g/kg), which influences the soil's capacity to accumulate organic carbon, thereby playing a crucial role in soil fertility and ecosystem functioning."}
saved_pairs_info['pair0093'] = {'var1': ' precipitation', 'var2': ' runoff', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'x = average precipitation over 1948 to 2004 in mm/day\ny = average runoff in over 1948 to 2004 mm/day\n\nPrecipitation and runoff data over 438 river catchments in the US. We deleted 6 catchments where runoff data had missing values. \nDaily values were downloaded and averaged over the whole available time period (1948-2004).\n\nSource:\n\n"MOPEX" data set (http://www.nws.noaa.gov/ohd/mopex/mo_datasets.htm)\n\nData can be downloaded from ftp://hydrology.nws.noaa.gov/pub/gcip/mopex/US_Data/Us_438_Daily/\n\n\nCommon sense tells us that precipitation is the largest driver of runoff with some confounders such as catchment characteristics.\n \nx --> y', 'var1_desc': "Precipitation is a key hydrological process involving the fall of water, in various forms, from the atmosphere to the Earth's surface, significantly influencing the amount of runoff in a given area.", 'var2_desc': 'Runoff is the movement of water, primarily resulting from precipitation, that flows over the land surface, infiltrates into the ground, or is stored in water bodies, influencing the water cycle and catchment hydrology.'}
saved_pairs_info['pair0094'] = {'var1': ' hour of day', 'var2': ' temperature', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Information for pair0094:\n\nx = hour of the day\ny = temperature in degree celsius\n\nSource:\n\nThe data come from a regional energy distributor in Turkey and was obtained from S. Armagan Tarim and Steve Prestwich on June 11 2015.\n\nCommon sense tells us that temperature depends on the hour of the day.\n \nx --> y \n', 'var1_desc': 'The "hour of the day" is a time measurement unit that divides a single day into 24 equal parts, each representing a specific period within a 24-hour cycle, and is often used to analyze patterns or changes in various phenomena over the course of a day.', 'var2_desc': 'Temperature, in this context, refers to the measure of heat intensity in the environment, expressed in degrees Celsius, which varies according to the hour of the day.'}
saved_pairs_info['pair0095'] = {'var1': ' hour of day', 'var2': ' electricity load', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Information for pair0095:\n\nx = hour of the day\ny = load: the total electricity consumption in a region of Turkey in "MWh"\n\nSource:\n\nThe data come from a regional energy distributor in Turkey and was obtained from S. Armagan Tarim and Steve Prestwich on June 11 2015.\n\nThe hour of the day defines what people usually do, if they are sleeping, cooking, watching TV etc. Hence the energy consumption is driven by the hour of the day.\n \nx --> y \n', 'var1_desc': "The 'hour of the day' is a time unit representing a specific hour within a 24-hour day, which significantly influences human activities and consequently affects the energy consumption in a given region.", 'var2_desc': 'Electricity load refers to the total amount of electrical power consumed in a specific region at a given time, in this case, measured in Megawatt hours (MWh) in a region of Turkey, with the consumption patterns largely influenced by the hour of the day and associated human activities.'}
saved_pairs_info['pair0096'] = {'var1': ' temperature', 'var2': ' electricity load', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Information for pair0096:\n\nx = temperature in degree celsius\ny = load: the total electricity consumption in a region of Turkey in "MWh"\n\nSource:\n\nThe data come from a regional energy distributor in Turkey and was obtained from S. Armagan Tarim and Steve Prestwich on June 11 2015.\n\nCold temperatures let people turn on the heating while hot temperatures might trigger them to turn on a fan or air conditioning. \nGenerally, temperature affects the use of electricity of humans, while energy consumptions does not directly influence temperature in a region.\n \nx --> y \n', 'var1_desc': 'Temperature, measured in degrees Celsius in this context, is a quantitative representation of the degree of hotness or coldness in a region, which influences human behaviors such as the use of heating or cooling appliances, thereby affecting the total electricity consumption.', 'var2_desc': 'Electricity load refers to the total amount of electrical power consumed in a specific region at a given time, which in this context is influenced by the temperature as changes in weather conditions can affect the use of heating or cooling appliances, thereby impacting energy consumption.'}
saved_pairs_info['pair0097'] = {'var1': ' speed at the beginning', 'var2': ' speed at the end', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Information for pair0097\n\ninitial and final speed of a ball on a ball track for children\n\nThe data has been recorded by Dominik Janzing using a ball track that has been equipped with two pairs of light barriers. The first pair measures the initial speed\n and the second pair the speed of a ball at some later position of the track. The units of the speeds are arbitrary and differ for both measurements (X and Y) since they are obtained by\ninverting the time the ball needed to pass the distance between two light barriers of one pair. \n \n The initial part of the track \nhas large slope. The initial speed is strongly determined by the exact position where the ball is put on the track. For part of the runs, the position of the ball has been chosen by D. Janzing, the other part by a 4-year old child. This should avoid that the variation of the initial position is done in a too systematic way. \n\n', 'var1_desc': 'The speed at the beginning, or initial speed, refers to the velocity of the ball when it is first placed on the track, which is largely influenced by the exact position where the ball is put and the slope of the track.', 'var2_desc': 'The final speed of the ball on the track is the velocity measured at a later position on the track, determined by the time it takes for the ball to pass between the second pair of light barriers, with the units being arbitrary and different from the initial speed measurement.'}
saved_pairs_info['pair0098'] = {'var1': ' speed at the beginning', 'var2': ' speed at the end', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Information for pair0098\n\ninitial and final speed of a ball on a ball track for children\n\nThe data has been recorded by Dominik Janzing using a ball track that has been equipped with two pairs of light barriers. The first pair measures the initial speed\n and the second pair the speed of a ball at some later position of the track. The units of the speeds are arbitrary and differ for both measurements (X and Y) since they are obtained by\ninverting the time the ball needed to pass the distance between two light barriers of one pair. \n \n The initial part of the track ("acceleration zone")\nhas large slope. The initial speed is strongly determined by the exact position where the ball is put on the acceleration zone. For part of the runs, the position of the ball has been chosen by D. Janzing, the other part by a 4-year old child. This should avoid that the variation of the initial position is done in a too systematic way. \nThe setup is actually the same as for pair0097, but for pair0098 the ball track had a longer acceleration zone, which allows for larger deviations of the initial speed. \n\n', 'var1_desc': "The speed at the beginning, or initial speed, in this context refers to the velocity of a ball at the start of its journey on a ball track, which is primarily influenced by the exact position where the ball is placed on the track's acceleration zone.", 'var2_desc': 'The speed at the end refers to the final velocity of a ball on a track, measured at a later position on the track using a pair of light barriers, with the units being arbitrary and obtained by inverting the time taken for the ball to pass the distance between the two barriers.'}
saved_pairs_info['pair0099'] = {'var1': ' language test score', 'var2': ' social-economic status family', 'ground_truth': ' L', 'truth_ab': 0, 'truth_ba': 1, 'context': 'Dataset \'nlschools\' from the R MASS package. \n\nAccording to the description in the documentation of the package \n(https://cran.r-project.org/web/packages/MASS/MASS.pdf ):\n\n"Snijders and Bosker (1999) use as a running example a study of 2287 eighth-grade pupils (aged\nabout 11) in 132 classes in 131 schools in the Netherlands.\n\nSource:\nSnijders, T.A.B. and Bosker, R.J. (1999)\n\'Multilevel Analysis. An Introduction to Basic and Advanced Multilevel Modelling.\'\nLondon: Sage."\n\nWe converted the data to a text file by the following R commands:\n\nlibrary(\'MASS\')\nwrite.matrix(nlschools,file=\'/tmp/nlschools.txt\')\n\nX = the first column (\'lang\', language test score) \nY = the fifth column (\'SES\', social-economic status of pupil\'s family) column. \n\nWe consider SES to be one of the causes of lang, but note that selection bias may be\npresent (via the choice of the classes and schools).\n\n', 'var1_desc': "The language test score ('lang') in the 'nlschools' dataset from the R MASS package represents the performance of eighth-grade pupils on a language proficiency test in the Netherlands, which is potentially influenced by factors such as the socio-economic status of the pupil's family.", 'var2_desc': "Social-economic status of a family (SES) is a comprehensive measure that assesses a family's societal standing based on various factors including income, education level, and occupation."}
saved_pairs_info['pair0100'] = {'var1': ' cycle time of CPU', 'var2': ' performance', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Dataset \'cpus\' from the R MASS package. \n\nAccording to the description in the documentation of the package \n(https://cran.r-project.org/web/packages/MASS/MASS.pdf ):\n\n"A relative performance measure and characteristics of 209 CPUs.\n\nSource:\nP. Ein-Dor and J. Feldmesser (1987) \n\'Attributes of the performance of central processing units: a relative performance prediction model.\'\nComm. ACM. 30, 308–317"\n\nWe converted the data to a text file by the following R commands:\n\nlibrary(\'MASS\')\nwrite.matrix(cpus,file=\'/tmp/cpus.txt\',sep=\',\')\n\nX = logarithm of the second column (\'syct\', cycle time in nanoseconds)\nY = logarithm of the seventh column (\'perf\', published performance on a benchmark mix relative to an IBM 370/158-3)\n\nWe consider syct to be one of the causes of perf.\n\n', 'var1_desc': "The cycle time of a CPU, represented as 'syct' in the 'cpus' dataset from the R MASS package, refers to the duration taken by a CPU to execute a single instruction cycle, measured in nanoseconds, which can influence the overall performance of the CPU.", 'var2_desc': 'Performance, in the context of central processing units (CPUs), refers to the efficiency and speed at which a CPU can execute tasks, often measured relative to a benchmark, such as the IBM 370/158-3 in this dataset.'}
saved_pairs_info['pair0101'] = {'var1': ' grey value of a pixel', 'var2': ' brightness of the screen', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Information for pair0101:\n\nBrightness of screen\n\nThis is from an experiment that has been performed to generate an example of a cause-effect relation\nthat is clearly unconfounded.\n\nFirst column (x): grey value of a pixel that is chosen randomly from a fixed image. The grey value\nis displayed by the color of a square on a computer screen\n\nSecond column (y): light intensity seen by a photo diode placed several centimeters away from the screen.  \n\nMore precisely, the light intensity is measured by the \nAdafruit TSL2591 High Dynamic Range Digital Light Sensor connected to an Arduino microcontroler. \nhttps://learn.adafruit.com/adafruit-tsl2591/wiring-and-test\n\nThe measurement has been performed by D. Janzing in August 2016.\n\n', 'var1_desc': 'The grey value of a pixel refers to the intensity of a pixel in a grayscale image, where higher values indicate brighter shades and lower values represent darker shades.', 'var2_desc': 'The brightness of the screen refers to the light intensity emitted by a pixel on a computer screen, which can be quantified using tools like the Adafruit TSL2591 High Dynamic Range Digital Light Sensor.'}
saved_pairs_info['pair0102'] = {'var1': ' position of a ball', 'var2': ' time for passing a track segment', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Information for pair0102:\n\nposition vs. passing time\n\npair0102, pair0103, pair0104 are taken from the same run of an experiment at a ball track for children at which some sensors have been mounted to measure the following parameters:\n\nvariable 1: position on the ball track where the ball starts\nvariable 2: time interval between passing the first and the second light barrier.\nvariable 3: time interval between passing the third and the fourth light barrier.\n\nThe initial part of the track ("acceleration zone")\nhas large slope, therefore the initial position is decisive for the speed the ball  gets when it rolls downwards. Therefore and because of the order of the light barriers we have the following causal chain:\n\nvariable 1 ---> variable 2 --> variable 3 \n\nhere we have x= variable 1, y= variable 2\n\nThe data has been measured by D. Janzing in August 2016. \n\n\n', 'var1_desc': "The position of a ball, in this context, refers to the initial location on the ball track from where the ball begins its descent, which significantly influences the speed it attains while rolling downwards due to the track's steep slope.", 'var2_desc': "Time for passing a track segment refers to the measured duration it takes for a ball to traverse between two designated points, or light barriers, on a ball track, which can be influenced by the ball's initial position and subsequent acceleration."}
saved_pairs_info['pair0103'] = {'var1': ' position of a ball', 'var2': ' time for passing a track segment', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Information for pair0102:\n\nposition vs. passing time\n\npair0102, pair0103, pair0104 are taken from the same run of an experiment at a ball track for children at which some sensors have been mounted to measure the following parameters:\n\nvariable 1: position on the ball track where the ball starts\nvariable 2: time interval between passing the first and the second light barrier.\nvariable 3: time interval between passing the third and the fourth light barrier.\n\nThe initial part of the track ("acceleration zone")\nhas large slope, therefore the initial position is decisive for the speed the ball  gets when it rolls downwards. Therefore and because of the order of the light barriers we have the following causal chain:\n\nvariable 1 ---> variable 2 --> variable 3 \n\nhere we have x= variable 1, y= variable 3\n\nThe data has been measured by D. Janzing in August 2016. \n\n\n', 'var1_desc': "The position of a ball, in this context, refers to the initial location on the ball track from where the ball begins its descent, a factor that significantly influences the speed the ball gains as it rolls downwards due to the track's steep slope.", 'var2_desc': 'The concept of time for passing a track segment refers to the measured duration it takes for a ball to traverse between two designated points (light barriers) on a ball track, which is influenced by the initial position and resultant speed of the ball.'}
saved_pairs_info['pair0104'] = {'var1': ' time for passing 1. segment', 'var2': ' time for passing 2. segment', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Information for pair0102:\n\nposition vs. passing time\n\npair0102, pair0103, pair0104 are taken from the same run of an experiment at a ball track for children at which some sensors have been mounted to measure the following parameters:\n\nvariable 1: position on the ball track where the ball starts\nvariable 2: time interval between passing the first and the second light barrier.\nvariable 3: time interval between passing the third and the fourth light barrier.\n\nThe initial part of the track ("acceleration zone")\nhas large slope, therefore the initial position is decisive for the speed the ball  gets when it rolls downwards. Therefore and because of the order of the light barriers we have the following causal chain:\n\nvariable 1 ---> variable 2 --> variable 3 \n\nhere we have x= variable 2, y= variable 3\n\nThere is an important difference to pair0097 and pair0098 where the variables where given by the\ninverse of the time (i.e. the speed) to pass a pair of light barriers. Here we report the time itself.\n\n\nThe data has been measured by D. Janzing in August 2016. \n\n\n', 'var1_desc': "The time for passing the 1st segment, denoted as variable 2, refers to the measured time interval between when the ball passes the first and second light barriers on the ball track, which is influenced by the initial position of the ball due to the track's large slope.", 'var2_desc': 'The time for passing the 2nd segment, denoted as variable 3, refers to the time interval measured between the ball passing the third and fourth light barriers on the ball track during the experiment.'}
saved_pairs_info['pair0105'] = {'var1': ' pixel vector of a patch', 'var2': ' total brightness at the screen', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': "Information for pair0105:\n\nBrightness of screen\n\nThis is from an experiment that has been performed to generate an example of a cause-effect relation\nthat is clearly unconfounded. Patches of size 3x3 pixel are randomly drawn from a fixed black and white image. The patch is then displayed\nas an image of size about several centimeter. A photo diode is placed several centimeter away from the screen at a position where the light intensity is dominated by the patch. \n(More precisely, the light intensity is measured by the \nAdafruit TSL2591 High Dynamic Range Digital Light Sensor connected to an Arduino microcontroler. \nhttps://learn.adafruit.com/adafruit-tsl2591/wiring-and-test).\npair0101 is based on the same experimental setting, but there the `patches' have size 1 x 1 pixel. \n\nFirst column (x): pixel vector of grey values of the patch.\n\nSecond column (y): light intensity seen by a photo diode placed several centimeters away from the screen.  \n\n\nThe measurement has been performed by D. Janzing in August 2016.\n\n", 'var1_desc': 'A pixel vector of a patch refers to the array of grey values assigned to each pixel within a selected 3x3 area of an image, representing the brightness level of each pixel.', 'var2_desc': 'The total brightness at the screen refers to the cumulative light intensity emitted from a displayed image patch, measured in terms of grey values of the patch, and detected by a photo diode placed at a certain distance from the screen.'}
saved_pairs_info['pair0106'] = {'var1': ' time required for one round', 'var2': ' voltage', 'ground_truth': ' L', 'truth_ab': 0, 'truth_ba': 1, 'context': 'Information for pair0106:\n\nSpeed of an electric toy locomotive\n\n\nThis is from an experiment where an electric toy locomotive (Maerklin) passes one round at different speeds, controlled by varyring voltage.\nThe time required by the locomotive has been measured by a pair of light barriers.  The voltage has been varied by manually changing the control knob\nof a transformer and measured by the analog input of an Arduino after sending the electric AC voltage through a rectifier.  \n\nFirst column (x): time required for passing one round   \n\nSecond column (y): electric voltage \n\n\nThe measurement has been performed by D. Janzing in February 2017.\n\n', 'var1_desc': 'The time required for one round refers to the duration it takes for the electric toy locomotive to complete a single circuit, with this duration being influenced by the varying voltage levels applied to the locomotive.', 'var2_desc': 'Voltage, in this context, refers to the electric potential difference measured in volts, which was manually adjusted using a transformer to control the speed of an electric toy locomotive in an experiment conducted by D. Janzing in February 2017.'}
saved_pairs_info['pair0107'] = {'var1': ' strength of contrast', 'var2': ' answer correct or not', 'ground_truth': ' R', 'truth_ab': 1, 'truth_ba': 0, 'context': 'Information for pair0107:\n\nDirection of Gabor patches\n\nThe data set is from a psychophysics experiments with human subjects. A screen shows tilted Gabor patches (which are patterns of stripes\nfrequently used as stimuli in psychological experiments), either tilted to the left or to the right. The subject are asked to infer the direction, while the patches are shown with stronger or weaker contrast. The variable X describes the contrast values ranging from 0.0150 to 0.0500 in steps of 0.0025. The variable Y is a binary indicating whether the direction has been identified correctly (Y=1) or not (Y=0). For low values of the contrast the fraction of correct decisions approaches chance level (50%).\n\n\nFirst column (x): contrast\n\nSecond column (y): answer correct or not\n\nThe data set has been recorded by Heiko Schütt in 2014. \n\n\n', 'var1_desc': "The strength of contrast refers to the difference in visual properties that makes an object distinguishable from other objects and the background, often measured in terms of light and dark values, and in the context of this data set, it is quantified in values ranging from 0.0150 to 0.0500, influencing the visibility of the Gabor patches and the subject's ability to correctly identify their direction.", 'var2_desc': '"Answer correct or not" is a binary variable (Y) in the dataset, indicating whether the human subjects in the psychophysics experiment correctly identified the tilt direction of Gabor patches (Y=1) or not (Y=0), with the performance accuracy potentially influenced by the contrast value of the patches.'}
saved_pairs_info['pair0108'] = {'var1': ' time for 1/6 rotation', 'var2': ' temperature', 'ground_truth': ' L', 'truth_ab': 0, 'truth_ba': 1, 'var1_desc': "The time for 1/6 rotation refers to the duration required for the wheel of a Stirling engine to complete one-sixth of a full rotation, serving as a measure of the engine's inverse velocity.", 'context': "Information for pair 108:\n\nThis pair shows the dependence of the inverse velocity and the temperature of the heat bath of a Striling engine. The engine is\ndriven by a cup of hot water that is put underneath.\nThe inverse velocity is measured by the time needed for the engine's wheel for 1/6 rotation (because the wheel has 6 radius arms). \nThe temperature is measured by a sensor that was put into the cup. \n\n\n\nFirst column (x): time for 1/6 rotation\n\nSecond column (y): temperature in Degree Celsius\n\nThe data set has been recorded by Dominik Janzing in 2017\n\n\n", 'var2_desc': 'Temperature is a quantitative measure of heat or coldness of a body or an environment, expressed in degrees, and it influences physical properties and behaviors such as the rate of chemical reactions or the efficiency of a Stirling engine.'}

# Get relationship of each variable pair

In [7]:
# Raw LLM answers, keyed by (pair id, temperature, run number).
# Each value maps 'llm_ab' / 'llm_ba' to the answer for that direction
# (the answers are printed as 0/1 further down; presumably ints - confirm
# against TuebingenModelSuggester.suggest_relationship).
llm_output : Dict[Tuple[str, float, int], Dict[str, int]] = {}

####  Variables + Straight Strategy 

In [20]:
# Ask GPT-4 about every saved pair in both directions (var1 -> var2, then
# var2 -> var1) with the "Straight" strategy, repeating the whole sweep for
# run numbers 1..5. Each run's two answers are stored in `llm_output` under
# the key (pair id, temperature, run number).
temperature = 0.3
for run in range(1, 6):
    for pair_id, info in saved_pairs_info.items():
        first, first_desc = info['var1'], info['var1_desc']
        second, second_desc = info['var2'], info['var2_desc']

        llm_output[(pair_id, temperature, run)] = {
            # forward question: var1 as A, var2 as B
            'llm_ab': modeler.suggest_relationship(
                variable_a=first,
                description_a=first_desc,
                variable_b=second,
                description_b=second_desc,
                llm=gpt4,
                temperature=temperature,
                strategy=Strategy.Straight,
            ),
            # reverse question: var2 as A, var1 as B
            'llm_ba': modeler.suggest_relationship(
                variable_a=second,
                description_a=second_desc,
                variable_b=first,
                description_b=first_desc,
                llm=gpt4,
                temperature=temperature,
                strategy=Strategy.Straight,
            ),
        }


##### Average LLM Output

In [23]:
# Spot-check one pair ('pair0087'): print the A->B and B->A answers of each of
# the five runs at temperature 0.3, then print the mean of each direction.
#
# The accumulators are bound to 0 explicitly. A bare annotation such as
# `av_ab : int` does not create the name, so `av_ab += ...` would raise
# NameError when this cell is run on a fresh kernel.
av_ab: float = 0
av_ba: float = 0

for run in range(1, 6):
    run_output = llm_output[('pair0087', 0.3, run)]

    av_ab += run_output['llm_ab']
    av_ba += run_output['llm_ba']

    print(run_output['llm_ab'])
    print(run_output['llm_ba'])

av_ab = av_ab/5.0
av_ba = av_ba/5.0

print(av_ab)
print(av_ba)

1
0
1
0
1
0
1
0
1
0
1.0
0.0


In [46]:
# Print the raw A->B answer of every run (1..5, temperature 0.3) for every
# saved pair, in the iteration order of `saved_pairs_info`.
# The loop variable is named `pair_id` so the builtin `id` is not shadowed
# for the rest of the notebook session.
for pair_id in saved_pairs_info:
    for run in range(1, 6):
        print(llm_output[(pair_id, 0.3, run)]['llm_ab'])

1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
0
0
0
0
0
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
0
0
0
0
0
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
0
0
0
0
0
0
0
0
0
0
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
0
0
0
0
0
0
0
0
0
0
1
1
1
1
1
0
0
0
0
0
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
0
0
0
0
0
1
1
1
1
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
1
1
1
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
1
1
1
1
1
0
0
0
0
0
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
0
0
0
0
0
1
1
1
1
1
0
0
0
0
0
0
0
0
0
0
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
0
0
0
0
0
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
0
0
0
0
0
0
0
0
0
0
1
1
1
1
1
0
0
0
0
0
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
1
0
0
0
0
0
1
1
1
1
1


In [52]:
# Score the LLM answers against the ground truth.
#
# For every pair, count over the five runs (temperature 0.3) how often the
# answer to "A causes B?" and to "B causes A?" agrees with 'ground_truth',
# and store the fraction of agreeing runs per direction in `results`.
# In the saved data ' R' accompanies truth_ab == 1 and ' L' accompanies
# truth_ba == 1; the labels carry a leading space, which the comparisons
# below must keep.
results : Dict = {}

n_runs = 5

for pair_id, pair_info in saved_pairs_info.items():
    ground_truth = pair_info['ground_truth']

    correct_ab = 0
    correct_ba = 0

    for run in range(1, n_runs + 1):
        answers = llm_output[(pair_id, 0.3, run)]
        llm_ab = answers['llm_ab']
        llm_ba = answers['llm_ba']

        # "A causes B?" is right when answered 1 for ' R' or 0 for ' L'.
        if (llm_ab == 1 and ground_truth == " R") or (llm_ab == 0 and ground_truth == " L"):
            correct_ab += 1

        # "B causes A?" is the mirror image: 1 for ' L' or 0 for ' R'.
        if (llm_ba == 1 and ground_truth == " L") or (llm_ba == 0 and ground_truth == " R"):
            correct_ba += 1

    # Key order matters: it is the order in which the row is printed.
    results[pair_id] = {
        'PairID': pair_id,
        'CorrectACauseB': correct_ab / n_runs,
        'CorrectBCauseA': correct_ba / n_runs,
        'VarA': pair_info['var1'],
        'VarB': pair_info['var2'],
        'GroundTruth': ground_truth,
    }
    print(results[pair_id])




{'PairID': 'pair0001', 'CorrectACauseB': 1.0, 'CorrectBCauseA': 1.0, 'VarA': ' Altitude', 'VarB': ' Temperature', 'GroundTruth': ' R'}
{'PairID': 'pair0002', 'CorrectACauseB': 1.0, 'CorrectBCauseA': 1.0, 'VarA': ' Altitude', 'VarB': ' Precipitation', 'GroundTruth': ' R'}
{'PairID': 'pair0003', 'CorrectACauseB': 1.0, 'CorrectBCauseA': 1.0, 'VarA': ' Longitude', 'VarB': ' Temperature', 'GroundTruth': ' R'}
{'PairID': 'pair0004', 'CorrectACauseB': 1.0, 'CorrectBCauseA': 1.0, 'VarA': ' Altitude', 'VarB': ' Sunshine hours', 'GroundTruth': ' R'}
{'PairID': 'pair0005', 'CorrectACauseB': 0.0, 'CorrectBCauseA': 1.0, 'VarA': ' Age', 'VarB': ' Length', 'GroundTruth': ' R'}
{'PairID': 'pair0006', 'CorrectACauseB': 1.0, 'CorrectBCauseA': 1.0, 'VarA': ' Age', 'VarB': ' Shell weight', 'GroundTruth': ' R'}
{'PairID': 'pair0007', 'CorrectACauseB': 1.0, 'CorrectBCauseA': 1.0, 'VarA': ' Age', 'VarB': ' Diameter', 'GroundTruth': ' R'}
{'PairID': 'pair0008', 'CorrectACauseB': 1.0, 'CorrectBCauseA': 1.0, 'V

#### Save results to a CSV file

In [53]:
import csv
import copy

# Write one CSV row per scored pair from `results`.

# CSV file name
csv_file = "gpt-4_results_straight_prompt_w_descriptions.csv"

# Column order of the CSV; every name must be a key of the per-pair result dicts
header = ["CorrectACauseB", "CorrectBCauseA", "PairID", "VarA", "VarB", "GroundTruth"]

# newline="" lets the csv module control line endings itself
with open(csv_file, mode="w", newline="", encoding="utf-8") as file:
    writer = csv.DictWriter(file, fieldnames=header)
    writer.writeheader()
    # Only the row dicts are needed, so iterate the values rather than the items.
    writer.writerows(results.values())

print(f"CSV file '{csv_file}' has been created.")


CSV file 'gpt-4_results_straight_prompt_w_descriptions.csv' has been created.
