In [1]:
import os
from glob import glob
import json
from tqdm import tqdm
import numpy as np
import pandas as pd
import cv2
from pprint import pprint
import shutil

In [None]:
# Question banks, stored as dict-of-dicts (previously a list of singleton dicts).
#
# `original_vlat` maps a chart id to one multiple-choice item with four fields:
#   "question"   - text shown to the participant
#   "answer"     - the correct choice; always one of the strings in "options"
#   "image_path" - image file name; create_bubble_components() prefixes it with
#                  "../literacy/assets/images/"
#   "options"    - the answer choices (a "Skip" choice is appended later)
# Chart ids become component/response ids once the "-VLAT" tag is appended.

original_vlat = {
    "treemap": {
        "question": "eBay is nested in the Software category.",
        "answer": "False",
        "image_path": "TreeMap.png",
        "options": ["True","False", "Cannot be inferred / inadequate information"]
    },
    "stackedBar100": {
        "question": "Which country has the lowest proportion of Gold medals?",
        "answer": "Great Britain",
        "image_path": "Stacked100.png",
        "options": ["Great Britain","USA","Japan", "None of Them"]
    },
    # NOTE(review): "50-70km" overlaps "50-60km" while the other choices are
    # 10 km bins - possibly meant to be "60-70km"; confirm against the chart.
    "histogram": {
        "question": "What distance have customers traveled in the taxi the most?",
        "answer": "30-40km",
        "image_path": "Histogram.png",
        "options": ["50-70km","30-40km","20-30km","50-60km"]
    },
    "choropleth": {
        "question": "In 2020, the unemployment rate for Washington (WA) was higher than that of Wisconsin (WI)?",
        "answer": "True",
        "image_path": "Choropleth.png",
        "options": ["True","False", "Cannot be inferred / inadequate information"]
    },
    "piechart": {
        "question": "What is the approximate global smartphone market share of Samsung?",
        "answer": "17.6%",
        "image_path": "PieChart.png",
        "options": ["17.6%","25.3%","10.9%","35.2%"]
    },
    "bubblechart": {
        "question": "Which city's metro system has the largest number of stations?",
        "answer": "Shanghai",
        "image_path": "BubbleChart.png",
        "options": ["Beijing","Shanghai","London", "None of Them"]
    },
    "stackedbar": {
        "question": "What is the cost of peanuts in Seoul?",
        "answer": "$6.1",
        "image_path": "StackedBar.png",
        "options": ["$5.2","$6.1","$7.5","$4.5"]
    },
    "linechart": {
        "question": "What was the price of a barrel of oil in February 2020?",
        "answer": "$50.54",
        "image_path": "LineChart.png",
        "options": ["$50.54","$47.02","$42.34","$43.48"]
    },
    "barchart": {
        "question": "What is the average internet speed in Japan?",
        "answer": "40.51 Mbps",
        "image_path": "BarChart.png",
        "options": ["42.30 Mbps","40.51 Mbps","35.25 Mbps","16.16 Mbps"]
    },
    # NOTE(review): the key reads "areachar" (the image is AreaChart.png) -
    # possibly a typo for "areachart". Renaming it would change the generated
    # id "areachar-VLAT", so confirm nothing downstream depends on it first.
    "areachar": {
        "question": "What was the average price of a pound of coffee in October 2019?",
        "answer": "$0.71",
        "image_path": "AreaChart.png",
        "options": ["$0.71","$0.90","$0.80","$0.63"]
    },
    "stackedarea": {
        "question": "What was the ratio of girls named \"Isla\" to girls named \"Amelia\" in 2012 in the UK?",
        "answer": "1 to 2",
        "image_path": "StackedArea.png",
        "options": ["1 to 1","1 to 2","1 to 3","1 to 4"]
    },
    "scatterplot": {
        "question": "There is a negative relationship between the height and the weight of the 85 males.",
        "answer": "False",
        "image_path": "Scatterplot.png",
        "options": ["True","False", "Cannot be inferred / inadequate information"]
    }
}

# `calvi` maps an item id ("calvi1" .. "calvi15") to one multiple-choice item.
# Each item has the fields:
#   "question"   - text shown to the participant
#   "answer"     - the correct choice; always one of the strings in "options"
#   "image_path" - image file name of the chart the question refers to
#   "options"    - the answer choices
# Only participant-facing question wording is edited here (spelling/grammar);
# ids, answers and options are left exactly as they were.
calvi = {
    "calvi1": {
        "question": "What is the number of concert tickets sold on Aug 10 as a proportion of that on Aug 11?",
        "answer": "80%",
        "image_path": "calvi1.png",
        "options": ["20%","40%","60%","80%"]
    },
    "calvi2": {
        "question": "Predatory species Z is more prevalent in NC than in AZ.",
        "answer": "False",
        "image_path": "calvi2.png",
        "options": ["True","False","Cannot be inferred / inadequate information"]
    },
    "calvi3": {
        "question": "Which of the following is True about the annual number of new movies released in city X?",
        "answer": "There were more new movies released in 1992 than in 1994.",
        "image_path": "calvi3.png",
        "options": [
            "There were more new movies released in 1992 than in 1994.",
            "2000 had the most number of new movies released.",
            "There were more new movies released in 1996 than in 1995.",
            "None of the above"
        ]
    },
    "calvi4": {
        "question": "Which of the following is true about the prevalence of the plant species Y?",
        "answer": "It is less prevalent in region C than in D.",
        "image_path": "calvi4.png",
        "options": [
            "It is more prevalent in region D than in E.",
            "It is less prevalent in region B than in C.",
            "It is more prevalent in region C than in A.",
            "It is less prevalent in region C than in D."
        ]
    },
    "calvi5": {
        "question": "Which of the following is true about the three brands of shampoo in city Y from 2001 to 2005?",
        "answer": "The market share of brand C is higher in 2001 than in 2002.",
        "image_path": "calvi5.png",
        "options": [
            "The market share of brand C is on an increasing trend from 2001 to 2005.",
            "The market share of brand C is higher in 2001 than in 2002.",
            "Brand B has the largest market share in 2001 compared to its market share in the other years.",
            "None of the above."
        ]
    },
    "calvi6": {
        "question": "Approximately, what is the number of votes for candidate A as a proportion of those for candidate B?",
        "answer": "80%",
        "image_path": "calvi6.png",
        "options": ["50%","60%","70%","80%"]
    },
    "calvi7": {
        "question": "Was the average amount of precipitation over 15 years in region D higher than that in region C?",
        "answer": "Cannot be inferred / inadequate information",
        "image_path": "calvi7.png",
        "options": ["Yes","No","Cannot be inferred / inadequate information"]
    },
    "calvi8": {
        "question": "Assuming today is Feb 20, 2022, which of the following is true about the vegetables planted by the three farms?",
        "answer": "None of the above.",
        "image_path": "calvi8.png",
        "options": [
            "All three farms planted more vegetables in 2022 than they did in the previous years.",
            "All three farms planted fewer vegetables in 2022 than they did in the previous years.",
            "All three farms planted about the same amount of vegetables in 2022 than they did in the previous years.",
            "None of the above."
        ]
    },
    "calvi9": {
        "question": "Assuming today is May 01, 2022, which of the following statements is true?",
        "answer": "None of the above.",
        "image_path": "calvi9.png",
        "options": [
            "Company Z had more employees in 2018 than in 2020.",
            "The number of new employees per year fell sharply in 2022.",
            "The total number of employees in company Z decreased in 2022.",
            "None of the above."
        ]
    },
    "calvi10": {
        "question": "Does any member of species C weigh more than 5 lbs?",
        "answer": "Cannot be inferred / inadequate information",
        "image_path": "calvi10.png",
        "options": ["Yes","No","Cannot be inferred / inadequate information"]
    },
    "calvi11": {
        "question": "Which region had the highest number of confirmed cases per 1,000 people?",
        "answer": "Cannot be inferred / inadequate information",
        "image_path": "calvi11.png",
        "options": ["A","B","C","Cannot be inferred / inadequate information"]
    },
    "calvi12": {
        "question": "What is the trend of sales in gift shop A from Jan to Dec?",
        "answer": "Cannot be inferred / inadequate information",
        "image_path": "calvi12.png",
        "options": ["Generally increasing","Generally decreasing","Generally constant","Cannot be inferred / inadequate information"]
    },
    "calvi13": {
        "question": "More than half of the people in country Z prefer reading at night.",
        "answer": "False",
        "image_path": "calvi13.png",
        "options": ["True","False","Cannot be inferred / inadequate information"]
    },
    "calvi14": {
        "question": "The residents of town Y are voting on a favorite hiking trail, and half of the votes have been counted. The poll closes on Sep 01, 2022. Which of the following is true?",
        "answer": "Too early to say.",
        "image_path": "calvi14.png",
        "options": [
            "Hiking trail A will be voted as the favorite trail.",
            "Hiking trail B will be voted as the favorite trail.",
            "Hiking trail C will be voted as the favorite trail.",
            "Too early to say."
        ]
    },
    "calvi15": {
        "question": "In town X, what is the approximate average number of customers at mall A from Jan to Mar?",
        "answer": "400",
        "image_path": "calvi15.png",
        "options": ["200","300","400","500"]
    }
}

In [5]:
# Number of answer options per task, keyed by "<item id>-<SUITE>".
# VLAT items come first, then CALVI items, each in bank order.
combined_dict = {
    item_id + "-" + suite: len(item["options"])
    for suite, bank in (("VLAT", original_vlat), ("CALVI", calvi))
    for item_id, item in bank.items()
}

print(combined_dict)

{'treemap-VLAT': 3, 'stackedBar100-VLAT': 4, 'histogram-VLAT': 4, 'choropleth-VLAT': 3, 'piechart-VLAT': 4, 'bubblechart-VLAT': 4, 'stackedbar-VLAT': 4, 'linechart-VLAT': 4, 'barchart-VLAT': 4, 'areachar-VLAT': 4, 'stackedarea-VLAT': 4, 'scatterplot-VLAT': 3, 'calvi1-CALVI': 4, 'calvi2-CALVI': 3, 'calvi3-CALVI': 4, 'calvi4-CALVI': 4, 'calvi5-CALVI': 4, 'calvi6-CALVI': 4, 'calvi7-CALVI': 3, 'calvi8-CALVI': 4, 'calvi9-CALVI': 4, 'calvi10-CALVI': 3, 'calvi11-CALVI': 4, 'calvi12-CALVI': 4, 'calvi13-CALVI': 3, 'calvi14-CALVI': 4, 'calvi15-CALVI': 4}


In [6]:
# Task ids in insertion order (iterating a dict yields its keys).
list(combined_dict)

['treemap-VLAT',
 'stackedBar100-VLAT',
 'histogram-VLAT',
 'choropleth-VLAT',
 'piechart-VLAT',
 'bubblechart-VLAT',
 'stackedbar-VLAT',
 'linechart-VLAT',
 'barchart-VLAT',
 'areachar-VLAT',
 'stackedarea-VLAT',
 'scatterplot-VLAT',
 'calvi1-CALVI',
 'calvi2-CALVI',
 'calvi3-CALVI',
 'calvi4-CALVI',
 'calvi5-CALVI',
 'calvi6-CALVI',
 'calvi7-CALVI',
 'calvi8-CALVI',
 'calvi9-CALVI',
 'calvi10-CALVI',
 'calvi11-CALVI',
 'calvi12-CALVI',
 'calvi13-CALVI',
 'calvi14-CALVI',
 'calvi15-CALVI']

In [7]:

def create_base_components():
    """Build the table of shared base components for the study config.

    Returns:
        dict: a freshly built mapping with a single "bubble" entry describing
        the react component at ``literacy/assets/bubble.jsx``. Task components
        refer to it by name through their "baseComponent" field.
    """
    bubble_spec = dict(
        type="react-component",
        path="literacy/assets/bubble.jsx",
        instructionLocation="sidebar",
        nextButtonLocation="sidebar",
        # NOTE(review): both timings are presumably milliseconds - confirm
        # against the study framework's configuration reference.
        nextButtonDisableTime=90000,
        nextButtonEnableTime=5000,
    )
    return {"bubble": bubble_spec}

In [14]:

def create_default_components():
    """Build the non-task pages of the study.

    Returns:
        dict: component id -> component spec, inserted in the order
        "instructions", "consent", "demographics", "self_assessment".
        All four are markdown components. "demographics" carries four
        required radio questions and "self_assessment" carries ten required
        six-point radio items ("SGL-item-1" .. "SGL-item-10"). Every call
        builds new dicts and lists.
    """

    def radio(item_id, prompt, options, with_other=False):
        # One required radio question shown below the stimulus. Keys are
        # inserted in the same order as they appear in the written config;
        # "withOther" is only present when requested.
        question = {
            "id": item_id,
            "prompt": prompt,
            "required": True,
            "location": "belowStimulus",
            "type": "radio",
        }
        if with_other:
            question["withOther"] = True
        question["options"] = list(options)
        question["withDivider"] = True
        return question

    def six_point_scale(lowest, highest):
        # Six choices where only the two end points carry a verbal label.
        return [f"1 ({lowest})", "2", "3", "4", "5", f"6 ({highest})"]

    demographic_questions = [
        radio(
            "gender",
            "What is your **gender**?",
            ["Woman", "Man", "Prefer not to say"],
            with_other=True,
        ),
        radio(
            "age",
            "What is your **age**?",
            [
                "Under 18 years",
                "18-24 years",
                "25-34 years",
                "35-44 years",
                "45-54 years",
                "55-64 years",
                "65 years or older",
                "Prefer not to say",
            ],
        ),
        radio(
            "education",
            "What is the **highest degree or level of education** you have completed?",
            [
                "Less than high school",
                "High school diploma or equivalent",
                "Bachelor's degree or equivalent",
                "Master's degree or equivalent",
                "Doctoral degree or equivalent",
            ],
            with_other=True,
        ),
        radio(
            "experience",
            "Please briefly describe your experience with data visualizations.",
            [
                "I create data visualizations frequently, either for work or as a hobby.",
                "I create data visualizations sometimes, either for work or as a hobby.",
                "I read data visualizations frequently, such as in news articles.",
                "I read data visualizations sometimes, such as in news articles.",
                "I rarely interact with data visualizations in my life.",
            ],
        ),
    ]

    # (question text, label of choice 1, label of choice 6); the position in
    # this tuple determines the item number used in the id and the "Qn:" prefix.
    self_assessment_items = (
        ("How good are you at working with bar charts?", "not at all good", "extremely good"),
        ("How good are you at working with line plots?", "not at all good", "extremely good"),
        ("How good are you at working with pies?", "not at all good", "extremely good"),
        ("How good are you at inferring the size of a bar in a bar chart?", "not at all good", "extremely good"),
        ("How good are you at determining the difference between 2 bars in a bar chart?", "not at all good", "extremely good"),
        ("How good are you at projecting a future trend from a line chart?", "not at all good", "extremely good"),
        ("Are graphs easier to understand than numbers?", "not at all", "much easier"),
        ("How often do you find graphical information to be useful?", "never", "very often"),
        ("To what extent do you believe in the saying 'a picture is worth one thousand words'?", "not at all", "extremely"),
        ("When reading books or newspapers, how helpful do you find graphs that are part of a story?", "not at all", "extremely"),
    )
    self_assessment_questions = [
        radio(
            f"SGL-item-{number}",
            f"Q{number}: {text}",
            six_point_scale(lowest, highest),
        )
        for number, (text, lowest, highest) in enumerate(self_assessment_items, start=1)
    ]

    # NOTE: a "vlat_intro" markdown page (literacy/assets/vlat_intro.md) used
    # to sit between "consent" and "demographics"; it is currently disabled.
    return {
        "instructions": {
            "type": "markdown",
            "path": "literacy/assets/instructions.md",
            "response": [],
        },
        "consent": {
            "type": "markdown",
            "path": "shared/consent.md",
            "nextButtonText": "I agree",
            "response": [],
        },
        "demographics": {
            "type": "markdown",
            "path": "shared/blank.md",
            "response": demographic_questions,
        },
        "self_assessment": {
            "type": "markdown",
            "path": "literacy/assets/self_assessment.md",
            "response": self_assessment_questions,
        },
    }


def create_bubble_components(data, tag='basic'):
    """Turn a question bank into task components based on the "bubble" base component.

    Args:
        data: mapping of chart id -> item dict with the keys "question",
            "image_path", "options" and "answer".
        tag: suffix appended to every chart id; ``"<chart id>-<tag>"`` is used
            both as the component key and as the id of the radio response.

    Returns:
        dict: ``"<chart id>-<tag>"`` -> component spec, in the iteration order
        of ``data``. Each spec has a radio response offering the item's
        options plus "Skip", a required reactive response with id "answer",
        and a "correctAnswer" entry for the radio response.

    Raises:
        KeyError: if an item lacks one of the four expected keys.
    """

    def build_task(component_id, item):
        # Read the fields up front, in a fixed order, so a malformed item
        # fails before any part of the spec is assembled.
        question_text = item["question"]
        image_file = item["image_path"]
        choices = item["options"]
        correct_choice = item["answer"]

        choice_response = {
            "id": component_id,
            "prompt": "Your answer",
            "required": True,
            "location": "sidebar",
            "type": "radio",
            # Concatenation builds a new list; the bank's own list is untouched.
            "options": choices + ["Skip"],
        }
        click_response = {
            "id": "answer",
            "prompt": "Click at least once.",
            "required": True,
            "location": "sidebar",
            "type": "reactive",
        }
        return {
            "baseComponent": "bubble",
            "instruction": f" **{question_text}**",
            "parameters": {
                "image": f"../literacy/assets/images/{image_file}",
                "question": question_text,
                # Passed through to the bubble component; its meaning is not
                # visible from this notebook.
                "radius": 32,
            },
            "response": [choice_response, click_response],
            "correctAnswer": [{"id": component_id, "answer": correct_choice}],
        }

    return {
        f"{chart_type}-{tag}": build_task(f"{chart_type}-{tag}", item)
        for chart_type, item in data.items()
    }

In [15]:
# One task component per question; the tag keeps ids from the two banks distinct.
vlat_components = create_bubble_components(data=original_vlat, tag='VLAT')
calvi_components = create_bubble_components(data=calvi, tag='CALVI')

In [16]:
def sequence_generator():
    """Describe the order in which the study's components are presented.

    Reads the module-level ``vlat_components`` and ``calvi_components`` dicts,
    which must already exist when this is called.

    Returns:
        dict: a fixed-order sequence of "consent", "self_assessment" and
        "instructions", then a nested block with id "batch" holding every VLAT
        and CALVI task id in random order, then "demographics".
    """
    # VLAT ids first, then CALVI ids; the block itself is marked "random".
    task_ids = [*vlat_components.keys(), *calvi_components.keys()]
    randomized_batch = {
        "id": "batch",
        "order": "random",
        "components": task_ids,
    }
    return {
        "order": "fixed",
        "components": [
            "consent",
            "self_assessment",
            "instructions",
            randomized_batch,
            "demographics",
        ],
    }

In [17]:
# Prolific "submission complete" URL; it is embedded as a link in the study's
# end message when config.json is written.
# NOTE(review): the `cc` query parameter is presumably this study's completion
# code - confirm it matches the Prolific study before launching.
prolificRedirection = "https://app.prolific.com/submissions/complete?cc=CC8EMRUG"

In [18]:
# Assemble every piece that goes into the config.
baseComponents = create_base_components()
default_components = create_default_components()
# Merge left to right; on a duplicate id the later mapping would win.
components = {**default_components, **vlat_components, **calvi_components}
sequence = sequence_generator()
print(f"Total number of components: {len(components)}")

Total number of components: 31


In [19]:
# Update config.json in place: keep whatever it already contains and replace
# only the end message, components, sequence and base components.
with open("config.json", "r", encoding="utf-8") as f:
    config = json.load(f)

config['uiConfig']['studyEndMsg'] = f"**Thank you for completing the study. You may click this link and return to Prolific**: [{prolificRedirection}]({prolificRedirection})"
config['components'] = components
config['sequence'] = sequence
config['baseComponents'] = baseComponents

# Serialize BEFORE reopening the file for writing: opening with "w" truncates
# config.json immediately, so a serialization error raised while the file is
# open would leave the only copy of the config empty or half-written.
serialized_config = json.dumps(config, indent=4)
with open("config.json", "w", encoding="utf-8") as f:
    f.write(serialized_config)