# Time Series

In [None]:
# OpenAI API key used for every request in this notebook.
# It is read from the OPENAI_API_KEY environment variable so the secret does
# not have to be saved inside the notebook. If the variable is unset or empty,
# the placeholder below is used and must be replaced by hand.
import os

OPENAI_KEY = os.environ.get("OPENAI_API_KEY") or "sk-<your-openai-key>"

## Related Functions

In [None]:
import json
import re
import requests
import base64

def run_local_vision_request(text, image_urls, temperature=0):
    """Send a prompt plus local images to the OpenAI chat-completions endpoint.

    Args:
        text: Prompt text, sent as the first content part of the user message.
        image_urls: Iterable of local file paths (not remote URLs, despite the
            name). Each file is read, base64-encoded and attached as a data URL.
        temperature: Sampling temperature forwarded in the request payload.

    Returns:
        The decoded JSON body of the HTTP response. The status code is not
        checked here, so an error payload is returned as-is.
    """
    import mimetypes

    def encode_image(image_path):
        with open(image_path, "rb") as image_file:
            return base64.b64encode(image_file.read()).decode('utf-8')

    def image_part(image_path):
        # Label the data URL with the file's real media type (the notebook
        # sends .png files); fall back to JPEG when it cannot be guessed.
        mime_type, _ = mimetypes.guess_type(str(image_path))
        if mime_type is None or not mime_type.startswith("image/"):
            mime_type = "image/jpeg"
        # The API's documented image part carries an explicit "type" and
        # wraps the URL in an object.
        return {
            "type": "image_url",
            "image_url": {
                "url": f"data:{mime_type};base64,{encode_image(image_path)}"
            },
        }

    api_key = OPENAI_KEY

    content = [{"type": "text", "text": text}]
    content.extend(image_part(image_path) for image_path in image_urls)
    messages = [{"role": "user", "content": content}]

    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }

    payload = {
        "model": "gpt-4-vision-preview",
        "messages": messages,
        "max_tokens": 2048,
        "temperature": temperature,
    }

    response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload)
    return response.json()

def run_test(name, description, prompt, image_urls, logger, times=1, max_failures=None):
    """Query the vision model until ``times`` responses have been collected.

    Each successful response is logged through ``logger.log`` together with
    the contents of its first ```json fenced block (parsed with ``json.loads``),
    or ``""`` when the response has no such block. Any exception in an attempt
    (request failure, API error payload, unparsable JSON block, logging
    failure) is printed and the attempt is repeated.

    Args:
        name: Experiment name stored with the log record.
        description: Step description stored with the log record.
        prompt: Prompt text sent to the model.
        image_urls: Local image paths forwarded to run_local_vision_request.
        logger: Object exposing ``log(name, description, prompt, image_urls,
            response, json_format_data)``.
        times: Number of successful responses to collect.
        max_failures: Optional cap on the total number of failed attempts.
            ``None`` (the default) retries without limit, as before.

    Returns:
        List with the text content of each successful response.

    Raises:
        RuntimeError: if ``max_failures`` is set and that many attempts failed.
    """
    response_list = []
    failures = 0
    while len(response_list) < times:
        try:
            response = run_local_vision_request(
                text=prompt,
                image_urls=image_urls,
            )
            # Report the API's own error payload rather than a bare
            # KeyError('choices'), so the printed message is actionable.
            if isinstance(response, dict) and response.get("error"):
                raise RuntimeError(f"OpenAI API returned an error: {response['error']}")
            content = response["choices"][0]["message"]["content"]
            matched_content = re.search(r'```json([\s\S]*?)```', content)
            if matched_content:
                json_format_data = json.loads(matched_content.group(1))
            else:
                json_format_data = ""
            logger.log(name, description, prompt, image_urls, content, json_format_data)
            response_list.append(content)
        except Exception as e:
            print(e)
            failures += 1
            if max_failures is not None and failures >= max_failures:
                raise RuntimeError(
                    f"run_test gave up after {failures} failed attempts for {description!r}"
                ) from e

    return response_list

def get_result_by_description(data, name, description):
    """Return the first record of ``data`` matching both ``name`` and ``description``.

    Records are compared on their "name" and "description" keys; ``None`` is
    returned when no record matches.
    """
    hits = (
        record
        for record in data
        if name == record["name"] and description == record["description"]
    )
    return next(hits, None)

class Logger:
    """Append experiment records to a JSON file that holds a single list."""

    def __init__(self, file_path):
        # Path of the JSON log file; it is only touched when log() is called.
        self.file_path = file_path

    def log(self, name, description, prompt, image_urls, response, json_format_data):
        """Append one record to the log file and rewrite the file.

        A missing or empty log file is treated as an empty list, so the first
        call creates the file instead of raising.

        Args:
            name: Experiment name.
            description: Step description.
            prompt: Prompt that was sent.
            image_urls: Image paths that were sent with the prompt.
            response: Text content of the model response.
            json_format_data: Parsed JSON extracted from the response, or "".
        """
        json_data_to_add = {"name": name, "description": description, "prompt": prompt, "image_urls": image_urls, "response": response, "json_format_data": json_format_data}

        try:
            with open(self.file_path, 'r') as file:
                raw = file.read()
        except FileNotFoundError:
            raw = ""

        # Start from an empty list when there is nothing to parse yet.
        data = json.loads(raw) if raw.strip() else []
        data.append(json_data_to_add)

        with open(self.file_path, 'w') as file:
            json.dump(data, file, indent=4)
# Notebook-wide Logger instance; the cells below pass it to run_test.
logger = Logger(file_path="./time_series.json")

from paperqa import (
    Docs,
)
import nest_asyncio
nest_asyncio.apply()


from PIL import Image
def image_to_gray(file_path, task_list, vis_list):
    """Write a grayscale copy next to each ``<file_path><task>/<vis>.png`` image.

    Args:
        file_path: Path prefix, concatenated directly with the task name (so it
            is expected to end with a path separator).
        task_list: Task sub-directory names.
        vis_list: Image base names (without the ".png" extension).

    The converted image is saved as ``<file_path><task>/<vis>_gray.png``.
    """
    for task in task_list:
        for vis in vis_list:
            stem = file_path + task + "/" + vis
            # The context manager closes the source file once the converted
            # copy has been written, instead of leaving the handle open.
            with Image.open(stem + ".png") as image:
                gray_image = image.convert('L')
                gray_image.save(stem + "_gray" + ".png")

def get_rag_database():
    """Build one paperqa ``Docs`` collection per visualization type.

    For each of "line", "heatmap" and "icicle" the text file
    ``./experiment_material/web_knowledge_<vis>.txt`` is added to its own
    ``Docs`` instance under the docname ``web_knowledge_<vis>``.

    Returns:
        Dict mapping the visualization name to its ``Docs`` instance.
    """
    vis_names = ("line", "heatmap", "icicle")

    # Create every collection first, then load the documents in the same order.
    web_knowledge = {vis: Docs() for vis in vis_names}
    for vis, docs in web_knowledge.items():
        docs.add(
            path=f"./experiment_material/web_knowledge_{vis}.txt",
            docname=f"web_knowledge_{vis}",
        )
    return web_knowledge

## Prompt Preparation

In [None]:
# Prompt building blocks. Templates containing {placeholders} must be filled
# with str.format() before use:
#   INTRODUCTION                          -> vis_introduction
#   MAXIMA / MINIMA / TREND_DETECTION     -> visualizations
#   COMPARISON                            -> visualizations, week_x, week_y
#   DESCRIPTION                           -> visualizations
#   RATING                                -> steps
#   WEB_SEARCH                            -> visualization
# NOTE(review): several templates say "three visualizations"; confirm this
# wording is intended when more than three visualizations are passed in.
ROLE = "You are an average user. "
INTRODUCTION = "These visualizations represent each day's sales. By looking at the visualizations, you know that there are 112 days: Each day follows the structure 'Day X-Week Y-Month Z', where 'X' denotes the day of the week (ranging from Day 1 to Day 7), 'Y' indicates the week of the month (ranging from Week 1 to Week 4), and 'Z' specifies the month of the year (January, February, March, April). {vis_introduction}"
MAXIMA = "By looking at these three visualizations ({visualizations}), your goal is to identify the day(Day X-Week Y-Month Z) with the absolute highest sales through individual visualisations. Just determine which day has the maximum sale."
MINIMA = "By looking at these three visualizations ({visualizations}), your goal is to identify the day(Day X-Week Y-Month Z) with the absolute lowest sales through individual visualisations. Just determine which day has the minimum sale."
COMPARISON = "By looking at these three visualizations ({visualizations}), your goal is to discover which of the following weeks has the highest aggregated sales: {week_x} or {week_y} through different visualisations. Just determine which week of the two weeks has the greater total sales."
TREND_DETECTION = "By looking at these three visualizations ({visualizations}), your goal is to discover the week(Week Y-Month Z) with the smallest difference in sales between the first and last day through individual visualisations. Just determine which week has the smallest difference."
DESCRIPTION = "Please describe three visualizations({visualizations}) in detail."
STEP = "You need to tell me the exact process of completing the task using the different visualisations and the problem you encountered, but not the answers."
RATING = "Here's how one person accomplishes a task: {steps} Based on the knowledge you have, assuming you are this person, you should answer two 5-point Likert scale questions for three different visualisations, ranging from strongly agree (5) to strongly disagree (1): (i) I feel confident about the given answer and (ii) I think this visualisation is easy to use for this task. And give the reason."
WEB_SEARCH = "Find some websites which introduce the {visualization}, and introduce the content about the {visualization}."
# The two JSON_FORMAT_* strings escape their braces as {{ }}, i.e. they are
# written for str.format(); concatenated unformatted, the doubled braces are
# sent to the model verbatim.
# NOTE(review): the examples use single quotes, which json.loads rejects if the
# model copies them literally - confirm this is intended.
JSON_FORMAT_DESCRIPTION = "Please give an additional scoring result in json format at the end of your answer, like ```json{{'line': description, 'heatmap': description, 'icicle': description }}```."
JSON_FORMAT_RATING = "Please give an additional scoring result in json format at the end of your answer, like ```json{{'confidence': {{'line': value, 'heatmap': value, 'icicle': value}}, 'easy to use': {{'line': value, 'heatmap': value, 'icicle': value}} }}```."


## Data Preparation

In [None]:
# Task names; each is also the sub-directory of file_path holding its images.
task_list = ["maxima", "minima", "comparison", "trend_detection"]
# Visualization names; each is also the image file's base name (<vis>.png).
vis_list = ["line", "heatmap", "icicle", "radar", "circular_heatmap", "sunburst"]
# One-sentence encoding hint per visualization, inserted into INTRODUCTION.
vis_introduction_dict = {"line": "For line chart, the y-value of point indicates the daily sale.", 
                    "heatmap": "For heatmap, the color shade of the cell indicates the daily sale.", 
                    "icicle": "For icicle plot, the size of the rectangle indicates the sale.",
                    "radar": "For radar chart, the radial value indicates the sale. ",
                    "circular_heatmap": "For circular heatmap, the color shade of the cell indicates the daily sale.",
                    "sunburst": "For sunburst visualisation, the angle covered by each sector represents the proportion of sale."}
# Prompt template used for each task.
TASK_PROMPT = {"maxima": MAXIMA, "minima": MINIMA, "comparison": COMPARISON, "trend_detection": TREND_DETECTION}
# Root directory of the experiment images; concatenated directly with the task
# name, so it must end with "/".
file_path = "./experiment_material/"

## Task Execution

In [None]:
def execution(task_list, vis_list, times, week_x=None, week_y=None):
    """Run the description -> steps -> rating prompt chain for each task.

    For every task and every repetition, three model calls are made through
    run_test (each one logged via the module-level ``logger``):

    1. ``<task>_description_<i>``: describe the visualizations.
    2. ``<task>_steps_<i>``: explain how to solve the task, given the
       description from step 1 and the contents of web_knowledge.txt.
    3. ``<task>_rating_<i>``: rate confidence and ease of use, given the
       steps from step 2.

    Args:
        task_list: Task names (keys of TASK_PROMPT).
        vis_list: Visualization names (keys of vis_introduction_dict).
        times: Number of repetitions per task.
        week_x: First week for the "comparison" task prompt.
        week_y: Second week for the "comparison" task prompt.

    Raises:
        ValueError: if "comparison" is requested without ``week_x`` and
            ``week_y``. Its template has placeholders for both, so formatting
            it without them would otherwise fail with a KeyError midway
            through a run.
    """
    name = "time_series_experiment"
    task_list = list(task_list)
    vis_list = list(vis_list)
    if not task_list or times <= 0:
        return

    # Validate before any API call is made.
    if "comparison" in task_list and (week_x is None or week_y is None):
        raise ValueError(
            "The 'comparison' task needs week_x and week_y "
            "(for example 'Week 1-Month January')."
        )

    # Everything below is identical for every task and repetition, so it is
    # computed once instead of inside the loops.
    with open("./experiment_material/web_knowledge.txt", "r") as f:
        context = f.read()
    visualizations = ",".join(vis_list)
    vis_introduction = "".join([vis_introduction_dict[vis] for vis in vis_list])
    introduction = INTRODUCTION.format(vis_introduction=vis_introduction)
    no_refusal = "DON'T say sorry or you cannot. YOU CAN."

    for task in task_list:
        image_urls = [file_path + task + "/" + vis + ".png" for vis in vis_list]
        # Templates without week placeholders ignore the extra keywords.
        task_prompt = TASK_PROMPT[task].format(
            visualizations=visualizations, week_x=week_x, week_y=week_y)
        for i in range(times):
            description = task + "_description_" + str(i)
            prompt = ROLE + \
                    DESCRIPTION.format(visualizations=visualizations) + \
                    introduction + \
                    no_refusal
            vis_description = run_test(
                name, description, prompt, image_urls, logger, times=1)

            description = task + "_steps_" + str(i)
            prompt = ROLE + \
                    task_prompt + \
                    introduction + \
                    STEP + \
                    "Description of three visualizations: " + vis_description[0] + \
                    "Context: " + context + \
                    no_refusal
            steps = run_test(name, description, prompt, image_urls, logger, times=1)

            description = task + "_rating_" + str(i)
            # run_test returns a list; insert the response text itself rather
            # than the list's repr.
            prompt = ROLE + \
                    task_prompt + \
                    RATING.format(steps=steps[0]) + \
                    JSON_FORMAT_RATING + \
                    no_refusal
            run_test(name, description, prompt, image_urls, logger, times=1)

In [None]:
# Run each task on its own, five repetitions each, over every visualization.
# NOTE(review): task_list includes "comparison", whose prompt template has
# {week_x}/{week_y} placeholders - confirm execution is given values for them.
for task in task_list:
    execution(task_list=[task], vis_list=vis_list, times=5)

## Additional Experiment (automatic knowledge injection)

In [None]:
# Extra experiment: two rounds on the "comparison" task with three
# visualizations. Round 1 collects a description and the solving steps using
# the full web-knowledge text. Round 2 repeats steps + rating with context
# retrieved per line of the round-1 steps from a per-visualization RAG store.
name = "time_series_extra_experiment"
task = "comparison"
vis_list = ["line", "heatmap", "icicle"]
image_urls = [file_path + task + "/" + vis + ".png" for vis in vis_list]

# The COMPARISON template has {week_x} and {week_y} placeholders, so the two
# weeks being compared must be supplied here.
# TODO: set both, following the 'Week Y-Month Z' naming used in the prompts
# (for example "Week 1-Month January").
week_x = None
week_y = None
if week_x is None or week_y is None:
    raise ValueError("Set week_x and week_y to the two weeks the comparison task should compare.")

# round1
# web_search(WEB_SEARCH.format(visualization=vis_list))
with open("./experiment_material/web_knowledge.txt", "r") as f:
    context = f.read()
visualizations = ",".join(vis_list)
vis_introduction = "".join([vis_introduction_dict[vis] for vis in vis_list])
task_prompt = TASK_PROMPT[task].format(
    visualizations=visualizations, week_x=week_x, week_y=week_y)
introduction = INTRODUCTION.format(vis_introduction=vis_introduction)

description = task + "_description_round1"
prompt = ROLE + \
        DESCRIPTION.format(visualizations=visualizations) + \
        introduction + \
        "DON'T say sorry or you cannot. YOU CAN."
vis_description = run_test(
    name, description, prompt, image_urls, logger, times=1)

description = task + "_steps_round1"
prompt = ROLE + \
        task_prompt + \
        introduction + \
        STEP + \
        "Description of three visualizations: " + vis_description[0] + \
        "Context: " + context + \
        "DON'T say sorry or you cannot. YOU CAN."
steps = run_test(name, description, prompt, image_urls, logger, times=1)

# round2
# Assumes the round-1 answer is a numbered list ("1.", "2.", "3.") in vis_list
# order; the text after each marker, up to the first blank line, is taken as
# that visualization's steps. An answer without these markers raises IndexError.
content = {
    vis: steps[0].split(str(index) + ".")[1].split("\n\n")[0]
    for index, vis in enumerate(vis_list, start=1)
}
web_knowledge = get_rag_database()

for vis in vis_list:
    # content[vis] already ends at the first blank line, so splitting on
    # single newlines yields every line of the section.
    for chunk in content[vis].split("\n"):
        if not chunk.strip():
            # Nothing to retrieve for an empty line.
            continue
        for i in range(0, 5):
            description = task + "_steps_round2_" + str(i)
            filter_context = web_knowledge[vis].query(query=chunk, max_sources=1).context
            prompt = ROLE + \
                    task_prompt + \
                    introduction + \
                    STEP + \
                    "You can use the following context to assist your answer. Context: \n\n {" + filter_context + "}\n\n" +  \
                    "DON'T say sorry or you cannot. YOU CAN."
            round2_steps = run_test(name, description, prompt, image_urls, logger, times=1)

            description = task + "_rating_round2_" + str(i)
            # run_test returns a list; insert the response text itself rather
            # than the list's repr.
            prompt = ROLE + \
                    task_prompt + \
                    RATING.format(steps=round2_steps[0]) + \
                    JSON_FORMAT_RATING + \
                    "You can use the following context to assist your answer. Context: \n\n {" + filter_context + "}\n\n" +  \
                    "DON'T say sorry or you cannot. YOU CAN."
            run_test(name, description, prompt, image_urls, logger, times=1)