### Imports and setup of LLM

PromptCast GitHub repository: https://github.com/HaoUNSW/PISA/

In [2]:
import pandas as pd
# Load the daily average-temperature table (path is relative to the notebook).
df = pd.read_csv(filepath_or_buffer="../dataset/weatherdata_temp.csv")
# Preview the first five rows as the cell's output.
df.head(n=5)

Unnamed: 0,Date,Avg Temperature
0,2010-01-01,55.7
1,2010-01-02,50.6
2,2010-01-03,52.8
3,2010-01-04,51.0
4,2010-01-05,53.1


In [3]:
# Show the frame's dimensions as a (rows, columns) tuple.
df.shape

(5012, 2)

#### Generating PISA-style prompt questions (`temperature_questions_pisa_prompt.txt`) and their answers (`temperature_answers_pisa_prompt.txt`)

In [6]:
import pandas as pd
from datetime import datetime, timedelta

# Convert the Date column to datetime values: the formatting helpers in this
# cell call `.strftime` on the values read from it.
# Note: this overwrites the column on the shared `df` rather than creating a
# new frame, so every later cell sees the converted column.
df["Date"] = pd.to_datetime(df["Date"])


# Function to get day name
def get_day_name(date):
    """Return the weekday name of ``date`` (e.g. ``"Friday"``).

    ``date`` must provide a ``strftime`` method; the name is whatever
    ``strftime`` produces for the ``%A`` directive.
    """
    weekday_pattern = "%A"
    return date.strftime(weekday_pattern)


# Function to format date
def format_date(date):
    """Return ``date`` rendered as ``"<Month name> <DD>, <YYYY>"``.

    The day of month is zero-padded, e.g. ``"January 01, 2010"``.
    ``date`` must provide a ``strftime`` method.
    """
    long_date_pattern = "%B %d, %Y"
    return date.strftime(long_date_pattern)


# Build question/answer pairs from the daily series: each sample uses
# `window_size` consecutive days as context and the day immediately after the
# window as the forecast target.
sequences = []
answers = []
window_size = 15

# One sample consumes `window_size` context rows plus one target row, so a
# stride of window_size + 1 (16 here) keeps samples from sharing any rows.
# Deriving it from `window_size` keeps that true if the window is changed.
stride = window_size + 1

# Stopping at len(df) - window_size guarantees the target row exists.
for i in range(0, len(df) - window_size, stride):
    window_data = df.iloc[i : i + window_size]
    next_day = df.iloc[i + window_size]

    start_date = window_data.iloc[0]["Date"]
    end_date = window_data.iloc[-1]["Date"]
    next_date = next_day["Date"]

    # Temperatures are rounded to whole degrees for the prompt text.
    temperatures = [str(int(round(temp))) for temp in window_data["Avg Temperature"]]

    question = (
        f"From {format_date(start_date)}, {get_day_name(start_date)} to "
        f"{format_date(end_date)}, {get_day_name(end_date)}, "
        f"the average temperature was {', '.join(temperatures)} degree on each day. "
        f"What is the temperature going to be on {format_date(next_date)}, "
        f"{get_day_name(next_date)}?"
    )

    sequences.append(question)
    # Record the answer in the same pass so that line k of the answers file
    # always corresponds to line k of the questions file.
    answers.append(str(int(round(next_day["Avg Temperature"]))))

# Save questions, one per line (no trailing newline after the last one).
# The encoding is explicit so the output does not depend on the platform default.
with open("../dataset/temperature_questions_pisa_prompt.txt", "w", encoding="utf-8") as f:
    f.write("\n".join(sequences))

# Save temperature answers, one per line, each terminated by a newline.
with open("../dataset/temperature_answers_pisa_prompt.txt", "w", encoding="utf-8") as f:
    for answer in answers:
        f.write(answer + "\n")

print(f"Created {len(sequences)} sequences")
# Guard the example: with fewer than window_size + 1 rows there is no sample.
if sequences:
    print("\nFirst sequence example:")
    print(sequences[0])
print("\nCreated answer file with temperature values")

Created 313 sequences

First sequence example:
From January 01, 2010, Friday to January 15, 2010, Friday, the average temperature was 56, 51, 53, 51, 53, 54, 53, 50, 52, 47, 48, 50, 51, 50, 52 degree on each day. What is the temperature going to be on January 16, 2010, Saturday?

Created answer file with temperature values


In [8]:
# Spot-check: look up the row for 2010-01-16, the date asked about in the
# first generated question.
df.loc[df["Date"] == "2010-01-16"]

Unnamed: 0,Date,Avg Temperature
15,2010-01-16,53.7
