In [None]:
import sys
import os
import random
import json
import numpy as np
import pandas as pd
import time
import openai

from train_and_test import start_a_new_train, run_evaluation
from AU_LF_dataload_final import parse_news_TS_final, train_validation_split
from plot_comparision import plot_comparision
from gpt_news_evaluation import validation_with_evaluation_agent,gpt_chain_of_thoughts
from reselect_news import fetch_news,gpt_reselect_news,reselect_news_procedure
from justify_news_format_final import justify_news_format_final

# Initial News Selection by Reasoning Agent

In [None]:
# Instruction half of the user message that this cell sends in its
# ChatCompletion request (the request concatenates `prompt + format_output`).
prompt = '''Please summerize the logic of selection of news that will change the regional electricity load consumption.'''

# Seed analysis appended directly after `prompt` (no separator is inserted
# between the two strings). It enumerates short-/long-term factors that raise
# or lower load consumption, plus a few "other" factors.
format_output= ''' Predicting each state's region-level load consumption data in Australia with a time-frequency 
of 30 minutes per point involves understanding various factors. 

Positive Issues Leading to Increase in Load Consumption:

Short-Term:
1. Economic Growth: A surge in economic activity increases energy consumption.
2. Technological Advancements: New power-requiring technologies can spike demand.
3. Seasonal Factors: Extreme weather increases the use of air conditioning.
4. Social Events: Large-scale events temporarily boost energy use.

Long-Term:
1. Population Growth: Leads to higher residential energy consumption.
2. Industrial Development: Correlates with increased energy demands.
3. Urbanization: Expansion of cities contributes to higher energy usage.
4. Energy Transition: Shift towards electrically powered technologies.

Negative Issues Leading to Decrease in Load Consumption:

Short-Term:
1. Economic Downturns: Lead to decreased industrial activity and lower energy consumption.
2. Efficiency Improvements: Adoption of energy-efficient technologies reduces consumption.
3. Weather Patterns: Mild weather can reduce heating and cooling needs.
4. Public Health Crises: Can lead to reduced industrial and commercial activity.

Long-Term:
1. Energy Efficiency: Trends like better insulation and efficient appliances reduce consumption.
2. Demographic Changes: Aging populations or declining birth rates can lead to decreased energy use.
3. Policy and Regulation: Promote energy conservation and sustainability.
4. Technological Innovations: Development of more efficient technologies.

Other Factors:
- Political Stability: Impacts energy policies and investments.
- Global Market Dynamics: Affect local energy prices and consumption patterns.
- Environmental Consciousness: Leads to changes in consumption behavior and renewable energy adoption.
'''

# Credentials come from the environment so the cell is valid Python and no
# secret is stored in the notebook. Export OPENAI_API_KEY (required) and,
# for a non-default endpoint, OPENAI_API_BASE before starting the kernel.
openai.api_base = os.environ.get("OPENAI_API_BASE", openai.api_base)
openai.api_key = os.environ["OPENAI_API_KEY"]  # KeyError here means the key was not exported

response = openai.ChatCompletion.create(
    model="gpt-4-turbo-2024-04-09", # or another model
    messages=[
        {"role": "system", "content": "You are a helpful assistant analyzing electricity load predictions."},
        {"role": "user", "content": prompt + format_output}
    ]
)

# Pull the reply text out once; it is printed verbatim and stored flattened.
raw_reasoning = response["choices"][0]["message"]["content"]
print(raw_reasoning)

# Markdown emphasis/heading markers and all newlines are removed, so the
# stored logic is a single line of text.
initial_reasoning = raw_reasoning.replace("**","").replace("\n","").replace("###","")

with open('/root/llama2-lora-fine-tuning/ipython_notebook_1004/Data_all/AU_load/select_news_logic_iteration_0.txt', "w", encoding='utf-8') as file:
    file.write(initial_reasoning)

In [None]:
# Subject phrase interpolated into `prompt2` below.
topic = "Future Load Consumption"
# Selection instruction. This is an f-string, so `{topic}` is substituted when
# this assignment runs; changing `topic` afterwards does not alter `prompt2`.
prompt2 = f'''
If I give you all news before the prediction, based on the above positive & negative issues analysis, 
1) please choose the news that may have a long-term affect on {topic};
2) please choose the news that may have a short-term effect on today's  {topic}.  
3) please choose the news that may have a real-time direct effect on today's  {topic}. 
If there is no suitable news, please say no. Also, please include the region (Australia/International/Others) and time information of these news. 
Organize the paragraph in this format: Long-Term Effect on Future {topic}: news is xxx; region is xxx; time is xxxx; the rationality is that xxx."
'''

# Output-format instruction containing a worked JSON example. This is a plain
# (non-f) string, so the braces are literal text. The three top-level keys in
# the example hard-code "Load Consumption" rather than using `topic`.
format_output2="""
Remember to only give the json output including all relavant news and make it the valid json format.  Format is {
"Long-Term Effect on Future Load Consumption": [
        {
            "news": "Work on WA’s latest $1b lithium plant will start within days as US resources giant Albemarle begins building a major processing facility outside Bunbury, creating hundreds of jobs.",
            "region": "WA",
            "time": "2019-01-03 16:40:00",
            "rationality": "The construction and operation of a major lithium processing facility will likely influence long-term electricity demand through increased industrial activity and potential population growth in the area due to new job opportunities."
        },
        {
            "news": "Another major renewable energy project was initiated in WA, expected to supply significant power by 2022.",
            "region": "WA",
            "time": "2019-03-15 11:30:00",
            "rationality": "Long-term electricity load will be impacted by the integration of renewable energy sources, which are expected to offset dependence on traditional fossil fuels."
        }
    ],
    "Short-Term Effect on Today's Load Consumption": [
        {
            "news": "SA just sweltered through a very warm night, after a day of extreme heat where some regional areas reached nearly 48C.",
            "region": "SA",
            "time": "2019-01-03 17:57:00",
            "rationality": "Extreme weather conditions, particularly the intense heat, will lead to higher electricity consumption in the short term as residents and businesses increase the use of air conditioning and cooling systems to manage temperatures."
        },
        {
            "news": "A sudden cold snap in Victoria leads to a spike in electric heating usage.",
            "region": "VIC",
            "time": "2019-01-04 05:22:00",
            "rationality": "Short-term electricity load spikes are often caused by unexpected weather events that drive up heating or cooling demand."
        }
    ],
    "Real-Time Direct Effect on Today's Load Consumption": [
        {
            "news": "An unseasonal downpour has wreaked havoc on Perth’s electricity network this morning.",
            "region": "WA",
            "time": "2019-01-03 10:11:00",
            "rationality": "The sudden weather event causing disruptions to the electricity network can have an immediate impact on load consumption due to power outages, infrastructure damage, or emergency response measures."
        },
        {
            "news": "Lightning strike at a major substation causes widespread outages in Sydney.",
            "region": "NSW",
            "time": "2019-01-03 19:45:00",
            "rationality": "Direct effects on load consumption include sudden drops in power supply, triggering emergency measures to restore stability in the network."
        }
    ]}"""

# Input archive of raw news and the CSV that receives one row per processed
# date. The output path is bound before the loop so it exists for the next
# cell even when every date is skipped.
file_path = "/root/llama2-lora-fine-tuning/ipython_notebook_1004/Data_all/AU_news/news_processed_data_2019-2022.json"
csv_file_path = '/root/llama2-lora-fine-tuning/ipython_notebook_1004/Data_all/AU_load/AU_load_news_dataframe_2019-2020_iteration_0.csv'

with open(file_path, 'r', encoding='utf-8') as file:
    data = json.load(file)
news_df = pd.DataFrame(data)
news_df['publication_time'] = pd.to_datetime(news_df['publication_time'])

dates_range = pd.date_range(start="2019-01-01", end="2021-01-01")

# Rows are collected as plain dicts and turned into a frame in one step,
# instead of concatenating onto an initially empty DataFrame each iteration.
selected_rows = []
selected_news = pd.DataFrame(columns=['time', 'news'])

for date in dates_range:
    formatted_date = date.strftime('%Y-%m-%d')
    news_before, news_after = fetch_news(date, 1, news_df)
    # Skip the date only when fetch_news reports nothing on both sides.
    if (news_before == " No news found before the prediction date."
            and news_after == " No news found on the prediction date."):
        continue
    prompt1 = f"The prediction date is {formatted_date}."
    prompt3 = f"The news happened before and on the prediction include:{news_before+news_after}"

    prompt = initial_reasoning + prompt1 + prompt2 + prompt3 + format_output2
    response = gpt_reselect_news(prompt)
    # Keep only the outermost {...} span of the reply and flatten it to one line.
    response = response[response.find("{"):response.rfind("}") + 1].replace("\n", "")
    print(response)

    # Validation is informational only: the reply is stored either way.
    try:
        json.loads(response)
        print("The response is in JSON format.")
    except json.JSONDecodeError:
        print("The response is not in JSON format.")

    selected_rows.append({'time': formatted_date, 'news': response})
    selected_news = pd.DataFrame(selected_rows, columns=['time', 'news'])

    # The CSV is rewritten after every processed date, so a partial result is
    # on disk if the loop is interrupted. index=False omits the row index.
    selected_news.to_csv(csv_file_path, index=False, encoding='utf-8')

print(selected_news.head())

In [None]:
# Reload the selections written by the previous cell and pass them to the
# formatting helper together with the iteration-0 output path.
merged_news_path_iter0 = '/root/llama2-lora-fine-tuning/ipython_notebook_1004/Data_all/AU_load/AU_load_merge_file_final_iteration_0.csv'
news_csv = pd.read_csv(csv_file_path)
justify_news_format_final(news_csv, merged_news_path_iter0)

# Training Data Preparation

In [None]:
# Inputs (weather, formatted news, load time series) and the JSON output path
# handed to parse_news_TS_final, bound in one unpacking assignment.
weather_data_file, news_data_file, ts_file, save_file = (
    "/root/llama2-lora-fine-tuning/ipython_notebook_1004/Data_all/AU_load/combined_weather_data.csv",
    "/root/llama2-lora-fine-tuning/ipython_notebook_1004/Data_all/AU_load/AU_load_merge_file_final_iteration_0.csv",
    '/root/llama2-lora-fine-tuning/ipython_notebook_1004/Data_all/AU_load/AULF-2019-2023.csv',
    '/root/llama2-lora-fine-tuning/ipython_notebook_1004/Data_all/AU_load/AULF_output_list_v4_3_2019-2020_iteration_0.json',
)

result_list = parse_news_TS_final(
    weather_data_file,
    news_data_file,
    ts_file,
    save_file,
)


In [None]:
# Replace `result_list` with the contents of the JSON file at `save_file`.
with open(save_file, mode='r', encoding='utf-8') as file:
    result_list = json.loads(file.read())

# Train & Test the Model

In [None]:
# Number passed to the split helper as its last argument.
# NOTE(review): presumably the validation-set size; confirm in AU_LF_dataload_final.
val_num = 100

# Source sample list and the two files the helper is given for its outputs.
json_file_path = '/root/llama2-lora-fine-tuning/ipython_notebook_1004/Data_all/AU_load/AULF_output_list_v4_3_2019-2020_iteration_0.json'
train_save_file = '/root/llama2-lora-fine-tuning/ipython_notebook_1004/Data_all/AU_load/AULF_train_data_v4_3_2019-2020_iteration_0.json'
val_save_file = '/root/llama2-lora-fine-tuning/ipython_notebook_1004/Data_all/AU_load/AULF_validation_data_v4_3_2019-2020_iteration_0.json'

train_validation_split(
    json_file_path,
    train_save_file,
    val_save_file,
    val_num,
)

In [None]:
# Keyword options forwarded unchanged to start_a_new_train.
# NOTE(review): resume_from_checkpoint points at checkpoint-500 of this same
# run name; on a fresh results directory that path will not exist. Confirm
# how the helper treats a missing checkpoint.
# NOTE(review): learning_rate is passed as the string '1e-4', not a float;
# confirm that is the type the helper expects.
train_options = dict(
    resume_from_checkpoint='/root/autodl-tmp/results/AU-LF-test-iteration-0/checkpoint-500',
    result_saving_dir='/root/autodl-tmp/results/',
    learning_rate='1e-4',
    epoch=4,
    save_steps=250,
    prompter_name='ts_test',
    per_device_train_batch_size=2,
    gradient_accumulation_steps=6,
    lora_r=8,
    lora_alpha=16,
)

# Positional arguments: run name, then the training-data JSON.
start_a_new_train(
    'AU-LF-test-iteration-0',
    '/root/llama2-lora-fine-tuning/ipython_notebook_1004/Data_all/AU_load/AULF_train_data_v4_3_2019-2020_iteration_0.json',
    **train_options,
)

In [None]:
# Positional arguments for run_evaluation, in call order: the checkpoint
# directory, the validation JSON, and the result JSON path.
# NOTE(review): checkpoint-1000 is hard-coded; it must exist after training.
evaluation_args = (
    '/root/autodl-tmp/results/AU-LF-test-iteration-0/checkpoint-1000',
    '/root/llama2-lora-fine-tuning/ipython_notebook_1004/Data_all/AU_load/AULF_validation_data_v4_3_2019-2020_iteration_0.json',
    '/root/llama2-lora-fine-tuning/ipython_notebook_1004/Data_all/AU_load/AULF_test_result_v4_3_2019-2020_iteration_0.json',
)
run_evaluation(*evaluation_args, prompter_name='ts_test')

In [None]:
# Compare the evaluation output with the validation data it was produced from.
# Note the argument order of the call: predictions first, actuals second.
predictions_file = "/root/llama2-lora-fine-tuning/ipython_notebook_1004/Data_all/AU_load/AULF_test_result_v4_3_2019-2020_iteration_0.json"
actuals_file = "/root/llama2-lora-fine-tuning/ipython_notebook_1004/Data_all/AU_load/AULF_validation_data_v4_3_2019-2020_iteration_0.json"
plot_comparision(
    predictions_file,
    actuals_file,
)

# News Logic Upgrade

In [None]:
import time

predictions_file = '/root/llama2-lora-fine-tuning/ipython_notebook_1004/Data_all/AU_load/AULF_test_result_v4_3_2019-2020_iteration_0.json'
actuals_file = '/root/llama2-lora-fine-tuning/ipython_notebook_1004/Data_all/AU_load/AULF_validation_data_v4_3_2019-2020_iteration_0.json'
all_news_file = "/root/llama2-lora-fine-tuning/ipython_notebook_1004/Data_all/AU_news/news_processed_data_2019-2022.json"

# Load the initial content for selecting news logic.
# If it's after the second training or later, select the previous news logic.
# This file is written with encoding='utf-8' earlier in the notebook, so it is
# read back with the same encoding instead of the platform default.
with open('/root/llama2-lora-fine-tuning/ipython_notebook_1004/Data_all/AU_load/select_news_logic_iteration_0.txt', 'r', encoding='utf-8') as file:
    selection_news_logic_latest = file.read()
print(selection_news_logic_latest)

# Load the initial format for selecting news logic.
# NOTE(review): this file is not produced by this notebook, so its encoding is
# not established here; the open() call is left on the platform default.
with open('/root/llama2-lora-fine-tuning/ipython_notebook_1004/Data_all/AU_load/select_news_logic_iteration_format.txt', 'r') as file:
    selection_news_logic_format = file.read()
print(selection_news_logic_format)

# Validation records; the next cell iterates over their indices.
with open(actuals_file, 'r', encoding='utf-8') as f:
    actuals_data = json.load(f)

In [None]:
start_time = time.time()  # wall-clock start of the whole pass

# Reply text that gpt_chain_of_thoughts is compared against to detect a failed
# call; such replies are printed but neither stored nor used as the new logic.
# NOTE(review): presumably the helper returns str(exception) on failure - confirm.
failed_reply = "'str' object has no attribute 'get'"

all_response = []

for i in range(len(actuals_data)):
    iteration_start = time.time()

    (actual, errors, background, historical_time, predictions_time,
     selected_news, all_news) = validation_with_evaluation_agent(
        i, predictions_file, actuals_file, all_news_file)
    response = gpt_chain_of_thoughts(
        background, selected_news, all_news, predictions_time, actual, errors,
        selection_news_logic_format, selection_news_logic_latest)

    print(response)
    if response != failed_reply:
        # Each successful reply becomes the "latest" logic fed to the next sample.
        selection_news_logic_latest = response.replace("**", " ").replace("###", " ")
        all_response.append(response)

    iteration_end = time.time()
    print(f"Iteration {i} completed in {iteration_end - iteration_start:.2f} seconds")

total_time = time.time() - start_time
print(f"All iterations completed in {total_time:.2f} seconds")

# Persist every stored reply for the consolidation step.
all_response_array = np.array(all_response)
np.save('/root/llama2-lora-fine-tuning/ipython_notebook_1004/Data_all/AU_load/adjusted_selection_logic_iteration_0.npy', all_response_array)

In [None]:
import os
import openai

# Credentials come from the environment so the cell is valid Python and no
# secret is stored in the notebook. Export OPENAI_API_KEY (required) and,
# for a non-default endpoint, OPENAI_API_BASE before starting the kernel.
openai.api_base = os.environ.get("OPENAI_API_BASE", openai.api_base)
openai.api_key = os.environ["OPENAI_API_KEY"]  # KeyError here means the key was not exported

# Replies saved by the evaluation loop (one array element per stored reply).
selection_news_logic_all = np.load('/root/llama2-lora-fine-tuning/ipython_notebook_1004/Data_all/AU_load/adjusted_selection_logic_iteration_0.npy')

# The iteration-0 selection logic that the consolidation prompt asks the model to revise.
file_path = '/root/llama2-lora-fine-tuning/ipython_notebook_1004/Data_all/AU_load/select_news_logic_iteration_0.txt'
with open(file_path, "r", encoding="utf-8") as file:
    initial_logic = file.read()


In [None]:
# First half of the user message: every stored reply, as a Python list repr.
prompt1 = f'''
Improve and polish this paragraph to reduce repeated content and summarize the news selection logic 
that affects the electricity load consumption:{selection_news_logic_all.tolist()}'''

# Second half: the current logic that the model is asked to rewrite.
prompt2 = f'''
According to the given updated logic, please directly rephrase the current prediction
logic and output the adjusted new logic. This is the current prediction logic that you need to adjust
and improve: {initial_logic}'''

chat_messages = [
    {"role": "system", "content": "You are a helpful assistant analyzing electricity load predictions."},
    {"role": "user", "content": prompt1+prompt2},
]
response = openai.ChatCompletion.create(
    model="gpt-4-turbo-2024-04-09",
    messages=chat_messages,
)

# The reply is shown verbatim; the copy written to disk has markdown markers
# and newlines stripped, so it is stored as a single line.
new_logic = response["choices"][0]["message"]["content"]
print(new_logic)

flattened_logic = new_logic.replace("**","").replace("\n","").replace("###","")
with open('/root/llama2-lora-fine-tuning/ipython_notebook_1004/Data_all/AU_load/select_news_logic_iteration_1.txt', "w", encoding='utf-8') as file:
    file.write(flattened_logic)


# Re-Select News in the Next Iteration

In [None]:
# The iteration-1 logic file is written with encoding='utf-8' by the
# consolidation cell, so it is read back with the same encoding instead of
# the platform default.
with open('/root/llama2-lora-fine-tuning/ipython_notebook_1004/Data_all/AU_load/select_news_logic_iteration_1.txt', 'r', encoding='utf-8') as file:
    initial_reasoning = file.read()
print(initial_reasoning)

# Raw news archive, and the CSV path handed to the re-selection helper for
# the iteration-1 selections.
raw_news_file_path = "/root/llama2-lora-fine-tuning/ipython_notebook_1004/Data_all/AU_news/news_processed_data_2019-2022.json"
csv_file_path = '/root/llama2-lora-fine-tuning/ipython_notebook_1004/Data_all/AU_load/AU_load_news_dataframe_2019-2020_iteration_1.csv'
reselect_news_procedure(raw_news_file_path,csv_file_path,initial_reasoning)

In [None]:
# Format the iteration-1 selections with the same helper used for iteration 0.
# NOTE(review): this output is named "AU_load_merged_file_..." whereas the
# iteration-0 file is "AU_load_merge_file_..."; confirm which spelling the
# next iteration's data-preparation step reads.
save_file = "/root/llama2-lora-fine-tuning/ipython_notebook_1004/Data_all/AU_load/AU_load_merged_file_final_iteration_1.csv"
news_csv = pd.read_csv(
    "/root/llama2-lora-fine-tuning/ipython_notebook_1004/Data_all/AU_load/AU_load_news_dataframe_2019-2020_iteration_1.csv"
)
justify_news_format_final(news_csv, save_file)