In [1]:
# %load_ext watermark
import pandas as pd
import numpy as np
import session_config
import reports
import roughdraft
import geospatial

from roughdraft import messages_for_chat_completion
from reportprompts import report_system_prompt, split_text_on_phrase, report_summary_prompt, report_stratification_prompt, executive_corrections_prompt
from reportprompts import grid_approximation_prompt, report_regression_prompt, final_draft_prompt, corrections_prompt, executive_summary_prompt

from IPython.display import Markdown
from dotenv import load_dotenv
import os
from langchain_openai import ChatOpenAI
import openai



# Making a report

## The initial draft:

In [16]:
def make_call_to_llm(system_prompt, user_prompt, user_args, sys_args, client):
    """Render two prompt factories, package them as chat messages and invoke ``client``.

    Args:
        system_prompt: callable that receives ``sys_args`` and returns the system prompt.
        user_prompt: callable that receives ``user_args`` and returns the user prompt.
        user_args: the single argument handed to ``user_prompt``.
        sys_args: the single argument handed to ``system_prompt``.
        client: object exposing ``invoke(messages)``.

    Returns:
        Whatever ``client.invoke`` returns for the assembled messages.
    """
    rendered_system = system_prompt(sys_args)
    rendered_user = user_prompt(user_args)
    payload = messages_for_chat_completion(system_prompt=rendered_system, user_prompt=rendered_user)
    return client.invoke(payload)


def first_draft(r_args, info_columns, local_directory, revisionname, roughdraftname):
    """Build the report metadata and the ``ReportTexts`` object for one report.

    Args:
        r_args: keyword arguments forwarded to ``session_config.report_args``.
        info_columns: forwarded to ``reports.make_report_objects`` as ``info_columns``.
        local_directory: stored in ``report_meta['resources']``. Other cells build
            file paths by concatenating a file name onto this value.
        revisionname: not used by this function; kept so existing calls still work.
        roughdraftname: not used by this function; kept so existing calls still work.

    Returns:
        tuple: ``(firstdraft, report_meta)`` where ``firstdraft`` is the
        ``roughdraft.ReportTexts`` instance and ``report_meta`` is the metadata
        mapping with ``'resources'`` set to ``local_directory``.
    """
    # construct the meta data and the basic reports
    this_report = session_config.report_args(**r_args)
    report_data, report_meta = session_config.report_meta_data(**this_report)
    all_report, all_land_use = reports.make_report_objects(report_data, info_columns=info_columns)

    # assign local file storage; the two path strings that used to be
    # assembled here were never read, so they are no longer built.
    report_meta['resources'] = local_directory

    firstdraft = roughdraft.ReportTexts(
        report_meta=report_meta,
        survey_report=all_report,
        landuse_report=all_land_use,
    )

    return firstdraft, report_meta
    
def report_map(firstdraft, lat_lon, report_meta, pdf_columns, show: bool = False):
    """Create the situation map for a report and return its legend/marker data.

    Besides its parameters this function reads two module-level names that must
    exist before it is called: ``data`` (for ``data['linear_methods']``) and
    ``location_markers``.

    Returns:
        The object returned by ``geospatial.situation_map``.
    """
    sample_results = firstdraft.survey_report.sample_results(info_columns=firstdraft.info_cols)
    # map_markers returns a pair; only the first element is used here
    markers, _bounds = geospatial.map_markers(sample_results, lat_lon)
    legend = geospatial.situation_map(markers, firstdraft, data['linear_methods'], pdf_columns, report_meta)
    geospatial.situation_map_plot(legend, report_meta, location_markers=location_markers, show=show)
    return legend

def rough_draft(report_meta, firstdraft, rargsdata, title_notes: str = None):
    """Produce the rough-draft sections and the report title.

    The draft location is ``report_meta["resources"]`` joined with the
    module-level name ``roughdraftname`` (not a parameter of this function).

    Returns:
        tuple: the six values unpacked from ``firstdraft.string_rep`` in their
        original order, followed by the title.
    """
    draft_path = report_meta["resources"] + roughdraftname

    # string_rep yields exactly six parts: rough draft texts, prompts and data
    sections = firstdraft.string_rep(draft_path, rargsdata)
    asum, sampstrat, grid_f, lin_f, inv_f, data = sections

    title = roughdraft.construct_report_title_and_subtitle(report_meta, notes=title_notes)

    return asum, sampstrat, grid_f, lin_f, inv_f, data, title

def report_consolidated_results(data, key: str = None):
    """Style every table stored under ``data[key]`` for display.

    Args:
        data: mapping of report results. ``data[key]`` is assumed to be a
            list-like sequence of DataFrames (it was indexed 0..len-1 before).
        key: entry of ``data`` to use; defaults to
            ``'survey_totals_for_all_info_cols'``.

    Returns:
        list[dict]: one ``{'label': ..., 'table': ...}`` entry per table, or an
        empty list when ``data[key]`` holds one table or fewer.
    """
    if key is None:
        key = 'survey_totals_for_all_info_cols'

    tables = data[key]
    # The size guard now looks at the requested key. It previously always
    # inspected the default key, which is only right when `key` is the default.
    if len(tables) <= 1:
        return []

    aggregate_results = []
    for frame in tables:
        label = frame.columns[0]
        caption = f'Sample totals : {label}, average, quantity, number of samples'
        table = (
            frame.style
            .set_table_styles(session_config.table_css_styles)
            .format(**session_config.format_kwargs)
            .set_caption(caption)
            .hide()
        )
        # Only the label is renamed; the caption built above keeps the raw column name.
        if label == 'parent_boundary':
            label = 'survey area'

        aggregate_results.append({'label': label, 'table': table})
    return aggregate_results


def stratification_report(data, key, suffix, caption, report_meta=None):
    """Style ``data[key]``, save it as HTML and return the styled table.

    Args:
        data: mapping of report results; ``data[key]`` must expose ``.style``.
        key: entry of ``data`` to render.
        suffix: file name appended to ``report_meta["resources"]``.
        caption: caption set on the styled table.
        report_meta: metadata mapping providing ``"resources"``. Optional so
            existing four-argument calls keep working; when omitted the
            module-level ``report_meta`` is used, as before.

    Returns:
        The styled table that was written to disk.
    """
    if report_meta is None:
        # Backward-compatible fallback to the notebook-level metadata.
        report_meta = globals()['report_meta']

    destination = f'{report_meta["resources"]}{suffix}'
    stratification_of_sampling = (
        data[key].style
        .set_table_styles(session_config.table_css_styles)
        .format(**session_config.format_kwargs)
        .set_caption(caption)
    )
    # Explicit encoding: the platform default is locale-dependent.
    with open(destination, 'w', encoding='utf-8') as file:
        file.write(stratification_of_sampling.to_html())
    print(destination)
    return stratification_of_sampling


def inventory_report(data, caption, report_meta):
    """Style ``data['inventory']``, save it as HTML and return the styled table.

    Args:
        data: mapping of report results containing an ``'inventory'`` entry
            that exposes ``.style``.
        caption: caption set on the styled table.
        report_meta: metadata mapping; ``report_meta["resources"]`` is the
            prefix of the output path.

    Returns:
        The styled table that was written to ``<resources>inventory_table.html``.
    """
    destination = f'{report_meta["resources"]}inventory_table.html'
    inventory_table = (
        data['inventory'].style
        .set_table_styles(session_config.table_css_styles)
        .format(**session_config.format_kwargs)
        .set_caption(caption)
        .hide()
    )
    # Explicit encoding: the platform default is locale-dependent.
    with open(destination, 'w', encoding='utf-8') as file:
        file.write(inventory_table.to_html())
    print(f'saving: {destination}')
    return inventory_table

# --- Input data ---------------------------------------------------------
# NOTE(review): `new_lu` is read here but not referenced again in the cells shown.
survey_data = session_config.collect_survey_data()
new_lu = pd.read_csv('data/in_process/new_lu.csv')

# Phrase later passed to split_text_on_phrase to split each corrected LLM answer in two.
split_phrase = 'Frequently asked questions'
pdf_columns = ['buildings', 'forest', 'undefined', 'streets', 'public-services', 'recreation']
# NOTE(review): `matching` lists the same labels as `pdf_columns` in another order
# and is not referenced again in the cells shown.
matching = [
    'buildings',
    'forest',
    'public-services',
    'recreation',
    'undefined',
    'streets'
    
]
# rcodes = session_config.code_use.index
rcodes = session_config.tobo_snacks
# Captions for the two stratification tables written by stratification_report.
caption_sampstrat = ''.join(session_config.land_use_description)
caption_ratestrat = ''.join(session_config.landuse_litter_density)


# initial_mask = (survey_data.feature_type == 'l')
# Work on a copy so the loaded survey data is left untouched.
rargsdata = survey_data.copy()

# --- Report arguments (forwarded to session_config.report_args by first_draft) ---
r_args = {
    'data': rargsdata,
    'start': '2020-01-01', 
    'end':'2021-05-31', 
    'name':'Biel/Bienne - example', 
    'boundary':'city', 
    'boundary_name': 'Biel/Bienne', 
    'feature_type': None, 
    'feature_name': None, 
    'report_codes': rcodes, 
    'columns_of_interest': session_config.feature_variables}

# --- Output locations and labels -----------------------------------------
# `local_directory` is used as a string prefix for output paths, hence the trailing slash.
final_review = 'biel_city_example.md'
local_directory = 'bielcity/'
title_notes =  '\n\n <i>Proof of concept: llm assissted reporting grid forecasting example</i>\n\n'
# `location_markers` and `roughdraftname` are read as globals by report_map and rough_draft.
location_markers = "o"
roughdraftname = 'roughdraft.md'
revisionname = 'text_for_esummary.md'
info_columns = ['canton', 'city', 'feature_name']


# --- Build the report objects, tables and figures ------------------------
firstdraft, report_meta = first_draft(r_args, info_columns, local_directory, revisionname, roughdraftname)
asum, sampstrat, grid_f, lin_f, inv_f, data, title = rough_draft(report_meta, firstdraft, rargsdata, title_notes=title_notes)
# NOTE(review): the same call is repeated below as `aggregate_results`;
# `consolidated_reports` is not referenced again in the cells shown.
consolidated_reports = report_consolidated_results(data)
land_use_rates = stratification_report(data, 'landuse_rates', 'sampling_stratification_pcsm.html', caption_ratestrat)
land_use_profile = stratification_report(data, 'landuse_profile', 'sampling_stratification.html', caption_sampstrat )

# report_map reads the globals `data` and `location_markers`, so it must run after they exist.
map_legend_markers = report_map(firstdraft, session_config.lat_lon, report_meta, pdf_columns, show=False)
aggregate_results = report_consolidated_results(data)

inventory_table = inventory_report(data, ' ', report_meta)
reports.boxplots_prior_likelihood(firstdraft.survey_report.sample_results(),data['grid_approximation'], report_meta=report_meta)

# scatterchart temporal survey totals
scatter_plot_args = {
    'data': [(firstdraft.survey_report.sample_results(), 'Survey totals', 'red')],
    'file_name': 'scatter_plot_likelihood.jpg',
    'report_meta': report_meta,
    'title': f"{report_meta['name']} {report_meta['start']} {report_meta['end']}",
    'show' : False
}
reports.scatter_plot_standard(**scatter_plot_args)

new report meta data
boundary is not none
processing admin boundaries
processing named features
processing summary statistics
processing material composition
processing survey totals for all info cols
processing inventory
processing landuse profile
processing landuse rates
processing grid approximation
Processing combined...
Processing out_boundary...
processing linear methods
boundary is not none
bielcity/sampling_stratification_pcsm.html
bielcity/sampling_stratification.html
defining map components
making location report
saving map to bielcity/situation_map.jpg
saving: bielcity/inventory_table.html
file saved to: bielcity/boxplots_observed_expected.jpeg
file saved to: bielcity/scatter_plot_likelihood.jpg


## Using the LLM

In [17]:
chatcompletion = True
def report_system_prompt(roughdraft):
    """Return the system prompt for drafting one report section.

    This notebook-level definition replaces the ``report_system_prompt``
    imported from ``reportprompts`` at the top of the notebook.

    Args:
        roughdraft: text of the reference document; appended verbatim to the
            end of the prompt.

    Returns:
        str: the role description and rules, followed by the reference document.
    """
    # Adjacent literals are concatenated with no separator, so every sentence
    # carries its own trailing space. The instruction-marker example is a
    # single quoted span, and the inference rule is stated as a prohibition.
    system_prompt = (
    "You are a researcher assigned the task of preparing the first draft of a manuscript from a rough draft and answering specific questions. "
    "You will be given a list of information to provide and a list of questions. Provide the information and answer the questions "
    "according to the document provided and the instructions given. The instsructions are marked like this "
    "'<!--- INSTRUCTION_START your instructions will be found here INSTRUCTIONS_END -->'. Follow the instructions exactly. "
    "Answer the question completely and use an authoritative voice. Do not invent or make up answers. If you use a web resource you must give the link. "
    "You must give numerical examples from the report when you answer a question about the survey results.  "
    "You may use your base knowledge to complete an answer. "
    "A faithfull transimission of the results in the roughdraft to the user is the the most important part of this task. "
    "Do not provide inference or analysis unless specifically asked. "
    "All answers should be in paragraph form. Here is the document of reference :\n\n"
    f"{roughdraft}"
    )
    return system_prompt


In [18]:
def grid_approximation_prompt():
    """Return the user prompt for the 'Forecasts and methods' section.

    This notebook-level definition replaces the ``grid_approximation_prompt``
    imported from ``reportprompts``.

    The prompt is assembled from three parts separated by blank lines: the
    request (followed by the ``inssix`` instruction), the numbered questions
    and the formatting instructions. Per-question instructions are wrapped in
    ``<!-- INSTRUCTION_START ... INSTRUCTION_END -->`` markers.

    Returns:
        str: the assembled prompt.
    """

    request = (
        "Explain what a grid approximation is and how it can be constructed using an inference table. "
        "define what an inference table is and define what a prior and posterior are and how they are used in an inference table. "
        "For each prior in teh report results cite the name of the prior, and the similarity threshold of the prior, round the answers to two decimal places. "
        "Compare the different posterior distributions to the observed results (in pcs/m) ? Should an increase or decrease be expected ? "
        "in the future how likely is a person to notice an increase or decrease from the observed results if they take one sample ? how about if they take two samples?"
    )

    insone = (
        "<!-- INSTRUCTION_START Consider whether the observed data is likely to be normally distributed or not (use the difference between the median "
        "and mean in summary statistics section). You must cite the values you use in the calculation. Describe the implications on predictions if the data "
        "is normally distributed or not, given the units and the context consider the case where the data is normally distributed"
        " INSTRUCTION_END -->\n"
    )

    insthree = (
        "<!-- INSTRUCTION_START Cite the name of the prior and the expected average and median pcs/m. INSTRUCTION_END -->\n"

    )

    insfour = (
        "<!-- INSTRUCTION_START Consider that the prior is comprised only of locations that are in the same geographic boundary. "
        "Recall that the posterior is a weighted average of the prior and likelihood, so if the in-boundary prior predicts an increase "
        "it is likely that elevated values were observed in other locations within the boundary compared to the likelihood "
        " INSTRUCTION_END -->\n"
    )

    insfive = (
        "<!-- INSTRUCTION_START Consider that the prior is comprised only of locations that are outside the geographic boundary. "
        "Recall that the posterior is a weighted average of the prior and likelihood, so if the out-boundary prior predicts an increase "
        "it is likely that locations outside of the region had elevated values compared to the likelihood "
        " INSTRUCTION_END -->\n"
    )

    inssix = (
        "<!-- INSTRUCTION_START You will find the observed pcs/m in the Summary statistics section. For each posterior there is a "
        "section. Consult the table of values in each section. Consider the average pcs/m result of each posterior in relation to the observed average "
        "Cite the numerical differences, given the standard deviation (in pcs/m)  "
        " INSTRUCTION_END -->\n"
    )
    # Questions that carry an instruction end with that instruction's trailing
    # newline. Questions 2-4 have no instruction, so they end with an explicit
    # "\n" to keep the numbered list one item per line.
    questions = (
        "<!-- INSTRUCTION_START Label the questions section 'Frequently asked questions' (###) INSTRUCTION_END -->\n"
        f"1. Why is grid approximation a reasonable modeling technique given the data ?{insone}"
        "2. Do you have an example of other fields or domains that use grid approximation or bayesian methods ?\n"
        "3. If the data is normally distributed would the predictions from the grid approxmation and the predictions from the normal distribution be different ? If so in what way ?\n"
        "4. What is the difference between grid approximation and linear or enemble regression ?\n"
        f"5. With which posterior do we expect to the find most ? The least ?{insthree}"
        f"6. If the in-boundary grid approxmation predicts an increase or decrease, what does that say about the other samples from within the boundary ?{insfour}"
        f"7. If the out-boundary grid approxmation predicts an increase or decrease, what does that say about the other samples from outside of the boundary ?{insfive}"
        f"8. How different are the expected results from the observed results ? Should an increase or decrease be expected ? {inssix}"

    )

    formatting_instructions = (
        "\n<!-- INSTRUCTION_START \n"
        "1. Label the summary 'Forecasts and methods' (##)\n"
        "2. Label the questions section 'Frequently asked questions' (###) \n"
        "3. Repeat the question (in bold) DO NOT REPEAT the instructions. and then answer\n"
        " INSTRUCTION_END -->\n"
    )

    user_prompt = (

        f'{request}{inssix}\n\n'
        f'{questions}\n\n'
        f'{formatting_instructions}'
    )

    return user_prompt


# Grid-forecast section: build the client and request the first draft.
if chatcompletion is True:
    
    # Load variables from a .env file before the client is created.
    load_dotenv()
    # NOTE(review): `api_key` is read but not passed to ChatOpenAI below —
    # presumably the client picks the key up from the environment; confirm.
    api_key = os.getenv('OPENAI_API_KEY')
    model_rough_draft = session_config.model_rough_draft
    client = ChatOpenAI(model=model_rough_draft)
    
    # Reference document = grid-forecast text plus the summary-statistics prompt text.
    messagesx = messages_for_chat_completion(system_prompt=report_system_prompt(f'{grid_f}\n{firstdraft.summary_statistics()["prompt"]}'), user_prompt=grid_approximation_prompt())    
    completed_chatx = client.invoke(messagesx)
    grid_forecast = completed_chatx.content 
    print('grid forecast complete')

grid forecast complete


In [19]:
def report_stratification_prompt():
    """Return the user prompt for the 'Sampling stratification' section.

    This notebook-level definition replaces the ``report_stratification_prompt``
    imported from ``reportprompts``.

    The prompt is assembled from three parts separated by blank lines: the
    request (followed by the urban/rural/mixed classification rule), the
    numbered questions and the formatting instructions.

    Returns:
        str: the assembled prompt.
    """

    request = (
        "Define sampling stratification and land-use, explain how it applies to the survey results. "
        "Identify the highest pcs/m values in the sampling stratification and trash density table (consider only forest, buidling, undefined and streets) "
        "Use the results from the sampling stratification tabel to classify the surveyed locations as either urban, mixed, or rural"
    )

    # Classification rule: buildings decide "urban", forests decide "rural".
    # The forest rule was previously introduced with the word "urban" too.
    req_inf = (
        "<!-- INSTRUCTION_START "
        "To determine if a location is urban you must sum the proportions of samples for buildings for the rows 60-80% and 80 - 100% of the sampling stratification table. "
        "If this sum is greater than 50% the area is considered urban. To determine if a location is rural you must sum the proportions of samples "
        "for forests for the rows 60-80% and 80 - 100% of the sampling stratification table. If this sum is greater than 50% the area is considered rural. "
        "If both sums are less than 50% the area is considered mixed. "
        " INSTRUCTION_END -->\n"
    )

    insone = (
        "<!-- INSTRUCTION_START Provide examples from the 'sampling stratification and trash density table' consider the results from the buildings, forest, and undefined columns. "
        "Find the two highest values and report the proportion of buffer they occupy and the proportion of the samples that were taken."
        " and explain what the values mean. Give two examples from the table. Do not draw any conclusions. Reply in paragraph form."
        " INSTRUCTION_END -->\n"
    )

    instwo = (
        "<!-- INSTRUCTION_START Follow all instructions in the document and reply in paragraph form."
        " INSTRUCTION_END -->\n"
    )

    insthree = (
        "<!-- INSTRUCTION_START Provide examples from the 'sampling stratification and trash density table' consider the results from the buidlings, forest, and undefined columns. "
        "Find the two highest values and report the proportion of buffer they occupy and the average pcs/m."
        " INSTRUCTION_END -->\n"
    )

    insfour = (
        "<!-- INSTRUCTION_START Recall the definition for urban and rural is provided in the document instructions. If the sampling stratification does not meet either criteria, say so and reply with "
        "prportion of the buffer that contains the greates proportion of samples for buildings, forest and undefined "
        " INSTRUCTION_END -->\n"
    )

    questions = (
        "<!-- INSTRUCTION_START Label the questions section 'Frequently asked questions' (###) INSTRUCTION_END -->\n"
        f"1. What does the sampling stratification table tell us ?{insone}"
        f"2. How can the information in the sampling stratification and trash density table help identify areas of concern ?{instwo}"
        f"3. Under what landuse conditions would a surveyor expect to find the most trash ?{insthree}"
        f"4. Given the results in the sampling stratification table, were these surveys collected in mostly urban environment or forested?{insfour}"

    )

    formatting_instructions = (
        "\n<!-- INSTRUCTION_START\n"
        "1. Label the summary 'Sampling stratification' (##)\n"
        "2. The label for the questions section is 'Frequently asked questions' (###) \n"
        "3. Repeat the question (in bold) DO NOT REPEAT the instructions. and then answer\n"
        " INSTRUCTION_END -->\n"
    )

    user_prompt = (
        f'{request}{req_inf}\n\n'
        f'{questions}\n\n'
        f'{formatting_instructions}'
    )

    return user_prompt


# Sampling-stratification section: the reference document is `sampstrat`;
# reuses the `client` created in the grid-forecast cell.
if chatcompletion is True:
    messagesxxx = messages_for_chat_completion(system_prompt=report_system_prompt(sampstrat), user_prompt=report_stratification_prompt())
    completed_chatxxx = client.invoke(messagesxxx)
    sampling_stratification = completed_chatxxx.content
    print('sampling stratification complete')


sampling stratification complete


In [20]:
def report_regression_prompt():
    """Return the user prompt for the linear/ensemble regression section.

    This notebook-level definition replaces the ``report_regression_prompt``
    imported from ``reportprompts``.

    The prompt is assembled from three parts separated by blank lines: the
    request (followed by ``req_inst``), the numbered questions and the
    formatting instructions.

    Returns:
        str: the assembled prompt.
    """

    # Adjacent literals are concatenated with no separator, so each request
    # sentence ends with its own ". " to stay a distinct instruction.
    request = (
        "Define cluster analysis (kmeans). "
        'Identify the cluster that had the highest pcs/m and cite the composition of buidlings, forest, undefined, the units are average proportion of the buffer. '
        "Define linear regression and ensemble regression, explain the basic assumptions of each method. "
        "If a regression analysis was conducted cite the model that had the highest r², cite the name and the MSE. "
        "If their was a regression analysis conducted, what conclusions can be drawn given the best model ? Given the r² and MSE of the best model how reliable would predictions be ?"
    )

    # NOTE(review): this asks for the label 'Linear and ensemble regression'
    # while formatting_instructions below asks for 'Linear and ensemble
    # methods' — confirm which heading is intended.
    req_inst = (
        "<!-- INSTRUCTION_START Provide concise answers to each request. Label these answers 'Linear and ensemble regression' (##) INSTRUCTION_END -->\n"

    )

    insone = (
        "<!-- INSTRUCTION_START  If their was no regression analysis the report will tell you why. If there is a table, provide that table to user and "
        "write a narrative paragraph of all the results."
        " INSTRUCTION_END -->\n"
    )

    insfour = (
        "<!-- INSTRUCTION_START It is possible that there is no cluster analysis. The report will tell you why. This is a valid answer. "
        "The average pcs/m is given as a table in the cluster analysis subsection and given as objects per meter. "
        "The distribution of land use values is given in the cluster analysis subsection and given as a float value that represents "
        "the average proportion of the buffer zone occupied by the land use category. The paragraph above the table explains how to interpret the table"
        " when you provide the results for the cluster lable the results as % of buffer occupied by land use feature INSTRUCTION_END -->\n"
    )

    questions = (
        "<!-- INSTRUCTION_START Label the questions section 'Frequently asked questions' (###) INSTRUCTION_END -->\n"
        f"1. What were the r² and MSE of each test ? {insone}"
        "2. Given the r² and MSE of the different methods employed, how reliable do you think predictions would be based on these models ?\n"
        "3. Can any conlusions be drawn from these results ?\n"
        f"4. Accroding to the cluster analysis what is the cluster that has the greatest average pcs/m ? What is the distribution of land use values within the cluster ? {insfour}"
    )

    formatting_instructions = (
        "\n<!-- INSTRUCTION_START \n"
        "1. The label for the whole section is 'Linear and ensemble methods' (##)\n"
        "2. The label for the questions section is 'Frequently asked questions' (###) \n"
        "3. Repeat the question (in bold) DO NOT REPEAT the instructions. and then answer\n"
        " INSTRUCTION_END -->\n"
    )
    user_prompt = (

        f'{request}{req_inst}\n\n'
        f'{questions}\n\n'
        f'{formatting_instructions}'
    )

    return user_prompt

# Regression section: the reference document is `lin_f`;
# reuses the `client` created in the grid-forecast cell.
if chatcompletion is True:
    messagesxx = messages_for_chat_completion(system_prompt=report_system_prompt(lin_f), user_prompt=report_regression_prompt())
    completed_chatxx = client.invoke(messagesxx)
    linear_methods = completed_chatxx.content
    print('linear methods complete')


linear methods complete


In [21]:
def report_summary_prompt():
    """Return the user prompt for the 'Sample results' summary section.

    This notebook-level definition replaces the ``report_summary_prompt``
    imported from ``reportprompts``.

    The prompt is one triple-quoted literal: the leading indentation of each
    line and the explicit ``\\n`` escapes are part of the text that is sent.
    The f-string has no placeholders. Instructions in this prompt use the
    ``<!--- INSTRUCTION_START ... INSTRUCTIONS_END -->`` marker form.

    Returns:
        str: the prompt text.
    """

    user_prompt = f"""\n

    Provide a summary of the following sections: Administrative boundaries, Named features, Summary statistics, Municipal results, Material composition.

    The following must be included in the summary:

    * The Name and number of the cities in the report <!--- INSTRUCTION_START The names of the lakes, rivers and parks are in the named features section, the municipal results are in the city total section INSTRUCTIONS_END -->\n
    * The name and number of lakes, rivers or parks in the report : <!--- INSTRUCTION_START The names of the lakes, rivers and parks are in the named features section, the municipal results are in the city total section INSTRUCTIONS_END -->\n
    * The start and end date of the sampling and the name of the survey area(s), the name of the survey areas must have the first letter capitalized.\n
    * The numbar of samples, the average pcs/m, the median pcs/m, the maximum pcs/m, the standard deviation and the total number of objects identified\n
    * The five most common objects by greatest quantity <!--- INSTRUCTION_START Provide the name of the object, the fail rate, the percent of total, the pcs/m and the total quantity. Convert the fail rate to percent. define the fail rate. INSTRUCTIONS_END -->
    * The material composition\n   

    Frequently asked questions:

    1. What were the ten most common items found ? <!--- INSTRUCTION_START Provide the name of the object, the fail rate and the percent of total. Convert the fail rate to percent. define the fail rate. INSTRUCTIONS_END -->
    2. Are these objects found on european beaches ? If so is their any data on how many per 100 m of beach ? <!--- INSTRUCTION_START you may use your base knowledge to answer this question, consider OSPAR results from 2022 or 2021 provide the url for the results INSTRUCTIONS_END -->
    3. What are possible sources of these specific objects objects ? <!--- INSTRUCTION_START you may use your base knowledge to answer this question INSTRUCTIONS_END -->
    4. Which three cities had the highest average pcs/m ? Which three had the lowest ?


    <!--- INSTRUCTION_START\n

    formatting instructions:

    1. Label the summary 'Sample results' (##)\n
    2. The label for the questions section is 'Frequently asked questions' use markdown formatting for the label (###)\n
    3. Repeat the question (in bold) DO NOT REPEAT the instructions. and then answer\n

    INSTRUCTIONS_END -->
    """
    return user_prompt


# Sample-results section: the reference document is `asum`;
# reuses the `client` created in the grid-forecast cell.
if chatcompletion is True:
    messages = messages_for_chat_completion(system_prompt=report_system_prompt(asum), user_prompt=report_summary_prompt())
    completed_chat = client.invoke(messages)
    summary = completed_chat.content
    print('summary complete')

summary complete


In [22]:
# messages_for_chat_completion(system_prompt=final_draft_prompt(sampstrat), user_prompt=make_stratification_prompt('English'))
# From here on `client` is rebound to the corrections model; every later
# client.invoke call (including those made through make_call_to_llm) uses it.
model_corrections = session_config.model_corrections
client = ChatOpenAI(model=model_corrections)

def corrections_prompt(document):
    """Return the user prompt asking for a corrected version of ``document``.

    This notebook-level definition replaces the ``corrections_prompt``
    imported from ``reportprompts``.

    Args:
        document: the revised draft; appended after the request and its
            instruction block.

    Returns:
        str: request, instruction block, then the draft under the heading
        'The revised draft: '.
    """
    what_to_do = "".join([
        "Please check the answers provided in this revised draft to the rough draft that you have. Any numerical float values should be rounded to two places. ",
        "Return the document with the corrections please. Do not add any comments about the corrections, just do them and return the corrected document. ",
        "If definitions are not clear you may rephrase them, consider the audience is non technical. ",
    ])

    guard_rails = "".join([
        "<!-- INSTRUCTION_START We are concerned with numrical results and conclusions. ",
        "Ensure that any conclusions are correct according to standard practice and the methods explained in the rough draft instructions. Do not change the markdown formatting of the document.",
        " INSTRUCTION_END -->\n",
    ])

    return f"{what_to_do}{guard_rails}\nThe revised draft: \n\n{document}"


def executive_summary_prompt(roughdraft):
    """Return the system prompt for writing the executive summary.

    This notebook-level definition replaces the ``executive_summary_prompt``
    imported from ``reportprompts``.

    Args:
        roughdraft: rough-draft text; appended verbatim after the heading
            'The rough draft:'.

    Returns:
        str: the role description followed by the rough draft.
    """
    # The last instruction ends with ". " so it does not run into the
    # 'The rough draft:' heading when the literals are concatenated.
    system_prompt = (
        "You are a researcher assigned the task of writing the executive summary of a report from a revised report. "
        "You are tasked to draft the executive summary and ensure that key points in the rough draft do not get excluded. "
        "There are instructions in the rough draft labeled <!--- INSTRUCTION_START  your instructions are in here INSTRUCTION_END -->. Ensure the insstructions were followed. "
        "The rough draft:\n"
        f"{roughdraft}"
    )
    return system_prompt




def executive_corrections_prompt(document):
    """Return the user prompt requesting a four-paragraph executive summary.

    This notebook-level definition replaces the ``executive_corrections_prompt``
    imported from ``reportprompts``.

    Args:
        document: the revised draft; appended after the guidelines and the
            instruction block.

    Returns:
        str: guidelines, instruction block, then the draft under the heading
        'The revised draft: '.
    """
    request = (
        "Make an executive summary of the revised draft below, use four paragraphs.\n"

        "Summarize the results from each section, use the following guidelines : \n\n"
        "1. **Sample results:** Include the number of cities, lakes, rivers and parks in the report, the start and end date. Include the five most common items as well as the fail rate and "
        "% of total for each of the five objects. Cite the the number of samples, the average pcs/m, the maximum pcs/m, the standard deviation pcs/m and the median. Incude the total number of objects identified.\n "
        "2. **Samping stratification and sampling stratification and trash density:** Define sampling stratification in one sentence. Include the classification of urban or rural or mixed "
        " land useclassification. Explain what the sampling stratification table is for and cite the condition with the highest pcs/m, the proportion of buffer the proportion of samples.\n"
        "3. **Linear and ensemble regression:** If a regression analysis was not conducted state the reason. If a regression analysis was conducted "
        # ". " keeps this sentence apart from the next literal after concatenation
        "cite the model with the highest r² and the corresponding MSE. Cite features of importance using the permutation feature importance and the model feature importance. "
        "If a regression analysis was conducted and you have an r² and MSE, state the reliability of predictions given these results.\n"
        "4. **Forecasts and methods:** Define grid approximation and the method used in the report, concisely and briefly. Explain the hypothesis of each prior. Give the "
        "posterior results for each of the defined priors. Compare the posterior results to the observed results and state whether an increase or decrease is expected.\n"
    )

    req_inf = (
        "<!-- INSTRUCTION_START Label this section Executive summary (##). Your opinion is not needed, only draw conclusions from the data. "
        "Ensure that any conclusions are correct according to standard practice and the methods explained in the rough draft instructions. Keep the executive summary to four paragraphs."
        " INSTRUCTION_END -->\n"
    )

    user_prompt = (
        f'{request}{req_inf}\n'
        'The revised draft: \n\n'
        f'{document}')

    return user_prompt

def final_draft_prompt(roughdraft):
    """Return the system prompt for correcting one section of the revised draft.

    This notebook-level definition replaces the ``final_draft_prompt``
    imported from ``reportprompts``.

    Args:
        roughdraft: rough-draft text; appended verbatim after the heading
            'The rough draft:'.

    Returns:
        str: the role description and rules, followed by the rough draft.
    """
    # Every sentence ends with its own trailing space or ". " because the
    # adjacent literals are concatenated with no separator.
    system_prompt = (
        "You are a researcher preparing a manuscript from a revised draft. "
        "You are tasked to correct the individual sections of the revised draft for the manuscript. "
        "Ensure that answers are correct by comparing the revised draft to the rough draft. Use an academic voice. "
        "Ensure that conclusions are correct by reviewing the rough draft and the responses in the reivsed draft. "
        "Do not change the formatting of the revised draft. "
        "There are instructions in the rough draft labeled <!--- INSTRUCTION_START  your instructions are in here INSTRUCTION_END -->. Ensure the insstructions were followed. "
        "The rough draft:\n"
        f"{roughdraft}"
    )
    return system_prompt

In [23]:
# Correction pass for the sample-results section: `summary` is the draft to
# correct, `asum` the rough draft it is checked against.
corrected_summary = make_call_to_llm(final_draft_prompt, corrections_prompt,summary, asum, client)
print('corrected_summary complete')

corrected_summary complete


In [24]:
# Correction pass for the sampling-stratification section, checked against `sampstrat`.
corrected_sampling_strat = make_call_to_llm(final_draft_prompt, corrections_prompt, sampling_stratification, sampstrat, client)
print('corrected sampling strat complete')

corrected sampling strat complete


In [25]:
# Correction pass for the regression section, checked against `lin_f`.
corrected_linear = make_call_to_llm(final_draft_prompt, corrections_prompt, linear_methods, lin_f, client)
print('corrected linear complete')

corrected linear complete


In [26]:
# Correction pass for the grid-forecast section; the rough draft is the same
# grid text plus summary-statistics prompt used for the first draft.
corrected_grid = make_call_to_llm(final_draft_prompt, corrections_prompt, grid_forecast, f'{grid_f}\n{firstdraft.summary_statistics()["prompt"]}', client)
print('corrected grid complete')

corrected grid complete


In [27]:
# The corrected sections, in the order they appear in the revision file.
initial = [corrected_summary, corrected_sampling_strat, corrected_linear, corrected_grid]
# NOTE(review): rough_draft() used report_meta["resources"] + roughdraftname
# ('bielcity/roughdraft.md' for this run) as the draft location, but this path
# points to 'examples/' — confirm the executive summary reads the intended file.
roughdraft_location = 'examples/roughdraft.md'

def executive_summary(roughdraft, initial, title):
    """Write the consolidated revision to disk and request an executive summary.

    Reads the module-level names ``report_meta`` (for the output directory)
    and ``client`` (the LLM client).

    Args:
        roughdraft: path of the rough-draft file to read.
        initial: iterable of objects exposing ``.content``; concatenated in order.
        title: text placed at the top of the revision.

    Returns:
        The value returned by ``make_call_to_llm`` for the executive-summary prompts.
    """
    # Each section is followed by a blank line; the body starts with a newline.
    body = '\n' + ''.join(block.content + '\n\n' for block in initial)
    revision = title + '\n\n' + body

    revision_path = f"{report_meta['resources']}revision.md"
    with open(revision_path, 'w') as out:
        out.write(revision)

    with open(roughdraft, 'r') as src:
        rough_text = src.read()

    return make_call_to_llm(executive_summary_prompt, executive_corrections_prompt, revision, rough_text, client)
# Request the executive summary from the consolidated revision.
esummary = executive_summary(roughdraft_location, initial, title)


# Split every corrected section on `split_phrase`; the two parts are used
# below as the section summary and the content of its FAQ dropdown.
finished_summary, finished_faq_summary = split_text_on_phrase(corrected_summary.content, split_phrase)
finished_summary_strat, finished_faq_summary_strat = split_text_on_phrase(corrected_sampling_strat.content, split_phrase)
finished_summary_linear, finished_faq_summary_linear = split_text_on_phrase(corrected_linear.content, split_phrase)
finished_summary_grid, finished_faq_summary_grid = split_text_on_phrase(corrected_grid.content, split_phrase)

## Finished report

In [28]:
# Markdown/HTML fragments for the finished report. Every name defined here is
# consumed by the cell section that writes `final_review`.
open_div = "\n<div>"
table_grid = "\n\n::::{grid}\n\n:::{grid-item}\n\n"
# split_phrase = 'Frequently asked questions'

# map
map_of_locations = report_meta['resources'] + 'situation_map.jpg'
# Directive options use the `:name: value` form; `:margin: 0` matches the
# boxplot grid below (the colon after `margin` was missing).
situation_map_grid = "\n::::{grid} 1\n:margin: 0\n:padding: 0\n\n:::{grid-item-card}\n:padding: 0\n:img-background: " + f"{map_of_locations}\n\n:::\n\n::::\n"

# the cluster results go with the map
cluster_results = f'{open_div}\n\n{map_legend_markers["cluster_results"].to_html()}\n\n</div>\n'
cluster_results_in_grid = f'{table_grid}{cluster_results}\n\n:::\n\n::::'

# grid forecast boxplots
boxplots_location = report_meta['resources'] + 'boxplots_observed_expected.jpeg'
summary_grid_images = "\n::::{grid} 1\n:margin: 0\n\n:::{grid-item}\n:padding: 0\n\n![image info]" + f"({boxplots_location})\n\n:::\n\n::::\n"

# sampling stratification
strat_of_sampling_table = f"{open_div}\n\n{land_use_profile.to_html()}\n\n</div>\n\n"
table_in_grid_strat = f'{table_grid}{strat_of_sampling_table}\n\n:::\n\n::::'


# stratification of pcsm
strat_pcs_m = f"{open_div}\n{land_use_rates.to_html()}</div>\n\n"
table_in_grid_strat_pcs = f'{table_grid}{strat_pcs_m}\n\n:::\n\n::::'

# aggregate results of city or canton; `agg_tables` only exists when there
# is at least one consolidated table, which `aggregates` records.
aggregates = False

if len(aggregate_results) > 0:
    aggregates = True
    agg_label = "\n## Consolidated results : "
    agg_label_keys = [x['label'] for x in aggregate_results]
    agg_label_keys = ', '.join(agg_label_keys)
    agg_label = agg_label + agg_label_keys + "\n"
    agg_content = ""

    for a_table in aggregate_results:
        in_div = f'{open_div}\n\n{a_table["table"].to_html()}\n\n</div>\n'
        in_grid = f'{table_grid}{in_div}\n\n:::\n\n::::'
        agg_content += in_grid + "\n"

    agg_tables = agg_label + agg_content

# the inventory of all the objects
inventory_tables = f"{open_div}\n{inventory_table.to_html()}\n\n</div>\n"

# openers for the FAQ dropdowns; each is closed with ":::" when written
dropdown_summary = "\n:::{dropdown} Sample results frequently asked questions\n"
dropdown_summary_strat = "\n:::{dropdown} Sampling stratification frequently asked questions\n"
dropdown_summary_lin = "\n:::{dropdown} Linear methods frequently asked questions\n"
dropdown_summary_grid = "\n:::{dropdown} Grid approximation frequently asked questions\n"

# Write the finished report in a single pass.
# The summary text gets its own name so the `executive_summary` function
# defined earlier is not overwritten by a string.
executive_summary_text = f'\n{esummary.content}'

# Explicit UTF-8: the LLM output may contain non-ASCII characters (e.g. "r²")
# and the platform default encoding is locale-dependent.
with open(final_review, 'w', encoding='utf-8') as file:
    file.write(title)
    file.write(executive_summary_text)

    # sample results
    file.write('\n' + finished_summary)
    file.write(f'{dropdown_summary}{finished_faq_summary}\n:::\n')
    file.write(situation_map_grid)
    file.write(cluster_results_in_grid)

    # sampling stratification
    file.write('\n' + finished_summary_strat)
    file.write(table_in_grid_strat)
    file.write(table_in_grid_strat_pcs)
    file.write(f'{dropdown_summary_strat}{finished_faq_summary_strat}\n:::\n')

    # linear methods
    file.write('\n' + finished_summary_linear)
    file.write(f'\n{dropdown_summary_lin}{finished_faq_summary_linear}\n:::\n')

    # grid approximation
    file.write('\n' + finished_summary_grid)
    file.write(summary_grid_images)
    file.write(f'\n{dropdown_summary_grid}{finished_faq_summary_grid}\n:::\n')

    # `agg_tables` is one string, so it is written once rather than
    # character by character.
    if aggregates is True:
        file.write(agg_tables)

    file.write('\n## Inventory\n')
    file.write(inventory_tables)