In [1]:
import pandas as pd
import numpy as np
import geopandas as gpd
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
from shapely.geometry import Point
from shapely.geometry import box

# import matplotlib.patches as mpatches
import session_config
from session_config import  collect_survey_data, feature_variables, agg_groups, feature_variables
from session_config import lat_lon, beaches, model_rough_draft, model_corrections, feature_type_labels
from session_config import tobo_snacks, report_meta_data, report_args, highlight_max, table_css_styles
from reports import make_report_objects
from reports import histograms_standard
from reports import ecdf_plots_standard, scatter_plot_standard
import gridforecast as gfcast
from linearmethods import LinearMethods
from roughdraft import ReportTexts, messages_for_chat_completion, use_chat_completion, construct_report_title_and_subtitle
from IPython.display import Markdown
from geospatial import map_markers, make_map_caption, layer_selection_criteria

from dotenv import load_dotenv
import os
from myst_nb import glue

from langchain_openai import ChatOpenAI
import openai

# Load variables from a local .env file into the process environment.
load_dotenv()
# NOTE(review): api_key is not referenced again in the visible cells; ChatOpenAI
# presumably picks OPENAI_API_KEY up from the environment itself — confirm.
api_key = os.getenv('OPENAI_API_KEY')

# Survey records; passed to report_args and to ReportTexts.string_rep below.
datax = collect_survey_data()

In [2]:
# Scope of this report: canton Bern, feature type 'l', samples between the two dates,
# restricted to the tobacco/snack object codes.
this_report = report_args(
    data=datax,
    start='2020-01-01',
    end='2021-12-01',
    name='Bern',
    boundary='canton',
    boundary_name='Bern',
    feature_type='l',
    report_codes=tobo_snacks,
    columns_of_interest=feature_variables,
)
report_data, report_meta = report_meta_data(**this_report)
all_report, all_land_use = make_report_objects(
    report_data,
    info_columns=['canton', 'city', 'feature_name'],
)

# Chat model used for the first (rough-draft) pass.
client = ChatOpenAI(model=model_rough_draft)

args = dict(
    report_meta=report_meta,
    survey_report=all_report,
    landuse_report=all_land_use,
)
firstdraft = ReportTexts(**args)

# string_rep returns six values; asum, sampstrat, grid_f and lin_f are used further down
# as rough-draft context for the prompts, and `data` is indexed by section name.
asum, sampstrat, grid_f, lin_f, inv_f, data = firstdraft.string_rep('rough_draft.md', datax)

Markdown(construct_report_title_and_subtitle(report_meta))

Processing prior...
Processing out_boundary...


# Bern canton lake 2020-01-01 2021-12-01
**Summary and analysis of observations of trash density**: objects related to tobacco and food and drink found in lakes.

In [4]:
# Show the rough-draft text that is later passed as context to the regression prompt.
Markdown(lin_f)

# Bern canton lake 2020-01-01 2021-12-01
**Summary and analysis of observations of trash density**: objects related to tobacco and food and drink found in lakes.




### Cluster analysis Bern canton lake 2020-01-01 2021-12-01


Bern: Cluster composition

The survey locations were labeled according to the type and magnitude of land use in a 1 500 m buffer zone around each survey location. A cluster analysis was performed using K-Means clustering; the optimal number of clusters was determined using the elbow method. Each cluster represents a group of locations that have similar land use profiles, that is, the locations are surrounded by similar quantities of buildings or forest or undefined land use. We consider the cluster composition and the proportion of each cluster dedicated to a particular land use. For example if the value for forest, cluster 1 = .45 then that means that in cluster 1, the average sample was taken from a location whose buffer zone was 45% dedicated to forest. 

The following are the summary results of a cluster analysis. The columns are the features that were used to make the clusters. The optimal number of clusters was
determined using the elbow method (you can check the docs for this: https://hammerdirt-analyst.github.io/feb_2024/titlepage.html). The table displays the average magnitude
of each feature in the cluster. For example if the value for forest, cluster 1 = .45 then that means that in cluster 1, the average sample was taken from a location that was
45% dedicated to forest.

Table has the following format:

1. the columns are the measured land use features
2. the index is the cluster number
3. the value is the proportion of the cluster that is attributed to that column. For example if buildings in cluster 1 = .17 it means that the average magnitude of
the buildings variable was 0.17 in cluster 1.

Convert the following table into a paragraph, reporting the values for each column along with their cluster number values without any comments or analysis:

|   cluster |   buildings |   wetlands |   forest |   public-services |   recreation |   undefined |   streets |   vineyards |   orchards |
|----------:|------------:|-----------:|---------:|------------------:|-------------:|------------:|----------:|------------:|-----------:|
|         0 |       0.14  |      0     |    0.308 |             0.188 |        0.014 |       0.297 |  0.273278 |       0.192 |       0    |
|         1 |       0.309 |      0.022 |    0.107 |             0.061 |        0.025 |       0.541 |  0.49708  |       0.02  |       0    |
|         2 |       0.682 |      0     |    0.153 |             0.044 |        0.005 |       0.149 |  0.251846 |       0.015 |       0    |
|         3 |       0.167 |      0     |    0.559 |             0.038 |        0.002 |       0.133 |  0.096574 |       0     |       0.14 |





Bern: Average density per cluster
The following are the observed sample average per cluster. The units is objects per meter of beach. The columns are the use case of the objects: personal or professional. The index is
the cluster number.

Table has the following format:

1. the columns are the object use case
2. the index is the cluster number
3. the value is the objects found per meter of beach

Convert the following table into a paragraph, reporting the values for each column along with their respective cluster values without any comments or analysis:
The narrative needs to be in paragraph format.

|   cluster |    pcs/m |
|----------:|---------:|
|         0 | 1.545    |
|         1 | 0.786889 |
|         2 | 0.3475   |
|         3 | 0.518421 |



### Summary of regression methods Bern canton lake 2020-01-01 2021-12-01: 

In addition to grid approximation using Bayesian techniques, the following linear and ensemble regression models were used. The feature variables are the land-use features identified in the land-use profile. From the scikit-learn standard package: LinearRegression, RandomForestRegressor, GradientBoostingRegressor, TheilSenRegressor. The model with the highest r² is then used in the BaggingRegressor and the VotingRegressor.





The following table details the results from different regression analysis of our data.

The table has the following format:

1. Model: the type of regression model used
2. R²: The coefficient of determination
3. MSE: the mean squared error

Generate a narrative summary based on the following table. You need to include all the models and the R² and MSE result.
The narrative needs to be in paragraph format.

|    | Model                                |       R² |      MSE |
|---:|:-------------------------------------|---------:|---------:|
|  0 | Linear Regression                    | 0.273686 | 0.461329 |
|  1 | Random Forest Regression             | 0.252827 | 0.474577 |
|  2 | Gradient Boosting Regression         | 0.432297 | 0.360585 |
|  3 | Theil-Sen Regressor                  | 0.360235 | 0.406356 |
|  4 | Bagging:Gradient Boosting Regression | 0.386699 | 0.389547 |
|  5 | Voting                               | 0.371132 | 0.399434 |



### Feature and permutation importance Bern canton lake 2020-01-01 2021-12-01



__Model feature importance__

Feature importance is a technique used in machine learning to identify and quantify the significance of different input variables (features) in predicting the target variable. In models like decision trees, random forests, and gradient boosting machines, feature importance is often calculated by measuring how much the model's accuracy or error changes when a particular feature is included versus when it is excluded. 
The following table details the model feature importance.

Table has the following format:

1. Feature: the name of the land-use feature
2. importance: The model feature importance

Convert the following table into a paragraph, reporting the values for each row without any comments or analysis:

|    | Feature         |   Importance |
|---:|:----------------|-------------:|
|  6 | streets         |   0.541279   |
|  2 | forest          |   0.188182   |
|  4 | recreation      |   0.121532   |
|  7 | vineyards       |   0.073962   |
|  3 | public-services |   0.0467016  |
|  0 | buildings       |   0.0178309  |
|  1 | wetlands        |   0.00863472 |
|  5 | undefined       |   0.00187775 |



__Permutation feature importance__

Permutation importance is a model-agnostic method for assessing the importance of individual features in a predictive model. It is particularly useful because it can be applied to any type of model, whether it's a linear model, a decision tree, or a complex ensemble model. This method involves randomly shuffling the values of a feature in the dataset and observing the impact on the model's performance. A significant drop in performance indicates that the feature is important.
The following table details the permutation feature importance.

Table has the following format:

1. Feature: the name of the land-use feature
2. importance: The model feature importance

Convert the following table into a paragraph, reporting the values for each row without any comments or analysis:

|    | Feature         |   Importance |
|---:|:----------------|-------------:|
|  6 | streets         |   0.459967   |
|  2 | forest          |   0.0784199  |
|  3 | public-services |   0.0231224  |
|  4 | recreation      |   0.0196792  |
|  0 | buildings       |   0.00861367 |
|  7 | vineyards       |   0          |
|  5 | undefined       |  -0.00119991 |
|  1 | wetlands        |  -0.00213289 |



In [3]:
# List the section names available in the `data` mapping returned by string_rep.
data.keys()

dict_keys(['the_admin_boundaries', 'the_named_feature', 'summary_statistics', 'material_composition', 'survey_totals_for_all_info_cols', 'inventory', 'landuse_profile', 'landuse_rates', 'grid_approximation', 'linear_methods'])

In [4]:
# Keep handles on the 'grid_approximation' and 'linear_methods' entries of the report data.
grids = data['grid_approximation']
regres = data['linear_methods']

In [5]:
# Keys stored under 'grid_approximation'.
grids.keys()

dict_keys(['prior', 'out_boundary'])

In [6]:
# Sample results for the scatter plot; the 'date' column is converted to datetime in place.
s_like = firstdraft.survey_report.sample_results(info_columns=firstdraft.info_cols)
s_like['date'] = pd.to_datetime(s_like['date'])

# Sampling stratification table: styled, captioned with the land-use description text
# from session_config, and registered with glue for the MyST page below.
caption_sampstrat = ''.join(session_config.land_use_description)
# s_strat = firstdraft.landuse_profile()
stratification_of_sampling = data['landuse_profile'].style.set_table_styles(table_css_styles).set_caption(caption_sampstrat)
glue('stratification_of_sampling', stratification_of_sampling, display=False)

# Same treatment for the land-use rates table (caption: session_config.landuse_litter_density).
caption_ratestrat = ''.join(session_config.landuse_litter_density)
# ratestrat = firstdraft.landuse_rates()
stratification_of_pcsm = data['landuse_rates'].style.set_table_styles(table_css_styles).set_caption(caption_ratestrat)
glue('stratification_of_pcsm', stratification_of_pcsm, display=False)
# Inventory table.
# NOTE(review): `inventory` is assigned but the glued table is built from data['inventory'];
# confirm whether the firstdraft.inventory() call is still needed.
inventory = firstdraft.inventory()
inventory_table = data['inventory'].style.set_table_styles(table_css_styles)
glue('inventory_table', inventory_table, display=False)
# Scatter plot of the sample results.


# NOTE(review): 'Acaption' looks like a placeholder caption — confirm the intended text.
likelihood_scatter = scatter_plot_standard([(s_like, report_meta['name'] , session_config.palette['likelihood'])], 'Acaption')
glue('likelihood-scatter', likelihood_scatter, display=False)

In [7]:
# Survey locations as map markers, plus the bounds used to frame the axes.
survey_samples = firstdraft.survey_report.sample_results(info_columns=firstdraft.info_cols)
markers, bounds = map_markers(survey_samples, lat_lon)

# Caption and background layers for this report.
map_caption = make_map_caption(report_meta)
layers = layer_selection_criteria(report_meta)

# Drawing options per background layer; layers without an entry are not drawn.
layer_styles = {
    'city': dict(zorder=1, edgecolor='white', facecolor='black', alpha=.2, linewidth=.5),
    'river': dict(zorder=2, edgecolor='dodgerblue', linewidth=.5),
    'lake': dict(zorder=3, edgecolor='dodgerblue', facecolor='dodgerblue'),
}

fig, ax = plt.subplots(figsize=(12, 12))

for key in layers:
    if key in layer_styles:
        layers[key].plot(ax=ax, **layer_styles[key])

# Frame the map on the marker bounds with a small margin on every side.
margin = .05
ax.set_xlim(bounds[0][0] - margin, bounds[1][0] + margin)
ax.set_ylim(bounds[0][1] - margin, bounds[1][1] + margin)

# Survey locations are drawn on top of every background layer.
markers.plot(ax=ax, zorder=4, edgecolor='magenta', linewidth=.4, facecolor='magenta')
ax.set_axis_off()

# Close the current figure so it is not shown inline; it is handed to glue instead.
plt.close()

glue('map-caption', Markdown(map_caption), display=False)
glue('situation_map_one', fig, display=False)

In [11]:
# NOTE(review): in top-to-bottom order `grid_forecast` is only assigned further down,
# from `completed_chatx.content`, so this cell relies on kernel state from an earlier
# session — confirm which object was meant to be indexed with 'dataframe'.
r = grid_forecast['dataframe']



In [12]:
# Print each item yielded by iterating over `r`.
for a_prior in r:
    print(a_prior)

out_boundary


In [5]:
# NOTE(review): `break` outside a loop is a SyntaxError (see this cell's output). This
# looks like a deliberate stop for "Run All" — confirm, and remove before publishing.
break

SyntaxError: 'break' outside loop (668683560.py, line 1)

In [6]:
def make_system_prompt(roughdraft):
    """
    Builds the system prompt used when drafting a report section from its rough draft.

    Parameters:
    roughdraft (str): The rough-draft text; appended verbatim after the instructions.

    Returns:
    str: The fixed drafting instructions followed by the rough draft.
    """
    # Adjacent literals are concatenated with no separator, so every sentence carries its
    # own closing punctuation and trailing space.
    instructions = (
        "You are a researcher assigned the task of preparing the first draft of a manuscript from a rough draft and answering specific questions. "
        "Do not give your opinion. Do not use phrases like 'this report provides' or 'this study correlates' or 'according to the text' or 'this document' or anything like that. "
        "State what the subject is, Answer the question completely and use an authoritative voice. "
        "You must give numerical examples from the report when you answer a question about the survey results. "
        "Do not answer the questions twice. If the question is answered in the summary state this. If there are no questions pass, do not make any up. "
        "there are instructions between the labels '<!--- INSTRUCTION_START your instructions will be found here INSTRUCTIONS_END -->' follow them always. "
        "All answers should be in paragraph form. Here is the rough draft :\n\n"
    )
    return f"{instructions}{roughdraft}"

def split_text_on_phrase(text, phrase):
    """
    Cuts the text in two at the first occurrence of the phrase.

    Parameters:
    text (str): The full text to cut.
    phrase (str): The marker at which the text is cut.

    Returns:
    tuple: (head, tail). head is everything before the phrase; tail starts with the phrase
    and runs to the end of the text. When the phrase does not occur, head is the whole
    text and tail is ''.
    """
    if phrase not in text:
        return text, ''

    cut = text.index(phrase)
    return text[:cut], text[cut:]

In [7]:
def make_summary_prompt():
    """
    Builds the user prompt for the 'Sample results' summary and its FAQ section.

    The text takes no parameters. Per-item instructions for the model are wrapped in
    '<!--- INSTRUCTION_START ... INSTRUCTIONS_END -->' markers, the same labels the
    drafting system prompt tells the model to look for.

    Returns:
    str: The prompt text.
    """
    # Plain (non-f) string: the text contains no placeholders.
    user_prompt = """\n
        
    Provide a summary of the following sections: Administrative boundaries, Named features, Summary statistics, Municipal results, Material composition.
    
    The following must be included in the summary:

    * The Name and number of the cities in the report <!--- INSTRUCTION_START The names of the lakes, rivers and parks are in the named features section, the municipal results are in the city total section INSTRUCTIONS_END -->\n
    * The name and number of lakes, rivers or parks in the report : <!--- INSTRUCTION_START The names of the lakes, rivers and parks are in the named features section, the municipal results are in the city total section INSTRUCTIONS_END -->\n
    * The start and end date of the sampling and the name of the survey area(s), the name of the survey areas must have the first letter capitalized.\n
    * The number of samples, the average pcs/m, the maximum pcs/m and the total number of objects identified\n
    * The material composition\n   

    Frequently asked questions:

    1. What were the five most common items found ? <!--- INSTRUCTION_START Provide the name of the object, the fail rate and the percent of total. Convert the fail rate to percent. define the fail rate. INSTRUCTIONS_END -->
    2. Are these objects found on european beaches ? If so is there any data on how many per 100 m of beach ? <!--- INSTRUCTION_START you may use your base knowledge to answer this question, consider OSPAR results INSTRUCTIONS_END -->
    3. What are possible sources of these specific objects ? <!--- INSTRUCTION_START you may use your base knowledge to answer this question INSTRUCTIONS_END -->
    4. Which three cities had the highest average pcs/m ? Which three had the lowest ?
   
      
    <!--- INSTRUCTION_START\n

    formatting instructions:

    1. Label the summary 'Sample results' (##)\n
    2. The label for the questions section is 'Frequently asked questions' use markdown formatting for the label (###)\n
    3. Repeat the question (in bold) DO NOT REPEAT the instructions. and then answer\n

    INSTRUCTIONS_END -->
    """
    return user_prompt

# Draft the summary: the rough-draft text `asum` goes into the system prompt and the
# summary request is the user prompt.
messages = messages_for_chat_completion(system_prompt=make_system_prompt(asum), user_prompt=make_summary_prompt())
completed_chat = client.invoke(messages)
summary = completed_chat.content
# The response is cut at the FAQ heading requested in the prompt. If this exact text
# (including the two newlines) is absent, `faq` is '' and `summary_text` holds everything.
split_phrase = '### Frequently asked questions\n\n'
summary_text, faq = split_text_on_phrase(summary, split_phrase)
# Register both parts for the MyST grid that follows.
glue('summary', Markdown(summary_text), display=False)
glue('faq', Markdown(faq), display=False)

::::::::{grid} 1 1 1 1


:::::::{grid-item}

```{glue:md} summary
:format: myst
```
:::::::

:::::::{grid-item-card}
:padding: 5
:margin: auto

```{glue} situation_map_one
```
+++
```{glue:md} map-caption
:format: myst
```
:::::::

:::::::{grid-item}
```{glue:md} faq
:format: myst
```
:::::::

:::::::{grid-item-card}
```{glue} likelihood-scatter
```
:::::::
::::::::

In [8]:
# Building blocks of the sampling-stratification user prompt.
# NOTE: the names `request`, `insone`, `insthree`, `insfour`, `questions` and
# `formatting_instructions` are assigned again by later prompt cells, so
# make_stratification_prompt() must be called before those cells run.
# Adjacent string literals are concatenated with no separator: each fragment carries its
# own leading/trailing space.
request = (
    "Define sampling stratification and how it applies to the survey results, define land-use and how it applies to the survey report."
)

# Instruction attached to the request itself.
req_inf = (
    "<!-- INSTRUCTION_START Define sampling stratification in the general sense and explain how it is used in this survey. Define what land use is. The land use categories for this report are listed in the column names "
    "of the sampling stratification table. These are the land use categories that are used to stratify the results. Do not make up results, follow the definitions and instructions explicitly."
    " INSTRUCTION_END -->\n"
)

# One instruction per question; each ends with a newline that also separates the questions.
insone = (
    "<!-- INSTRUCTION_START There are instructions in 'sampling stratification' section and an example on how to interpret the results from the table. Reference these instructions"
    " and explain what the values mean. Give two examples from the table. Do not draw any conclusions. Reply in paragraph form."
    " INSTRUCTION_END -->\n"
)

instwo = (
    "<!-- INSTRUCTION_START There are instructions in 'sampling stratification and trash density' section and an example on how to interpret the results from the table. Reference these instructions"
    " and explain what the values mean. Give two examples from the table. Do not draw any conclusions. Reply in paragraph form."
    " INSTRUCTION_END -->\n"
)

insthree = (
    "<!-- INSTRUCTION_START Provide examples from the 'sampling stratification and trash density table' consider the results from the buildings, forest, and undefined columns. "
    "Find the two highest values and report the proportion of buffer they occupy and the average pcs/m."
    " INSTRUCTION_END -->\n"
)

insfour = (
    "<!-- INSTRUCTION_START Recall the definition for urban and rural is provided in the document instructions. If the sampling stratification does not meet either criteria, say so and reply with "
    "proportion of the buffer that contains the greatest proportion of samples for buildings, forest and undefined "
    " INSTRUCTION_END -->\n"
)

questions = (
    f"1. How do I interpret the results in the sampling stratification table ?{insone}"
    f"2. How do I interpret the results in the sampling stratification and trash density table ?{instwo}"
    f"3. Under what landuse conditions would a surveyor expect to find the most trash ?{insthree}"
    f"4. Given the results in the sampling stratification table, were these surveys collected in mostly urban environment or forested?{insfour}"
)

formatting_instructions = (
    "\n<!-- INSTRUCTION_START\n"
    "1. Label the summary 'Sampling stratification' (##)\n"
    "2. The label for the questions section is 'Frequently asked questions' (###) \n"
    "3. Repeat the question (in bold) DO NOT REPEAT the instructions. and then answer\n"
    " INSTRUCTION_END -->\n"
)

def make_stratification_prompt():
    """
    Assembles the user prompt for the sampling-stratification section.

    Reads the module-level `request`, `req_inf`, `questions` and
    `formatting_instructions` at call time.

    Returns:
    str: request + req_inf, the questions and the formatting instructions, separated by
    blank lines.
    """
    sections = (
        f'{request}{req_inf}',
        f'{questions}',
        f'{formatting_instructions}',
    )
    return '\n\n'.join(sections)

In [9]:
# Draft the sampling-stratification section with `sampstrat` as rough-draft context.
messagesxxx = messages_for_chat_completion(system_prompt=make_system_prompt(sampstrat), user_prompt=make_stratification_prompt())
completed_chatxxx = client.invoke(messagesxxx)
sampling_stratification = completed_chatxxx.content
# Reuses `split_phrase` from the summary cell to cut the response at the FAQ heading.
summary_strat, faq_strat = split_text_on_phrase(sampling_stratification, split_phrase)

# Register both parts for the MyST grid that follows.
glue('summary-strat', Markdown(summary_strat), display=False)
glue('faq-strat', Markdown(faq_strat), display=False)

:::::{grid} 1

::::{grid-item}

```{glue:md} summary-strat
:format: myst
```

::::

::::{grid-item}

```{glue} stratification_of_sampling
```
::::
::::{grid-item}

```{glue:md} faq-strat
:format: myst
```

::::

::::{grid-item}

```{glue} stratification_of_pcsm
```
::::

:::::

In [9]:
# NOTE(review): `break` outside a loop is a SyntaxError (see this cell's output). This
# looks like a deliberate stop for "Run All" — confirm, and remove before publishing.
break

SyntaxError: 'break' outside loop (668683560.py, line 1)

In [None]:
# Building blocks of the grid-forecast user prompt.
# NOTE: this cell rebinds `request`, `insone`, `insthree`, `insfour`, `questions` and
# `formatting_instructions`, which the sampling-stratification cell also defines.
# Adjacent string literals are concatenated with no separator: sentences need their own
# trailing space, and questions need their own trailing newline.
request = (
    "Summarize the grid forecast section. Include an explanation of what a grid forecast or grid approximation is and the "
    "method used in this report. Define what a prior is in Bayesian statistics and how this is relevant for field observations. "
    "Explain the different assumptions of the prior"
)

# One instruction per question; each ends with a newline.
insone = (
    "<!-- INSTRUCTION_START Consider whether the observed data is likely to be normally distributed (use the difference between the median "
    "and mean in summary statistics section) you can use the coefficient of variation also. "
    " INSTRUCTION_END -->\n"
)

insthree = (
    "<!-- INSTRUCTION_START Cite the name of the prior and the expected average and median pcs/m. INSTRUCTION_END -->\n"
)

insfour = (
    "<!-- INSTRUCTION_START Consider that the prior is comprised only of locations that are in the same geographic boundary. "
    "Recall that the posterior is a weighted average of the prior and likelihood, so if the in-boundary prior predicts an increase "
    "it is likely that elevated values were observed in other locations within the boundary compared to the likelihood "
    " INSTRUCTION_END -->\n"
)

insfive = (
    "<!-- INSTRUCTION_START Consider that the prior is comprised only of locations that are outside the geographic boundary. "
    "Recall that the posterior is a weighted average of the prior and likelihood, so if the out-boundary prior predicts an increase "
    "it is likely that locations outside of the region had elevated values compared to the likelihood "
    " INSTRUCTION_END -->\n"
)

inssix = (
    "<!-- INSTRUCTION_START You will find the observed pcs/m in the Summary statistics section. Consider the average pcs/m result of each prior in relation to the observed average. "
    "Cite the numerical differences, given the standard deviation how likely is a person to notice the increase or decrease if they take one sample ? "
    " INSTRUCTION_END -->\n"
)

# Questions 1a and 2 have no instruction block, so they carry an explicit newline to
# keep each question on its own line.
questions = (
    f"1. Why is grid approximation a reasonable modeling technique given the data ?{insone}"
    "1a. Do you have an example of other fields or domains that use a grid approximation or Bayesian methods ?\n"
    "2. What is the difference between grid approximation and linear or ensemble regression ?\n"
    f"3. Under what prior do we expect to find the most ? The least ?{insthree}"
    f"4. If the in-boundary grid approximation predicts an increase or decrease, what does that say about the other samples from within the boundary ?{insfour}"
    f"5. If the out-boundary grid approximation predicts an increase or decrease, what does that say about the other samples from outside of the boundary ?{insfive}"
    f"6. How different are the expected results from the observed results ? Should an increase or decrease be expected ? {inssix}"
)

formatting_instructions = (
    "\n<!-- INSTRUCTION_START \n"
    "1. Label the summary 'Forecasts and methods' (##)\n"
    "2. The label for the questions section is 'Frequently asked questions' (###) \n"
    "3. Repeat the question (in bold) DO NOT REPEAT the instructions. and then answer\n"
    " INSTRUCTION_END -->\n"
)

def make_grid_approximation_prompt():
    """
    Assembles the user prompt for the grid-forecast section.

    Reads the module-level `request`, `questions` and `formatting_instructions` at call
    time.

    Returns:
    str: The request, the questions and the formatting instructions, separated by blank
    lines.
    """
    parts = [f'{request}', f'{questions}', f'{formatting_instructions}']
    return '\n\n'.join(parts)

In [None]:
# Draft the grid-forecast section. The system context is the grid rough draft followed
# by the summary rough draft (`asum`).
messagesx = messages_for_chat_completion(system_prompt=make_system_prompt(f'{grid_f}\n\n{asum}'), user_prompt=make_grid_approximation_prompt())
completed_chatx = client.invoke(messagesx)
# `grid_forecast` holds the response's `.content` from here on.
grid_forecast = completed_chatx.content

In [None]:
# Building blocks of the regression / cluster-analysis user prompt.
# NOTE: this cell rebinds `request`, `insone`, `insfour`, `questions` and
# `formatting_instructions`, which earlier prompt cells also define.
# Adjacent string literals are concatenated with no separator: each fragment carries its
# own leading/trailing space.
request = (
    "Define cluster analysis (kmeans), linear regression and ensemble methods, explain the basic assumptions of each method"
)

# Instruction attached to the request itself.
req_inst = (
    "<!-- INSTRUCTION_START do not quote the regression results here. Explain the methods completely, explain what role the decision tree plays in ensemble methods INSTRUCTION_END -->\n"
)

insone = (
    "<!-- INSTRUCTION_START The document provides a table of results. Provide that table to user and write a narrative paragraph of all the results. If there is no table there was no "
    "regression performed. "
    " INSTRUCTION_END -->\n"
)

insfour = (
    "<!-- INSTRUCTION_START The average pcs/m is given as a table in the cluster analysis subsection and given as objects per meter. "
    "The distribution of land use values is given in the cluster analysis subsection and given as a float value that represents "
    "the average proportion of the buffer zone occupied by the land use category. The paragraph above the table explains how to interpret the table"
    " when you provide the results for the cluster label the results as % of buffer occupied by land use feature INSTRUCTION_END -->\n"
)

# Questions 2 and 3 have no instruction block, so they end with an explicit newline.
questions = (
    f"1. What were the r² and MSE of each test ? {insone}"
    "2. Given the r² and MSE of the different methods employed, how reliable do you think predictions would be based on these models ?\n"
    "3. Can any conclusions be drawn from these results ?\n"
    f"4. According to the cluster analysis what is the cluster that has the greatest average pcs/m ? What is the distribution of land use values within the cluster ? {insfour}"
)

formatting_instructions = (
    "\n<!-- INSTRUCTION_START \n"
    "1. Label the summary 'Linear and ensemble methods' (##)\n"
    "2. The label for the questions section is 'Frequently asked questions' (###) \n"
    "3. Repeat the question (in bold) DO NOT REPEAT the instructions. and then answer\n"
    " INSTRUCTION_END -->\n"
)

def make_regression_prompt():
    """
    Assembles the user prompt for the linear and ensemble methods section.

    Reads the module-level `request`, `req_inst`, `questions` and
    `formatting_instructions` at call time.

    Returns:
    str: request + req_inst, the questions and the formatting instructions, separated by
    blank lines.
    """
    opening = f'{request}{req_inst}'
    return '\n\n'.join([opening, f'{questions}', f'{formatting_instructions}'])

In [None]:
# Draft the regression / cluster-analysis section with `lin_f` as rough-draft context.
messagesxx = messages_for_chat_completion(system_prompt=make_system_prompt(lin_f), user_prompt=make_regression_prompt())
completed_chatxx = client.invoke(messagesxx)
linear_methods = completed_chatxx.content

In [None]:


def make_system_prompt(roughdraft):
    """
    Builds the system prompt for the corrections pass.

    NOTE: this rebinds the name `make_system_prompt` that an earlier cell defines for the
    drafting pass. After this cell runs, re-running any drafting cell uses this
    corrections prompt instead.

    Parameters:
    roughdraft (str): The rough-draft text the revised draft is checked against; appended
    verbatim after the instructions.

    Returns:
    str: The correction instructions followed by the rough draft.
    """
    # Adjacent literals are concatenated with no separator, so every sentence carries its
    # own trailing space.
    instructions = (
        "You are a researcher assigned to prepare a manuscript from a revised draft. "
        "You are tasked to correct the individual sections of the revised draft for the manuscript. "
        "Ensure that answers are correct by comparing the revised draft to the rough draft. "
        "Ensure that any conclusions are correct by reviewing the instructions in the rough draft and responses in the revised draft. "
        "Do not change the formatting of the revised draft. "
        "There are instructions in the rough draft labeled <!--- INSTRUCTION_START  your instructions are in here INSTRUCTION_END -->. Ensure the instructions were followed. "
        "The rough draft:\n"
    )
    return f"{instructions}{roughdraft}"

# Request text for the corrections pass; applied to every drafted section.
# NOTE: rebinds `request` and `req_inf`, which earlier prompt cells also define.
request = " ".join([
    "Please check the answers provided in this revised draft to the rough draft that you have.",
    "Any numerical float values should be rounded to two places.",
    "Return the document with the corrections please.",
    "Do not add any comments about the corrections, just do them and return the corrected document.",
])

# Instruction block appended directly after the request.
req_inf = " ".join([
    "<!-- INSTRUCTION_START We are concerned with numrical results and conclusions.",
    "Ensure that any conclusions are correct according to standard practice and the methods explained in the rough draft instructions.",
    "Do not change the markdown formatting of the document.",
    "INSTRUCTION_END -->\n",
])


def make_corections_prompt(document):
    """
    Assembles the user prompt for the corrections pass.

    Reads the module-level `request` and `req_inf` at call time.

    Parameters:
    document (str): The revised draft to be checked.

    Returns:
    str: request + req_inf, a 'The revised draft:' heading, then the document.
    """
    preamble = f'{request}{req_inf}\n'
    heading = 'The revised draft: \n\n'
    return preamble + heading + f'{document}'

In [None]:
def make_call_to_llm(system_prompt, user_prompt, user_args, sys_args, client):
    """
    Renders both prompts, sends them to the chat client and returns its response.

    Parameters:
    system_prompt (callable): Called with sys_args to produce the system prompt.
    user_prompt (callable): Called with user_args to produce the user prompt.
    user_args: The single argument passed to user_prompt. Note that it comes BEFORE
    sys_args in the signature.
    sys_args: The single argument passed to system_prompt.
    client: Object whose invoke(messages) method performs the call.

    Returns:
    Whatever client.invoke returns; callers in this notebook read its `.content`.
    """
    rendered_system = system_prompt(sys_args)
    rendered_user = user_prompt(user_args)
    chat_messages = messages_for_chat_completion(
        system_prompt=rendered_system,
        user_prompt=rendered_user,
    )
    return client.invoke(chat_messages)

In [None]:
# Rebind `client` to the model used for the corrections pass; the drafting cells above
# used ChatOpenAI(model=model_rough_draft) under the same name.
client = ChatOpenAI(model=model_corrections)

In [None]:
# messages_for_chat_completion(system_prompt=make_system_prompt(sampstrat), user_prompt=make_stratification_prompt('English'))
# Corrections pass. Argument order is (system builder, user builder, revised draft,
# rough draft, client): the third argument feeds make_corections_prompt and the fourth
# feeds make_system_prompt.
corrected_summary = make_call_to_llm(make_system_prompt, make_corections_prompt,summary, asum, client)
corrected_sampling_strat = make_call_to_llm(make_system_prompt, make_corections_prompt, sampling_stratification, sampstrat, client)
corrected_linear = make_call_to_llm(make_system_prompt, make_corections_prompt, linear_methods, lin_f, client)
# NOTE(review): the grid draft was generated with f'{grid_f}\n\n{asum}' as context, but is
# checked against grid_f alone here — confirm that omitting asum is intended.
corrected_grid = make_call_to_llm(make_system_prompt, make_corections_prompt, grid_forecast, grid_f, client)

In [None]:
# Render the corrected sampling-stratification section.
Markdown(corrected_sampling_strat.content)

In [None]:

# Render the corrected summary section.
Markdown(corrected_summary.content)


In [None]:
# Render the corrected linear and ensemble methods section.
Markdown(corrected_linear.content)

In [None]:
# Render the corrected grid-forecast section.
Markdown(corrected_grid.content)