## Notebook to prototype response generation from models

Sections:
1 - Run inference against locally hosted refact

In [None]:
# Install human_eval data
!pip install human_eval

## measure_humaneval_continue.py refactored to remove MPI
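Demonstrates calling a model through the Refact completions API (for a locally hosted server: http://127.0.0.1:8008/v1/completions). The base URL actually used is set by `INFERENCE_ENDPOINT` in the constants cell below.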

In [2]:
# Imports
import sys
import termcolor
import subprocess
import json
import time
import random
from copy import deepcopy
from human_eval.data import write_jsonl, read_problems
import requests
import pandas as pd

In [3]:
# Global constants
# Model identifier sent as the "model" field of every completion request.
# The commented-out lines are alternative models tried previously.
#MODEL = "smallcloudai/Refact-1_6B-fim"
# MODEL = "Refact/1.6B"
# MODEL = "codellama/7b/lora-20231026-161421"
MODEL = "codellama/7b/lora-20231107-201630"
#MODEL = "iter0070-testloss0.833"


# Sampling settings forwarded unchanged in the request payload.
TEMPERATURE = 0.2
TOP_P = 0.95
# Number of times the HumanEval problem list is repeated by the driver cell.
TIMES = 1
# Sent as "max_tokens" in the request payload.
MAX_TOKENS = 256
# Base URL of the inference server; "/v1/completions" is appended by
# run_completion_call.
# NOTE(review): this is a hard-coded remote host, while the section heading
# mentions http://127.0.0.1:8008 -- confirm which server is intended.
INFERENCE_ENDPOINT = 'http://52.36.165.232:8008'

# Snapshot of the run settings gathered in one dict.
# NOTE(review): not referenced by the HumanEval cells in this section;
# presumably kept for logging alongside results -- confirm.
metadata = {
    'MODEL': MODEL,
    'TEMPERATURE': TEMPERATURE,
    'TOP_P': TOP_P,
    'TIMES': TIMES,
    'MAX_TOKENS': MAX_TOKENS
}



In [4]:
# Functions
def run_completion_call(src_txt):
    """Send one prompt to the completions API and return the generated text.

    Keyword arguments:
    src_txt -- prompt text, sent as the "prompt" field of the request.

    The request goes to ``{INFERENCE_ENDPOINT}/v1/completions`` and uses the
    module-level MODEL, MAX_TOKENS, TOP_P and TEMPERATURE settings. The
    decoded JSON reply is printed in full, then the "text" of the first
    entry in "choices" is returned. A non-success HTTP status raises through
    ``raise_for_status``.
    """
    payload = {
        "model": MODEL,
        "max_tokens": MAX_TOKENS,
        "stream": False,
        "echo": True,
        "top_p": TOP_P,
        "temperature": TEMPERATURE,
        "prompt": src_txt,
        "stop": ["\n\n\n"],
    }
    response = requests.post(f"{INFERENCE_ENDPOINT}/v1/completions", json=payload)
    response.raise_for_status()

    reply = response.json()
    # Dump the raw reply so ids, finish reasons and token counts are visible.
    print(reply)

    first_choice = reply["choices"][0]
    return first_choice["text"]


def test_by_continuing(case):
    """Complete one HumanEval problem and store the result on ``case``.

    Keyword arguments:
    case -- problem dict; ``case["prompt"]`` is read and
            ``case["completion"]`` is written in place.

    The right-stripped prompt is sent to ``run_completion_call``. Only the
    leading run of indented or blank lines of the reply is kept as the
    completion: the first line that starts at column 0 marks the end of the
    function body being completed, so anything after it (for example extra
    functions the model went on to write) is discarded. This matches the
    MPI variant of this function later in the notebook.

    The kept text is printed in green, followed by the discarded tail in a
    dim style.
    """
    orig = case["prompt"].rstrip()
    # Show the whole prompt. Slicing off the last character (orig[:-1]) cut
    # the final quote from the closing docstring delimiter in the display.
    print(termcolor.colored(orig, "yellow"))

    uncut = run_completion_call(orig)

    kept = []
    for line in uncut.split("\n"):
        is_body_line = line.startswith(" ") or line.strip() == ""
        if not is_body_line:
            # Stop at the first top-level line. Filtering without stopping
            # would splice indented lines of later definitions into this
            # completion.
            break
        kept.append(line)
    t = "\n".join(kept)

    # t is a prefix of uncut by construction, so uncut[len(t):] is exactly
    # the discarded remainder.
    print_response = termcolor.colored(t, "green") + " " + termcolor.colored(uncut[len(t):], attrs=["dark"])
    print(print_response)
    case["completion"] = t



In [5]:
# Driver: generate a completion for every HumanEval problem, score the
# results with evaluate_functional_correctness, and append a one-line summary
# to human-eval-all-results.txt.

# Suffix inserted into the output file name and echoed in the summary line.
postfix = ""

t0 = time.time()
# read_problems() returns a mapping; only its values (the problem dicts) are
# needed. The list is repeated TIMES times.
problems = list(read_problems().values()) * TIMES
output = []


for i, case_ in enumerate(problems):
    # Work on a copy: test_by_continuing writes case["completion"] in place,
    # and with TIMES > 1 the same dict object appears more than once.
    case = deepcopy(case_)
    print("-" * 40, f" case={i}", "-" * 40)
    test_by_continuing(case)
    output.append(case)



# t1 - t0 covers problem loading and generation only, not the evaluation below.
t1 = time.time()

output_name = f"human-continue{postfix}.jsonl"
write_jsonl(output_name, output)

# Run the evaluator as a shell command and capture its stdout.
res = subprocess.check_output(f"evaluate_functional_correctness {output_name}", shell=True)
# Only the last stdout line is parsed. Single quotes are swapped for double
# quotes so json.loads accepts it (presumably the line is a Python dict repr
# -- confirm against the evaluator's output).
metrics = json.loads(res.decode('utf-8').strip().split('\n')[-1].replace("'", '"'))
print(termcolor.colored(metrics, "magenta"))

# One-line summary: settings, metrics, elapsed seconds, model.
tmp = f"method=continue temperature={TEMPERATURE} top_p={TOP_P} postfix='{postfix}' times={TIMES}  {metrics} {(t1 - t0):.2f} {MODEL}\n"

# Opened in append mode so results accumulate across runs.
with open("human-eval-all-results.txt", "a") as f:
    f.write(tmp)

print(tmp)

----------------------------------------  case=0 ----------------------------------------
[33mfrom typing import List


def has_close_elements(numbers: List[float], threshold: float) -> bool:
    """ Check if in given list of numbers, are any two numbers closer to each other than
    given threshold.
    >>> has_close_elements([1.0, 2.0, 3.0], 0.5)
    False
    >>> has_close_elements([1.0, 2.8, 3.0, 4.0, 5.0, 2.0], 0.3)
    True
    ""[0m
{'id': 'comp-x51b78K9ef4Z', 'object': 'text_completion', 'status': 'completed', 'created': 1701775295.8195434, 'uploaded': 1701775340.0031796, 'generated_tokens_n': 255, 'model': 'codellama/7b', 'choices': [{'index': 0, 'logprobs': None, 'finish_reason': 'length', 'text': '\n    for i in range(len(numbers) - 1):\n        for j in range(i + 1, len(numbers)):\n            if abs(numbers[i] - numbers[j]) < threshold:\n                return True\n    return False\n\n\ndef has_close_elements_2(numbers: List[float], threshold: float) -> bool:\n    """ C

## MPI variant of continue_human_eval

In [None]:
# Imports
import sys, termcolor, subprocess, json, time, random
from copy import deepcopy
from mpi4py import MPI
from human_eval.data import write_jsonl, read_problems
from human_eval.data import read_problems
import requests
import pandas as pd

import os

# sys.path entries are used verbatim by the import system: a leading "~" is
# not expanded, so the home-relative location must be expanded explicitly or
# the directory is never searched.
sys.path.append(os.path.expanduser("~/workspace/capstone-code-generation/evaluation_framework"))
from config import config_dict, logger

In [None]:
config_dict

In [None]:
# Global constants
# Model identifier sent as the "model" field of every completion request.
# The commented-out lines are alternative models tried previously.
#MODEL = "smallcloudai/Refact-1_6B-fim"
# MODEL = "Refact/1.6B"
#MODEL = "codellama/7b/lora-20231026-161421"
#MODEL = "codellama/7b"
MODEL = "codellama/7b/lora-20231107-201630"



# Sampling settings forwarded unchanged in the request payload.
TEMPERATURE = 0.2
TOP_P = 0.95
# Number of times the HumanEval problem list is repeated by the driver cell.
TIMES = 1
# Sent as "max_tokens" in the request payload.
MAX_TOKENS = 256
# Base URL of the inference server; "/v1/completions" is appended by
# run_completion_call.
INFERENCE_ENDPOINT = 'http://127.0.0.1:8008'

# Snapshot of the run settings gathered in one dict.
# NOTE(review): not referenced by the MPI HumanEval cells in this section;
# presumably kept for logging alongside results -- confirm.
metadata = {
    'MODEL': MODEL,
    'TEMPERATURE': TEMPERATURE,
    'TOP_P': TOP_P,
    'TIMES': TIMES,
    'MAX_TOKENS': MAX_TOKENS
}

In [None]:
# Functions
def run_completion_call(src_txt):
    """Send one prompt to the completions API and return the generated text.

    Keyword arguments:
    src_txt -- prompt text, sent as the "prompt" field of the request.

    The request goes to ``{INFERENCE_ENDPOINT}/v1/completions`` and uses the
    module-level MODEL, MAX_TOKENS, TOP_P and TEMPERATURE settings. Returns
    the "text" of the first entry in "choices". A non-success HTTP status
    raises through ``raise_for_status``.
    """
    payload = {
        "model": MODEL,
        "max_tokens": MAX_TOKENS,
        "stream": False,
        "echo": True,
        "top_p": TOP_P,
        "temperature": TEMPERATURE,
        "prompt": src_txt,
        "stop": ["\n\n\n"],
    }
    response = requests.post(f"{INFERENCE_ENDPOINT}/v1/completions", json=payload)
    response.raise_for_status()

    reply = response.json()
    first_choice = reply["choices"][0]
    return first_choice["text"]


def test_by_continuing(comm, case):
    """Complete one HumanEval problem and store the result on ``case``.

    Keyword arguments:
    comm -- MPI communicator; only its ``size`` attribute is read here, to
            decide how the prompt and response are printed.
    case -- problem dict; ``case["prompt"]`` is read and
            ``case["completion"]`` is written in place.

    The right-stripped prompt is sent to ``run_completion_call``. The
    completion kept is the leading run of indented or blank lines of the
    reply; the first line starting at column 0 ends it. The kept text is
    printed in green, followed by the discarded tail in a dim style.
    """
    from itertools import takewhile

    orig = case["prompt"].rstrip()
    # Show the whole prompt. Slicing off the last character (orig[:-1]) cut
    # the final quote from the closing docstring delimiter in the display.
    print_me = termcolor.colored(orig, "yellow")
    single_rank = comm.size == 1
    if single_rank:
        # With one rank the prompt can be shown before the (slow) request.
        print(print_me)

    uncut = run_completion_call(orig)

    # Keep lines up to, but not including, the first top-level line.
    body_lines = takewhile(
        lambda line: line.startswith(" ") or line.strip() == "",
        uncut.split("\n"),
    )
    t = "\n".join(body_lines)
    # Sanity check: the kept text is a prefix of the reply, so the slice
    # below is exactly the discarded remainder.
    assert uncut.startswith(t)

    print_response = termcolor.colored(t, "green") + " " + termcolor.colored(uncut[len(t):], attrs=["dark"])
    if single_rank:
        print(print_response)
    else:
        # With several ranks, emit prompt and response in a single print call
        # so they stay together in the output.
        print(print_me + "\n" + print_response)
    case["completion"] = t


In [None]:
MODEL

In [None]:
# main body of measure_humaneval_continue.py
t0 = time.time()
from human_eval.data import write_jsonl, read_problems
from human_eval.data import read_problems
problems = list(read_problems().values()) * TIMES
comm = MPI.COMM_WORLD
my_problems = problems[comm.rank::comm.size]
output = []
for i, case_ in enumerate(my_problems):
    case = deepcopy(case_)
    print("-" * 40, " rank=%i case=%i" % (comm.rank, i), "-" * 40)
    test_by_continuing(comm, case)
    output.append(case)
comm.barrier()
t1 = time.time()
tmp = comm.gather(output, root=0)
if comm.rank == 0:
    all_output = [x for y in tmp for x in y]
    output_name = "human-%s%s.jsonl" % ("continue", postfix)
    write_jsonl(output_name, all_output)
    res = subprocess.check_output(f"evaluate_functional_correctness {output_name}", shell=True)
    metrics = json.loads(res.decode('utf-8').strip().split('\n')[-1].replace("'", '"'))
    print(termcolor.colored(metrics, "magenta"))
    tmp = "method=%s temperature=%0.2f top_p=%0.2f postfix='%s' world=%i times=%i  %s %0.2fs %s\n" % (
        "continue", TEMPERATURE, TOP_P, postfix, comm.size, TIMES, metrics, (t1 - t0), MODEL)
    with open("human-eval-all-results.txt", "a") as f:
        f.write(tmp)
    print(tmp)

## Non-MPI Variant: Run /completions endpoint on prompts, test code for generate_responses.py

In [1]:
# Imports
# Imports
import sys, termcolor, subprocess, json, time, random
from copy import deepcopy
from mpi4py import MPI
import requests
import pandas as pd
import os

# Shared config from across project
from config import config_dict, logger


In [2]:
config_dict

{'pipeline_steps_to_run': ['split_train_test',
  'make_prompts',
  'generate_responses',
  'evaluate_responses'],
 'code_base_directory': '../snap_v4_clone/',
 'train_ratio': 0.8,
 'output_directory': 'data_produced_by_eval_fram/',
 'seed': 0,
 'split_strategy': 'random',
 'num_label_tokens': 2,
 'tokens_prompts_count': 1000,
 'num_label_lines': 2,
 'lines_prompts_count': 1000,
 'methods_prompts_count': 100,
 'infill': True,
 'max_lines_above': 50,
 'max_lines_below': 50,
 'prompt_directory': 'data/prompts/',
 'token_masking_strategies': ['token_infill', 'line_infill', 'method_infill'],
 'models': ['code_llama_7b_fine_tuned'],
 'generated_responses_directory': 'data/generated_responses/',
 'inference_model': 'codellama/7b/lora-20231107-201630',
 'inference_temperature': 0.2,
 'inference_top_p': 0.95,
 'inference_times': 1,
 'inference_max_tokens': 256,
 'evaluation_metrics': ['pass@k'],
 'evaluation_directory': 'data/evaluate_responses/'}

In [3]:
# Global constants
# Earlier hard-coded choices, superseded by the config_dict lookups below.
#MODEL = "smallcloudai/Refact-1_6B-fim"
#MODEL = "Refact/1.6B"
#MODEL = "codellama/7b"
#MODEL = "codellama/7b/lora-20231107-201630"
#PROMPT_FILE = "methods_df_10_30.csv"

# PROMPT_FILE = config_dict['prompt_file'] ToDo - capture prompt file
# for logging

# Global config, can be overridden via function arguments to 
# evaluation_framework function.
# These values become the default arguments of generate_responses.
# NOTE(review): INFERENCE_ENDPOINT is not set in this cell although
# run_completion_call reads it; it must already exist from an earlier cell.
PROMPTS_PATH = config_dict['prompt_directory']
GENERATED_RESPONSES_DIRECTORY = config_dict['generated_responses_directory']
MODEL = config_dict['inference_model']
TEMPERATURE = config_dict['inference_temperature']
TOP_P = config_dict['inference_top_p']
TIMES = config_dict['inference_times']
MAX_TOKENS = config_dict['inference_max_tokens']

# Snapshot of the configured inference settings; generate_responses dumps
# this dict to the *_metadata.json file written next to each output CSV.
metadata = {
    'MODEL': MODEL,
    'TEMPERATURE': TEMPERATURE,
    'TOP_P': TOP_P,
    'TIMES': TIMES,
    'MAX_TOKENS': MAX_TOKENS
}

In [4]:
metadata

{'MODEL': 'codellama/7b/lora-20231107-201630',
 'TEMPERATURE': 0.2,
 'TOP_P': 0.95,
 'TIMES': 1,
 'MAX_TOKENS': 256}

In [77]:
# Function definitions
def run_completion_call(src_txt,
                        model,
                        max_tokens,
                        top_p,
                        temperature):
    """ Issue one request to the refact /v1/completions API and return its text.

    Keyword arguments:
    src_txt -- prompt sent to the hosted model. The prompts used here are
               infill-style, of the form <PRE> ... <SUF> ... <MID>, and the
               model is expected to supply the text that belongs between
               the <PRE> and <SUF> parts.
    model -- model identifier, sent as "model".
    max_tokens -- sent as "max_tokens".
    top_p -- sent as "top_p".
    temperature -- sent as "temperature".

    The decoded JSON reply is printed in full; the "text" of the first entry
    in "choices" is returned. A non-success HTTP status raises through
    ``raise_for_status``.
    """
    payload = {
        "model": model,
        "max_tokens": max_tokens,
        "stream": False,
        "echo": True,
        "top_p": top_p,
        "temperature": temperature,
        "prompt": src_txt,
        "use_fast_kernels": True,
        "stop": ["\n\n\n"],
    }
    response = requests.post(f"{INFERENCE_ENDPOINT}/v1/completions", json=payload)
    response.raise_for_status()

    reply = response.json()
    # Dump the raw reply so ids, finish reasons and token counts are visible.
    print(reply)

    first_choice = reply["choices"][0]
    return first_choice["text"]


def generate_responses(output_directory = GENERATED_RESPONSES_DIRECTORY,
                       prompts_path = PROMPTS_PATH,
                       model = MODEL,
                       temperature = TEMPERATURE,
                       top_p = TOP_P,
                       times = TIMES,
                       logprobs = False,
                       max_tokens = MAX_TOKENS):
    """ Run inference over every prompt file and write completions to disk.

    For each .csv file in ``prompts_path`` (except 'skipped_files.csv') the
    file is loaded, each row's "prompt" is sent to ``run_completion_call``,
    and the replies are added as a 'Completion' column. The result is
    written to ``<timestamp>_<model>_output.csv`` in ``output_directory``,
    with a ``<timestamp>_<model>_metadata.json`` next to it recording the
    settings used for this call.

    Keyword arguments:
    output_directory -- directory to write responses; created if missing.
                        Default specified in config.py.
    prompts_path -- directory containing the prompt .csv files; each file
                    must have "prompt" and "label" columns. Default
                    specified in config.py.
    model -- model to run, e.g. 'codellama' or
             'codellama/7b/lora-20231107-201630' for a fine tuned model.
             Default specified in config.py.
    temperature -- sampling temperature. Default specified in config.py.
    top_p -- Top P cutoff for token selection. Default specified in
             config.py.
    times -- intended number of inference calls per prompt. Currently only
             recorded in the metadata file: one call is made per prompt.
    logprobs -- accepted for forward compatibility; not currently sent to
                the API.
    max_tokens -- maximum number of tokens to generate. Default specified in
                  config.py.

    Failures while writing either output file are reported with print and do
    not stop processing of the remaining prompt files.
    """
    # ToDo: Add in ability to specify a specific prompts file and output directory for testing
    logger.info("Generate responses")

    # Record the settings this call actually runs with. Dumping the
    # module-level ``metadata`` unchanged would misreport any argument that
    # was overridden by the caller.
    run_metadata = {
        **metadata,
        'MODEL': model,
        'TEMPERATURE': temperature,
        'TOP_P': top_p,
        'TIMES': times,
        'MAX_TOKENS': max_tokens,
    }
    # Model names contain '/', which would otherwise be read as directories.
    model_filename = model.replace('/', '-')

    # Process all prompt files in prompts/ directory
    for file in os.listdir(prompts_path):
        if file == 'skipped_files.csv':
            continue
        file_path = os.path.join(prompts_path, file)
        # Only regular .csv files are prompt files. This also skips the
        # *_metadata.json files left by earlier runs when the output and
        # prompt directories are the same.
        if not (file.lower().endswith('.csv') and os.path.isfile(file_path)):
            continue

        # Prompt dataframe. Columns are expected to be labeled
        df = pd.read_csv(file_path)

        # Run inference on every prompt, collecting replies in row order.
        completion_output = []
        for prompt, label in zip(df["prompt"], df["label"]):
            print_prompt = termcolor.colored(prompt, "yellow")
            print("\n", "Prompt:","\n",print_prompt)

            t = run_completion_call(prompt,
                                    model,
                                    max_tokens,
                                    top_p,
                                    temperature)
            completion_output.append(t)

            # Print response together with the expected label
            print_response = termcolor.colored(t, "green")
            print("\n", "Inference output:", "\n", print_response)
            print("\n", "Label:","\n", label)

        # Add generated output back to original DataFrame
        df['Completion'] = completion_output

        # Both output files share one timestamp so they can be paired up.
        timestamp = time.strftime("%Y%m%d-%H%M%S")

        try:
            os.makedirs(output_directory, exist_ok=True)
            inference_output_filename = os.path.join(output_directory, f"{timestamp}_{model_filename}_output.csv")
            df.to_csv(inference_output_filename, index=False)
        except Exception as e:
            print(f"An error occurred while writing the CSV file: {e}")

        try:
            metadata_output_filename = os.path.join(output_directory, f"{timestamp}_{model_filename}_metadata.json")
            with open(metadata_output_filename, 'w') as meta_file:
                json.dump(run_metadata, meta_file, indent=4)
        except Exception as e:
            print(f"An error occurred while writing the JSON file: {e}")


In [78]:
# Scratch check of the output file name. The error below shows what happens
# when the directory separator is missing and the '/' in the model name is
# left in place.
# OSError: Cannot save file into a non-existent directory: 'example_prompts20231127-012314codellama/7b'
#!ls ./example_prompts
timestamp = time.strftime("%Y%m%d-%H%M%S")
# Plain concatenation relies on GENERATED_RESPONSES_DIRECTORY ending in '/'.
# '/' in the model name is replaced so it is not read as a sub-directory.
inference_output_filename = f"{GENERATED_RESPONSES_DIRECTORY}{timestamp}_{MODEL.replace('/', '-')}_output.csv"

print(inference_output_filename)

data/generated_responses/20231127-044918_codellama-7b-lora-20231107-201630_output.csv


In [79]:
config_dict

{'pipeline_steps_to_run': ['split_train_test',
  'make_prompts',
  'generate_responses',
  'evaluate_responses'],
 'code_base_directory': '../snap_v4_clone/',
 'train_ratio': 0.8,
 'output_directory': 'data_produced_by_eval_fram/',
 'seed': 0,
 'split_strategy': 'random',
 'num_label_tokens': 2,
 'tokens_prompts_count': 1000,
 'num_label_lines': 2,
 'lines_prompts_count': 1000,
 'methods_prompts_count': 100,
 'infill': True,
 'max_lines_above': 50,
 'max_lines_below': 50,
 'prompt_directory': 'data/prompts/',
 'token_masking_strategies': ['token_infill', 'line_infill', 'method_infill'],
 'models': ['code_llama_7b_fine_tuned'],
 'generated_responses_directory': 'data/generated_responses/',
 'inference_model': 'codellama/7b/lora-20231107-201630',
 'inference_temperature': 0.2,
 'inference_top_p': 0.95,
 'inference_times': 1,
 'inference_max_tokens': 256,
 'evaluation_metrics': ['pass@k'],
 'evaluation_directory': 'data/evaluate_responses/'}

In [81]:
# Earlier invocation using the configured defaults, kept for reference.
# generate_responses(output_directory = "./example_prompts/",
#                        prompts_path = "./example_prompts/",
#                        model = MODEL,
#                        temperature = TEMPERATURE,
#                        top_p = TOP_P,
#                        times = TIMES,
#                        max_tokens = MAX_TOKENS)

# Trial run against the example prompts with overridden sampling settings.
# Prompts are read from, and results written to, the same directory.
# NOTE(review): generate_responses does not appear to issue several inference
# calls per prompt for times=3, nor to request logprobs from the API --
# confirm whether these two arguments are meant to take effect.
generate_responses(output_directory = "./example_prompts/",
                       prompts_path = "./example_prompts/",
                       model = MODEL,
                       temperature = TEMPERATURE,
                       top_p = .6,
                       times = 3,
                       logprobs = True,
                       max_tokens = MAX_TOKENS)

2023-11-27 04:55:26,242 - INFO - Generate responses
2023-11-27 04:55:26,245 - DEBUG - Starting new HTTP connection (1): 127.0.0.1:8008
2023-11-27 04:55:26,248 - DEBUG - http://127.0.0.1:8008 "POST /v1/completions HTTP/1.1" 200 None



 Prompt: 
 [33m<PRE>     }

    // TODO: Please note that this function need to be removed once TableOptions Feature
    // is implemented for the SCD2 and MERGE INTO snaps.
    protected void createHashDistributionColumn(PropertyBuilder builder) {
        builder.describe(HASH_DISTRIBUTION_COLUMN_PROP,
                HASH_DISTRIBUTION_COLUMN_LABEL,
                HASH_DISTRIBUTION_COLUMN_DESC)
                .expression()
                .add();
    }

    private void createInputSourceAliasProperty(final PropertyBuilder builder) {
        builder.describe(SCD2_SOURCE_TABLE_ALIAS_PROP,
                SCD2_SOURCE_TABLE_ALIAS_LABEL,
                SCD2_SOURCE_TABLE_ALIAS_DESC)
                .type(SnapType.STRING)
                .expression()
                .add();
    }

    private void createSCD2TableActionProperty(final PropertyBuilder builder) {
        builder.describe(SCD_TABLE_ACTION_PROP, SCD_TABLE_ACTION_LABEL, SCD_TABLE_ACTION_DESC)
                .withAllowedValue

2023-11-27 04:55:38,343 - DEBUG - Starting new HTTP connection (1): 127.0.0.1:8008
2023-11-27 04:55:38,346 - DEBUG - http://127.0.0.1:8008 "POST /v1/completions HTTP/1.1" 200 None


{'id': 'comp-49stw55T2jff', 'object': 'text_completion', 'status': 'completed', 'created': 1701060926.2475886, 'uploaded': 1701060938.3415048, 'generated_tokens_n': 71, 'model': 'codellama/7b', 'choices': [{'index': 0, 'logprobs': None, 'finish_reason': 'stop-eot', 'text': '       builder.describe(TABLE_COLUMN_LIST_PROP, TABLE_COLUMN_LIST_LABEL, TABLE_COLUMN_LIST_DESC)\n                .type(SnapType.TABLE)\n                .withEntry(columnNameProp)\n                .withEntry(columnDataTypeProp)\n <EOT>'}]}

 Inference output: 
 [32m       builder.describe(TABLE_COLUMN_LIST_PROP, TABLE_COLUMN_LIST_LABEL, TABLE_COLUMN_LIST_DESC)
                .type(SnapType.TABLE)
                .withEntry(columnNameProp)
                .withEntry(columnDataTypeProp)
 <EOT>[0m

 Label: 
         builder.describe(TABLE_COLUMN_LIST_PROP, TABLE_COLUMNS_LIST_LABEL, TABLE_COLUMNS_LIST_DESC)
                .type(SnapType.TABLE)
                .enableIf(COLUMN_TABLE_ENABLE_COND)
                .with

2023-11-27 04:55:42,520 - DEBUG - Starting new HTTP connection (1): 127.0.0.1:8008
2023-11-27 04:55:42,522 - DEBUG - http://127.0.0.1:8008 "POST /v1/completions HTTP/1.1" 200 None


{'id': 'comp-Am2CfdKQDhNi', 'object': 'text_completion', 'status': 'completed', 'created': 1701060938.3453043, 'uploaded': 1701060942.517956, 'generated_tokens_n': 25, 'model': 'codellama/7b', 'choices': [{'index': 0, 'logprobs': None, 'finish_reason': 'stop-eot', 'text': '       }\n    }\n\n    public void executeSql(String sql) {\n        establishConnection();\n <EOT>'}]}

 Inference output: 
 [32m       }
    }

    public void executeSql(String sql) {
        establishConnection();
 <EOT>[0m

 Label: 
         }
    }

    public void executeSql(String sql) {
        establishConnection();


 Prompt: 
 [33m<PRE>     protected String url;
    protected String exportOrServiceURL;
    protected List<Pair<String, ExpressionProperty>> queryParams;
    protected boolean isBulkUpdate = false;
    protected boolean isValidation = false;
    protected boolean moreRequests = false;
    protected boolean isAllEntities = false;
    protected Map<String, Object> cursorMap = null;
    protec

2023-11-27 04:55:53,683 - DEBUG - Starting new HTTP connection (1): 127.0.0.1:8008
2023-11-27 04:55:53,685 - DEBUG - http://127.0.0.1:8008 "POST /v1/completions HTTP/1.1" 200 None


{'id': 'comp-GWTKIRPM37ZE', 'object': 'text_completion', 'status': 'completed', 'created': 1701060942.5218859, 'uploaded': 1701060953.6807272, 'generated_tokens_n': 71, 'model': 'codellama/7b', 'choices': [{'index': 0, 'logprobs': None, 'finish_reason': 'stop-eot', 'text': '                   }\n                } else if (value instanceof List) {\n                    fieldsSet.add(key);\n                    jsArr = (List) value;\n                    if (rootKey != null) {\n                        findKeysOfArray(jsArr, rootKey + "." + key, fieldsSet);\n <EOT>'}]}

 Inference output: 
 [32m                   }
                } else if (value instanceof List) {
                    fieldsSet.add(key);
                    jsArr = (List) value;
                    if (rootKey != null) {
                        findKeysOfArray(jsArr, rootKey + "." + key, fieldsSet);
 <EOT>[0m

 Label: 
                     }
                } else if (value instanceof List) {
                    jsArr = (

2023-11-27 04:56:08,334 - DEBUG - Starting new HTTP connection (1): 127.0.0.1:8008
2023-11-27 04:56:08,336 - DEBUG - http://127.0.0.1:8008 "POST /v1/completions HTTP/1.1" 200 None


{'id': 'comp-RPTnttwihXPi', 'object': 'text_completion', 'status': 'completed', 'created': 1701060953.684675, 'uploaded': 1701060968.3313718, 'generated_tokens_n': 90, 'model': 'codellama/7b', 'choices': [{'index': 0, 'logprobs': None, 'finish_reason': 'stop-eot', 'text': '   }\n\n    /**\n     * getSchema\n     *\n     * @param serverVersion\n     * @return XSD to be used for REST calls\n     */\n    public String getSchema(String serverVersion) {\n        return getAPIVersion(serverVersion) == null ? getXmlSchema() : getAPIVersion(serverVersion);\n    }\n\n    /**\n     * getAPIVersion\n <EOT>'}]}

 Inference output: 
 [32m   }

    /**
     * getSchema
     *
     * @param serverVersion
     * @return XSD to be used for REST calls
     */
    public String getSchema(String serverVersion) {
        return getAPIVersion(serverVersion) == null ? getXmlSchema() : getAPIVersion(serverVersion);
    }

    /**
     * getAPIVersion
 <EOT>[0m

 Label: 
     }

    /**
     * Gives the API 

In [None]:
# Inspect lines input .csv file

# n = 100

# snap_lines_prompts_df = pd.read_csv("../data/prompts/lines_df.csv")
# #snap
# print(len(snap_lines_prompts_df))
# lines_nth_row = snap_lines_prompts_df.iloc[n]

# print(lines_nth_row)


# for column_name, cell_content in lines_nth_row.items():
#     print(f"Column: {column_name}\nContent: {cell_content}\n{'-'*50}")



In [None]:
# Inspect methods input .csv file

# n = 0

# snap_methods_prompts_df = pd.read_csv("../data/methods_df_10_30.csv")

# #how many prompts?
# print(len(snap_methods_prompts_df))

# #inspect a line
# lines_nth_row = snap_methods_prompts_df.iloc[n]
# print(lines_nth_row)


# for column_name, cell_content in lines_nth_row.items():
#     print(f"Column: {column_name}\nContent: {cell_content}\n{'-'*50}")



In [None]:
# Clean up initial .csv: 1) update column headers 2) drop NaN columns
# snap_methods_prompts_df.rename(columns={"0": "path", "1":"prompt", "2":"label", "3":"start_line", "4":"end_line"}, inplace=True)
# snap_methods_prompts_df.drop(columns=['5', '6', '7', '8'], inplace=True)
# snap_methods_prompts_df

In [None]:
# orig = snap_methods_prompts_df.iloc[0]["prompt"]
# print_me = termcolor.colored(orig, "yellow")
# print("\n", "Prompt:","\n",print_me)

In [None]:
# # Inspect methods .csv
# snap_methods_prompts_df = pd.read_csv("methods_df_10_30.csv")
# methods_first_row = snap_prompts_df.iloc[0]

# print(first_row)


# for column_name, cell_content in first_row.items():
#     print(f"Column: {column_name}\nContent: {cell_content}\n{'-'*50}")

In [None]:
# Run the prompts through the model storing in a list to add back to our original dataframe

# completion_output = []

# for i in range(0,len(snap_methods_prompts_df)):
#     orig = snap_methods_prompts_df.iloc[i]["prompt"]
#     #print_me = termcolor.colored(orig[:-1], "yellow")
#     print_me = termcolor.colored(orig, "yellow") # line above is stripping '>' from <MID> token

#     print("\n", "Prompt:","\n",print_me)

#     # run inference with prompt
#     t = run_completion_call(orig)

#     # save response
#     uncut = t

#     # process response
#     lines = t.split("\n")
#     filtered = [x for x in lines if x.startswith(" ") or x.strip() == ""]
#     t = "\n".join(filtered)

#     # add response to list
#     completion_output.append(t)
    
    
#     #assert uncut.startswith(t)
#     # Print response together with prompt and label
#     print_response = termcolor.colored(t, "green") + " " + termcolor.colored(uncut[len(t):], attrs=["dark"])
#     print(print_response)
#     print("\n", "Label:","\n", snap_methods_prompts_df.iloc[i]["label"])

# # Add generated output back to original DataFrame
# snap_methods_prompts_df['Completion'] = completion_output

In [None]:
 # snap_methods_prompts_df

In [None]:
# Inspect lines .csv post inference
# snap_lines_prompts_df['Completion'] = completion_output
# lines_first_row = snap_lines_prompts_df.iloc[0]

# print(lines_first_row)


# for column_name, cell_content in lines_first_row.items():
#     print(f"Column: {column_name}\nContent: {cell_content}\n{'-'*50}")

In [None]:
# Write inference output to .csv
# snap_lines_prompts_df.to_csv('"../data/output/20231109_lora-20231107-201630_finetune_inference_output.csv', index=False)
# Write hyperparameters to a JSON file
# with open('../data/output/20231107_lora-20231107-201630_metadata.json', 'w') as meta_file:
#     json.dump(metadata, meta_file, indent=4)

# # Write inference output to .csv
# snap_methods_prompts_df.to_csv('../data/output/20231112_codellama7B_pretrained_inference_output.csv', index=False)

# # Write hyperparameters to a JSON file
# with open('../data/output/20231112_codellama7B_pretrained_metadata.json', 'w') as meta_file:
#     json.dump(metadata, meta_file, indent=4)


## MPI Variant: Run /completions endpoint on prompts, test code for generate_responses.py

In [63]:
# Imports
import sys, termcolor, subprocess, json, time, random
from copy import deepcopy
from mpi4py import MPI
from human_eval.data import write_jsonl, read_problems
from human_eval.data import read_problems
import requests
import pandas as pd

from config import config_dict, logger

In [64]:
# Global config, can be overridden via function arguments to 
# evaluation_framework function.
# All values are read from the shared config_dict; they become the default
# arguments of generate_responses.
PROMPTS_PATH = config_dict['prompt_directory']
GENERATED_RESPONSES_DIRECTORY = config_dict['generated_responses_directory']
MODEL = config_dict['inference_model']
TEMPERATURE = config_dict['inference_temperature']
TOP_P = config_dict['inference_top_p']
TIMES = config_dict['inference_times']
MAX_TOKENS = config_dict['inference_max_tokens']
# Base URL of the inference server; "/v1/completions" is appended by
# run_completion_call.
# NOTE(review): the config_dict dumps shown earlier in this notebook contain
# no 'inference_endpoint' key -- confirm config.py defines it.
INFERENCE_ENDPOINT = config_dict['inference_endpoint']

# Snapshot of the configured inference settings gathered in one dict.
metadata = {
    'MODEL': MODEL,
    'TEMPERATURE': TEMPERATURE,
    'TOP_P': TOP_P,
    'TIMES': TIMES,
    'MAX_TOKENS': MAX_TOKENS
}

In [None]:
# Function definitions - MPI variant, allows multiple GPUs to be used for inference
def run_completion_call(src_txt,
                        model,
                        max_tokens,
                        top_p,
                        temperature):
    """ Issue one request to the refact /v1/completions API and return its text.

    Keyword arguments:
    src_txt -- prompt sent to the hosted model. The prompts used here are
               infill-style, of the form <PRE> ... <SUF> ... <MID>, and the
               model is expected to supply the text that belongs between
               the <PRE> and <SUF> parts.
    model -- model identifier, sent as "model".
    max_tokens -- sent as "max_tokens".
    top_p -- sent as "top_p".
    temperature -- sent as "temperature".

    Returns the "text" of the first entry in "choices". A non-success HTTP
    status raises through ``raise_for_status``.
    """
    payload = {
        "model": model,
        "max_tokens": max_tokens,
        "stream": False,
        "echo": True,
        "top_p": top_p,
        "temperature": temperature,
        "prompt": src_txt,
        "stop": ["\n\n\n"],
    }
    response = requests.post(f"{INFERENCE_ENDPOINT}/v1/completions", json=payload)
    response.raise_for_status()

    reply = response.json()
    first_choice = reply["choices"][0]
    return first_choice["text"]


def generate_responses(output_directory=GENERATED_RESPONSES_DIRECTORY,
                       prompts_path=PROMPTS_PATH,
                       model=MODEL,
                       temperature=TEMPERATURE,
                       top_p=TOP_P,
                       times=TIMES,
                       max_tokens=MAX_TOKENS):
    """ Run inference on every prompt file in a directory and write the
    responses, plus a metadata file, to the output directory.

    Each prompt file is read with pandas.read_csv and must contain the
    columns "prompt" and "label". For every row the prompt is sent to
    run_completion_call; the results are stored in a new "Completion"
    column and the DataFrame is written to
    <output_directory>/<timestamp>_<model>_output.csv. The settings used
    for the run are written next to it as
    <output_directory>/<timestamp>_<model>_metadata.json.
    A file named 'skipped_files.csv' in prompts_path is ignored.

    Keyword arguments:
    output_directory -- directory to write responses, default specified
                        in config.py. Created if it does not exist.
    prompts_path -- directory to find prompt files. default specified
                    in config.py.
    model -- specify model to be run format examples: 'codellama' or
             'codellama/7b/lora-20231107-201630' in the case of a fine
             tuned model. Default specified in config.py.
    temperature -- temperature for chosen model. Default specified in
                   config.py
    top_p -- Top P cutoff for token selection. Default specified in
             config.py
    times -- Number of inference calls for the prompt, supporting
             sampling of outputs. Default specified in config.py.
             NOTE: currently only recorded in the metadata file; each
             prompt is sent exactly once.
    max_tokens -- Maximum number of tokens to be generated. Default
                  specified in config.py.

    Returns None. Failures while writing the CSV or JSON output are
    reported on stdout and do not stop processing of the remaining
    prompt files; errors from reading prompts or from inference propagate.
    """
    # This section's import cell does not import os, so bring it into
    # scope here rather than relying on a later cell having been run.
    import os

    # ToDo: Add in ability to specify a specific prompts file and output directory for testing
    logger.info("Generate responses")

    # Record the settings actually used for this call. The module-level
    # `metadata` dict only holds the config defaults and would be wrong
    # whenever an argument is overridden.
    run_metadata = {
        'MODEL': model,
        'TEMPERATURE': temperature,
        'TOP_P': top_p,
        'TIMES': times,
        'MAX_TOKENS': max_tokens,
    }
    model_filename = model.replace('/', '-')  # Replace forward slashes in model name

    # Process all prompt files in prompts/ directory (sorted so the
    # processing order is the same on every run)
    for file in sorted(os.listdir(prompts_path)):
        if file == 'skipped_files.csv':
            continue
        file_path = os.path.join(prompts_path, file)
        if not os.path.isfile(file_path):
            # Sub-directories cannot be read as CSV
            continue

        # Prompt dataframe. Columns are expected to be labeled
        df = pd.read_csv(file_path)

        # Loop over prompts dataframe, run inference to complete infilling
        completion_output = []
        for prompt, label in zip(df["prompt"], df["label"]):
            print("\n", "Prompt:", "\n", termcolor.colored(prompt, "yellow"))

            # run inference with prompt
            t = run_completion_call(prompt,
                                    model,
                                    max_tokens,
                                    top_p,
                                    temperature)
            completion_output.append(t)

            # Print response together with label
            print("\n", "Inference output:", "\n", termcolor.colored(t, "green"))
            print("\n", "Label:", "\n", label)

        # Add generated output back to original DataFrame
        df['Completion'] = completion_output

        # Both output files share one timestamp so they can be paired up.
        timestamp = time.strftime("%Y%m%d-%H%M%S")
        inference_output_filename = os.path.join(
            output_directory, f"{timestamp}_{model_filename}_output.csv")
        metadata_output_filename = os.path.join(
            output_directory, f"{timestamp}_{model_filename}_metadata.json")

        # Writing is deliberately best-effort: a failed write is reported
        # and the remaining prompt files are still processed.
        try:
            os.makedirs(output_directory, exist_ok=True)
            df.to_csv(inference_output_filename, index=False)
        except Exception as e:
            print(f"An error occurred while writing the CSV file: {e}")

        try:
            with open(metadata_output_filename, 'w') as meta_file:
                json.dump(run_metadata, meta_file, indent=4)
        except Exception as e:
            print(f"An error occurred while writing the JSON file: {e}")


## Run directly against model (this doesn't work yet at all and seems harder. Started by Adam, minimal changes by Jeremiah)

In [None]:
import torch

In [None]:
# Define model
class TheModelClass(torch.nn.Module):
    """Small convolutional network: two conv + max-pool stages followed
    by three fully connected layers.

    The first convolution takes 3 input channels, and forward() reshapes
    the pooled features to 16 * 5 * 5 values per sample, which matches
    inputs of shape (batch, 3, 32, 32). The output has 10 values per
    sample.
    """

    def __init__(self):
        super().__init__()
        # Only `torch` is imported by this notebook section, so the layer
        # classes are reached through torch.nn rather than a bare `nn`.
        nn = torch.nn
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)

    def forward(self, x):
        """Return the 10 output values per sample for input batch x."""
        relu = torch.nn.functional.relu
        x = self.pool(relu(self.conv1(x)))
        x = self.pool(relu(self.conv2(x)))
        # Flatten the (16, 5, 5) feature maps to one vector per sample
        x = x.view(-1, 16 * 5 * 5)
        x = relu(self.fc1(x))
        x = relu(self.fc2(x))
        x = self.fc3(x)
        return x

In [None]:
class CodeLlama7B(torch.nn.Module):
    """Placeholder for the fine-tuned model's architecture.

    No layers are defined yet, so the instance has no parameters and
    cannot run a forward pass. The real architecture must be filled in
    before a state dict can be loaded into it.
    """

    def __init__(self, *args, **kwargs):  # Define the model's architecture parameters
        super().__init__()
        # TODO: define the layers and components of the model

    def forward(self, *args, **kwargs):  # Define the forward pass
        # TODO: implement the forward pass logic
        raise NotImplementedError("CodeLlama7B.forward is not implemented yet")

# Create an instance of the model (pass the architecture parameters
# here once __init__ accepts them)
model = CodeLlama7B()

In [None]:
# Load the model weights from the .pt file
# NOTE: the path below is a placeholder and must be replaced with a real
# checkpoint file before this cell can run.
model_weights = torch.load("path/to/your/fine_tuned_model.pt")

# Load the state dictionary into your model
# `model` must already exist (created in an earlier cell).
# NOTE(review): this assumes the file contains a plain state dict whose
# keys match the model's parameters -- confirm for the checkpoint used.
model.load_state_dict(model_weights)

In [None]:
 # Switch the module to evaluation mode before running inference
 model.eval()

# Loading Model For Inference

In [None]:
# Template: build the model, load a saved state dict, switch to eval mode.
# NOTE: `args`, `kwargs` and `PATH` are placeholders; none of them is
# defined in the visible cells, so they must be set before running this.
model = TheModelClass(*args, **kwargs)
model.load_state_dict(torch.load(PATH))
model.eval()

# Load Entire Model

In [None]:
import os
os.getcwd()

In [None]:
# Model class must be defined somewhere
# Unpickle whatever object was saved in the checkpoint file and display it.
# NOTE(review): the file name (mp_rank_00_model_states.pt) suggests a
# training-state checkpoint; torch.load may then return a dict rather
# than a module -- inspect the displayed object to confirm.
model = torch.load('/home/ubuntu/.refact/perm-storage/loras/lora-20231026-161421/checkpoints/iter0070-testloss0.833/mp_rank_00_model_states.pt')
model

In [None]:
# Switch to evaluation mode; only works if `model` is a torch.nn.Module
# (a plain dict loaded from a checkpoint has no .eval()).
model.eval()

# Load Model In TorchScript Format

In [None]:
# Load the checkpoint as a TorchScript module and switch it to eval mode.
# NOTE(review): torch.jit.load expects a file produced by torch.jit.save;
# confirm this checkpoint was exported that way.
model = torch.jit.load('/home/ubuntu/.refact/perm-storage/loras/lora-20231026-161421/checkpoints/iter0070-testloss0.833/mp_rank_00_model_states.pt')
model.eval()

In [None]:
# Same TorchScript load for a different LoRA checkpoint. The path is
# relative, so it depends on the notebook's current working directory.
# NOTE(review): torch.jit.load expects a file produced by torch.jit.save;
# confirm this checkpoint was exported that way.
model = torch.jit.load('../.refact/perm-storage/loras/lora-20231002-231436/checkpoints/iter0750-testloss0.353/mp_rank_00_model_states.pt')
model.eval()