# Big Code Bench Replicate

In [1]:
from human_eval.data import write_jsonl, read_problems, BIGCODE_TEST
from human_eval.evaluation import evaluate_functional_correctness
import os
import logging
import sys
import time, datetime
from langchain_community.llms import Replicate
import json
from typing import Iterable, Dict
import gzip

%load_ext autoreload
%autoreload 2

In [2]:
def remove_triple_backticks_at_edges(text: str) -> str:
    """
    Removes triple backticks (` ``` `) if they appear at the beginning or the end of the text.

    :param text: The input text as a string.
    :return: A string with triple backticks removed from the edges.
    """
    s1 = "```python\n"
    s2 = "```python"
    s3 = "```"
    for s in [s1, s2, s3]:
        if text.startswith(s):
            text = text[len(s):].lstrip()      # Remove a version of backticks + extra whitespace at the start
    if text.endswith(s3):
        text = text[: -len(s3)].rstrip()       # Remove the backticks + extra whitespace at the end
    return text


def remove_last_triple_backticks(text: str) -> str:
    """
    Removes everything starting from the last triple backticks (` ``` `) to the end.

    :param text: The input text as a string.
    :return: A string with all text removed after the last set of triple backticks
    """
    s = "```"
    idx = text.rfind( s )
    return text[:idx]

In [3]:
def get_import_statements(code_str: str) -> str:
    """
    Keeps lines that begin with 'import' or 'from' from a multiline string.
    """
    lines = code_str.splitlines()
    filtered_lines = []
    for line in lines:
        stripped_line = line.strip()
        if stripped_line.startswith('import ') or stripped_line.startswith('from '):
            filtered_lines.append(line)
    res = '\n'.join(filtered_lines) + '\n'
    s = 'from typing import List'
    if s not in res:
        res = s + '\n' + res
    return res

# Example usage
code_with_imports = '''import collections
import random
import string

def task_func(length=100):
    return 1
'''
print( get_import_statements(code_with_imports) )

from typing import List
import collections
import random
import string



In [4]:
def write_jsonl(filename: str, data: Iterable[Dict], append: bool = False):
    """
    Writes an iterable of dictionaries to jsonl
    """
    if append:
        mode = 'ab'
    else:
        mode = 'wb'
    filename = os.path.expanduser(filename)
    if filename.endswith(".gz"):
        with open(filename, mode) as fp:
            with gzip.GzipFile(fileobj=fp, mode='wb') as gzfp:
                for x in data:
                    gzfp.write((json.dumps(x) + "\n").encode('utf-8'))
    else:
        with open(filename, mode) as fp:
            for x in data:
                fp.write((json.dumps(x) + "\n").encode('utf-8'))


def stream_jsonl(filename: str) -> Iterable[Dict]:
    """
    Parses each jsonl line and yields it as a dictionary
    """
    if filename.endswith(".gz"):
        with open(filename, "rb") as gzfp:
            with gzip.open(gzfp, 'rt') as fp:
                for line in fp:
                    if any(not x.isspace() for x in line):
                        yield json.loads(line)
    else:
        with open(filename, "r") as fp:
            for line in fp:
                if any(not x.isspace() for x in line):
                    yield json.loads(line)


def read_problems(evalset_file: str) -> Dict[str, Dict]:
    return {task["task_id"]: task for task in stream_jsonl(evalset_file)}

In [5]:
def is_full_function(input_str: str) -> str:
    '''
    Determine if input string is a Python function
    using simple heuristics (all that's needed)
    '''
    lines = input_str.splitlines()
    for line in lines:
        if line.startswith('def '):
            return True
    return False


def remove_triple_backticks_at_edges(text: str) -> str:
    """
    Removes triple backticks (` ``` `) if they appear at the beginning or the end of the text.

    :param text: The input text as a string.
    :return: A string with triple backticks removed from the edges.
    """
    s1 = "```python\n"
    s2 = "```python"
    s3 = "```"
    for s in [s1, s2, s3]:
        if text.startswith(s):
            text = text[len(s):].lstrip()      # Remove a version of backticks + extra whitespace at the start
    if text.endswith(s3):
        text = text[: -len(s3)].rstrip()       # Remove the backticks + extra whitespace at the end
    return text


def add_import(text: str) -> str:
    ''' Adds import statements stripped by LLM '''
    text = text.strip()
    s = 'from typing import List'
    if 'def ' in text and s not in text:
        text = s + '\n\n' + text
    return text

string = 'def my_func(input):\n    return output'
print(add_import(string))

from typing import List

def my_func(input):
    return output


In [6]:
def get_response(model, prompt):
    ''' One API call to a Mistral model '''
    chat_response = client.chat.complete(
                model = model,
                messages = [{
                        "role": "user",
                        "content": prompt, 
                    }, ]
                )
    return chat_response.choices[0].message.content

In [7]:
def read_jsonl(file_path):
    '''
    Read a JSONL file.
    Parameters:
        file_path (str): The path to the JSONL file.
    Returns:
        List[dict]: A list of dictionaries representing the JSON objects.
    '''
    data = []
    with open(file_path) as f:
        for line in f:
            line = line.strip()
            if line:
                data.append(json.loads(line))
    return data

In [8]:
tasks = read_problems(BIGCODE_TEST)
print(f'Type of tasks: {type(tasks)}\n')

counter = 0
for k,v in tasks.items():
    if counter == 10:
        break
    print(f'task_id: {k}', type(k))
    for k2, v2 in v.items():
        print(f'\n{k2}:\n{v2}')
    print('\n' + '='*100 + '\n')
    counter += 1

Type of tasks: <class 'dict'>

task_id: BigCodeBench/0 <class 'str'>

task_id:
BigCodeBench/0

test:


import unittest
from unittest.mock import patch
from random import seed, shuffle
import itertools
class TestCases(unittest.TestCase):
    def test_default_numbers(self):
        # Test with default number range (1 to 10) to check that the result is a positive float.
        result = task_func()
        self.assertIsInstance(result, float)
        self.assertGreater(result, 0)
    def test_custom_list(self):
        # Test with a custom list of small positive integers to ensure proper handling and positive result.
        result = task_func([1, 2, 3])
        self.assertIsInstance(result, float)
        self.assertGreater(result, 0)
    def test_negative_numbers(self):
        # Test with negative numbers to verify the function handles and returns a positive result.
        result = task_func([-3, -2, -1])
        self.assertIsInstance(result, float)
        self.assertGreater(result, 

In [9]:
task = tasks['BigCodeBench/15']

# smaller models ouput additional trash with this propmt (before and after the code)
completion_prompt = 'Complete the following Python code:\n'

completion_prompt = '''1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:\n\n'''
print(completion_prompt + task['prompt'])

1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import subprocess
import csv
import os

def task_func(commands_file_path, output_dir_path):
    """
    Execute a list of shell commands read from a CSV file and save the outputs in separate files.
    Each command's output is written to a unique file in the specified output directory.
    If a command fails, the error message along with the exit code is appended to the respective output file.

In [10]:
solar_10b = Replicate(
    model        = 'nateraw/nous-hermes-2-solar-10.7b:1e918ab6ffd5872c21fba21a511f344fd12ac0edff6302c9cd260395c7707ff4',
    model_kwargs = {"temperature": 0.95,"top_p": 1, "max_new_tokens":512}
)

phixtral_7b = Replicate(
    model        = 'lucataco/phixtral-2x2_8:25d7b93bb0ec9e8dd94fcc69adc786759243a5628ba5574bd9609d6abafe57cf',
    model_kwargs = {"temperature": 0.95,"top_p": 1, "max_new_tokens":512}
)

mamba = Replicate(
    model        = 'adirik/mamba-2.8b:864ed9237f28ff2e8eb55ffc07e9f0d7857888cacb9c32a92081f24f49a861db',
    model_kwargs = {"temperature": 0.95,"top_p": 1, "max_new_tokens":512}
)

gemma_7b = Replicate(
    model        = 'google-deepmind/gemma-7b-it:2790a695e5dcae15506138cc4718d1106d0d475e6dca4b1d43f42414647993d5',
    model_kwargs = {"temperature": 0.95,"top_p": 1, "max_new_tokens":512}
)

gemma_2b = Replicate(
    model        = 'google-deepmind/gemma-2b-it:dff94eaf770e1fc211e425a50b51baa8e4cac6c39ef074681f9e39d778773626',
    model_kwargs = {"temperature": 0.95,"top_p": 1, "max_new_tokens":512}

)

phi = Replicate(
    model        = 'lucataco/phi-2:740618b0c24c0ea4ce5f49fcfef02fcd0bdd6a9f1b0c5e7c02ad78e9b3b190a6',
    model_kwargs = {"temperature": 0.95,"top_p": 1, "max_new_tokens":512}
)

yi_6b = Replicate(
    model        = '01-ai/yi-6b:d302e64fad6b4d85d47b3d1ed569b06107504f5717ee1ec12136987bec1e94f1',
    model_kwargs = {"temperature": 0.95, "top_p": 1, "max_new_tokens":512}
)

current_model = solar_10b
current_model('Hello. How are you?')

  current_model('Hello. How are you?')


'Greetings! I am functioning optimally, thank you for asking. How may I assist you today?'

In [11]:
#model_nickname = 'llama3_8B'
#model_nickname = 'mistral_7B'
#model_nickname = 'gemma_7B'
#model_nickname = 'phixtral_8B'
#model_nickname = 'gemma_2B'
model_nickname = 'solar_10B'
#model_nickname = 'phixtral'
#model_nickname = 'qwen_1_5_7B'
#model_nickname = 'mamba2'
#model_nickname = 'yi_6b'
#model_nickname = 'phi'
time_stamp     = datetime.datetime.now().strftime("%Y%m%d_%H%M%S_%f")[:-2]
file_name      = f'logs/{model_nickname}_samples_{time_stamp}.jsonl'
file_name

'logs/solar_10B_samples_20250124_235810_7835.jsonl'

In [15]:
num_samples_per_task = 1
completions = []
is_full_func = []
print(f'Model: {model_nickname}\n')

for task_id, task_body in tasks.items():
    print(task_id)
    #if task_id == 'lbpp/5':
    #        break
    for i in range(num_samples_per_task):
        start_time  = time.time()
        full_prompt = completion_prompt + task_body['prompt']
        try:
            completion  = current_model( full_prompt )
        except Exception as e:
            completion = f"Error generating completion:\n{e}"
        
        completion = remove_triple_backticks_at_edges(completion.strip())
        is_full = is_full_function(completion)
        is_full_func.append(is_full)
        if is_full:
            completion = add_import(completion)
        completions.append( {'task_id': task_id, 'completion': completion} )
        write_jsonl(file_name, completions)
        print('Prompt:\n', full_prompt, '\n', '-'*75, '\n', sep='')
        print('Completion:\n', completion, sep='')
        print(f"\nTime elapsed: {(time.time() - start_time):.4f} seconds\n", '\n', '='*75, '\n', sep='')

Model: solar_10B

BigCodeBench/0
Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import itertools
from random import shuffle

def task_func(numbers=list(range(1, 3))):
    """
    Calculates the average of the sums of absolute differences between each pair of consecutive numbers 
    for all permutations of a given list. Each permutation is shuffled before calculating the differences.

    Args:
    - numbers (list): A list of numbe

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import subprocess
import csv
import os

def task_func(commands_file_path, output_dir_path):
    """
    Execute a list of shell commands read from a CSV file and save the outputs in separate files.
    Each command's output is written to a unique file in the specified output directory.
    If a command fails, the error message along with the exit code is appended to the respective outp

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import numpy as np
from itertools import zip_longest

def task_func(l1, l2,THRESHOLD = 0.5):
    """
    Alternates elements from two numeric lists, calculates the absolute difference of each 
    element from a predefined threshold, and returns the element closest to this threshold.
    
    Parameters:
    l1 (list): The first input list containing numeric values.
    l2 (list): The 

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import numpy as np
from functools import reduce

def task_func(list_of_pairs):
    """ 
    Calculate the product of the second values in each tuple in a list of tuples and return the product as a single-element numeric array.
    
    Parameters:
    list_of_pairs (list): A list of tuples, where the first element is the category 
                          and the second element is the

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import numpy as np
import seaborn as sns

def task_func(df):
    """
    Describe a dataframe and draw a distribution chart for each numeric column after replacing the NaN values with the average of the column.

    Parameters:
    df (DataFrame): The pandas DataFrame.

    Returns:
    tuple: A tuple containing:
        - DataFrame: A pandas DataFrame with statistics. This includes co

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

from sklearn.cluster import KMeans
import matplotlib.pyplot as plt


def task_func(df, age: int, height: int):
    """
    Filters the input DataFrame based on specified 'Age' and 'Height' conditions and applies KMeans clustering.
    - If the filtered dataframe has less than 3  columns, add to it a column 'Cluster' with 0 for each row.
    - Otherwise, do a KMeans clustering (by Age a

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

def task_func(csv_file_path: str, title: str):
    """
    Create a heatmap of the correlation matrix of a DataFrame built from a CSV file. Round each correlation to 2 decimals.

    Parameters:
    csv_file_path (str): The path to the CSV file containing the input data.
    title (str): The title of the heatmap

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import pandas as pd
import matplotlib.pyplot as plt

COLUMNS = ['col1', 'col2', 'col3']

def task_func(data):
    """
    You are given a list of elements. Each element is a list with the same length as COLUMNS, representing one row a dataframe df to create. Draw a line chart with unique values in the COLUMNS[-1] of the pandas DataFrame "df", grouped by the rest of the columns.
    - T

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import pandas as pd
import os
import numpy as np
import ast

def task_func(directory):
    """
    Traverse a directory for CSV files a get the file with the longest filename. From that CSV file, load e-mail data, convert it into a Pandas DataFrame, calculate the sum, mean and median of the list associated with each e-mail, and then draw a histogram of the median.
    - The column name

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import numpy as np
import pandas as pd
from datetime import timedelta

def task_func(start_date, end_date, random_seed=42):
    """
    Generate and plot weather data for a specified date range.
    
    This function creates a DataFrame containing simulated daily weather data 
    within the specified date range. It generates random values for temperature, 
    humidity, and wind spee

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

from scipy.stats import linregress
import matplotlib.pyplot as plt

def task_func(data, column1, column2):
    """
    Perform a linear regression on two columns of a dataset and record the result.
    Additionally, generates a plot representing the original data and the fitted line.

    Parameters:
    data (DataFrame): The dataset.
    column1 (str): The name of the first column.
  

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.datasets import load_iris

def task_func():
    """
    Draws a seaborn pair plot of the iris dataset using Arial font.

    This function sets the global font to Arial for better readability and visual appeal. It then generates a pair plot from the iris dataset, where each subplot represents the rel

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import pandas as pd
import matplotlib.pyplot as plt
from itertools import cycle

def task_func(df, groups=['A', 'B', 'C', 'D', 'E']):
    """
    Analyzes the groups in a DataFrame by plotting a scatter plot of the ordinals against the values for each group.

    Parameters:
    df (DataFrame): The DataFrame with columns 'group', 'date', and 'value'.
    groups (list, optional): List o

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import json
from collections import Counter
import random

def task_func(my_dict, keys):
    """
    Updates a given dictionary by adding 10 random elements based on the 'keys' parameter,
    with values as random integers from 1 to 100. It saves the JSON representation of the
    updated dictionary to a file and the counts of each key to a separate text file.

    Parameters:
        

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import pandas as pd
import numpy as np
from random import choice, seed as set_seed

def task_func(num_of_students, seed=42, name_list=None, gender_list=None, age_range=(15, 20), score_range=(50, 100)):
    """
    Generate a Pandas DataFrame with randomized student data. This function allows for specifying 
    the total number of students and the randomness seed for reproducible outco

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import pandas as pd
from datetime import datetime, timedelta
from random import randint, seed as random_seed

def task_func(start_date=datetime(2020, 1, 1), end_date=datetime(2020, 12, 31), seed=42):
    """
    Generate a pandas Series of random dates within a specified date range, 
    including both start_date and end_date, with an optional seed for reproducibility.
    
    The fun

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import numpy as np
import matplotlib.pyplot as plt
from random import randint
import math

def task_func(POINTS=100):
    """
    Simulates a random walk in a two-dimensional space and draws the path using matplotlib.
    The walk is determined by randomly choosing directions at each step. The function generates
    two numpy arrays representing the x and y coordinates of each step and

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import pandas as pd
from scipy.stats import skew

def task_func(df):
    """
    Calculate the skewness of the last column of the dataframe.

    Parameters:
    df (DataFrame): The input dataframe.

    Returns:
    float: The skewness of the last column of the dataframe.

    Raises:
    ValueError: If the input is not a DataFrame or has no columns.

    Requirements:
    - pandas
  

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import pandas as pd
from sklearn.preprocessing import LabelEncoder


def task_func(df: pd.DataFrame, column_name: str) -> pd.DataFrame:
    """
    Encrypt the categorical data in a specific column of a DataFrame using LabelEncoder.

    Parameters:
    df (pd.DataFrame): The DataFrame that contains the data.
    column_name (str): The name of the column to encode.

    Returns:
    pd

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import re
import pandas as pd
from datetime import datetime


def task_func(log_file):
    """
    Extracts logging information such as message type, timestamp, and the message itself from a log file and
    stores the data in a CSV format. This utility is ideal for converting plain text logs into a more s
    tructured format that can be easily analyzed. The log is the format of 'TYPE

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import re
import matplotlib.pyplot as plt
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np


def task_func(df):
    """
    Analyzes a given DataFrame containing article titles and content to identify articles with titles that include
    the words "how" or "what". It calculates the TF-IDF scores for the words in the content of these articles and
    visua

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import re
import json
import requests

def task_func(data_url: str) -> list:
    """
    Fetch data from a specific URL and extract all names from the JSON-formatted data that are not enclosed by square brackets.
    No specific status code should be raised.
    
    Note:
    - The function uses regular expressions to search for names in the fetched data. Names that are inside square


Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import heapq
import math
import matplotlib.pyplot as plt


def task_func(l1, l2, N=10):
    """ 
    Find the N biggest differences between the respective elements of the list 'l1' and list 'l2', 
    square the differences, take the square root and return the plotted values as a matplotlib Axes object.

    Parameters:
    l1 (list): A list of numbers.
    l2 (list): A list of numbers

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import json
import smtplib

# Constants
SMTP_SERVER = "smtp.gmail.com"
SMTP_PORT = 587
EMAIL_ADDRESS = "your.email@gmail.com"
EMAIL_PASSWORD = "your.password"

def task_func(input_data=None, smtp_server=SMTP_SERVER, smtp_port=SMTP_PORT, email_address=EMAIL_ADDRESS, email_password=EMAIL_PASSWORD, smtp=None):
    """
    Extract recepient email address and names from JSON-formatted strin

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import numpy as np
from scipy import stats
import matplotlib.pyplot as plt


def task_func(mu=0, sigma=1, sample_size=1000, seed=0):
    """
    Generate a sample from a normal distribution with a given mean and a standard deviation and plot the histogram 
    together with the probability density function. Returns the Axes object representing the plot and the empirical
    mean and st

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import pandas as pd
from sklearn.preprocessing import LabelEncoder

def task_func(df, dct, columns=None):
    """
    This function preprocesses a pandas DataFrame by replacing specified values, encoding categorical attributes, 
    and standardizing numerical attributes. It's designed to be flexible for data preprocessing in machine learning tasks.

    Parameters:
    - df (DataFrame

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import random
import matplotlib.pyplot as plt


# Sample data
class Object:
    value = 0

    def __init__(self, value=None):
        if value is None:
            self.value = random.gauss(0, 1)
        else:
            self.value = value


def task_func(obj_list, attr, num_bins=30, seed=0):
    """
    Create a histogram of the specified attribute from a list of objects and return 

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import pandas as pd
import random
from sklearn.preprocessing import StandardScaler

# Constants
N_DATA_POINTS = 5000
MIN_VALUE = 0.0
MAX_VALUE = 10.0

def task_func(n_data_points=5000, min_value=0.0, max_value=10.0):
    """
    Generate a random dataset of floating point numbers, truncate each value to 3 decimal places and normalize the data using standard scaling (mean = 0, std = 1).

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import matplotlib.pyplot as plt
import numpy as np


def task_func(ax, radius):
    '''
    Draw a circle with a given radius on the polar chart 'ax' and set radial ticks.
    This function manipulates plot data using matplotlib.

    Parameters:
    ax (matplotlib.axes._axes.Axes): The ax to plot on. Must be a polar plot.
    radius (float): The radius of the circle. Must be non-negat

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import numpy as np
from scipy import stats
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt


def task_func(data_dict):
    """
    Performs the following operations on the input dictionary 'data_dict':
    1. Adds a key "a" with a value of 1.
    2. Conducts statistical analysis on its values (mean, median, mode), by rounding the mean to 2 decimal places.

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import os
import json
from collections import Counter


def task_func(json_files_path='./json_files/', key='name'):
    """
    Count the occurrence of a particular key in all json files in a specified directory 
    and return a dictionary with the values of the specified key and their counts.
    
    Parameters:
    - json_files_path (str): The path to the directory containing the J

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import pytz
import numpy as np
from dateutil.parser import parse
import math


SOLAR_CYCLE_YEARS = np.array([1986, 1996, 2008, 2019])

def task_func(date_str, from_tz, to_tz):
    """
    Calculate solar activity based on the date and time, taking into account the solar cycle of 11 years.

    Parameters:
    date_str (str): The date string in "yyyy-mm-dd hh:mm:ss" format.
    from_tz 

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

from collections import Counter
import itertools
import random


# Constants
ALPHABET = 'abcdefghijklmnopqrstuvwxyz'

def task_func(list_of_lists, seed=0):
    """
    Count the frequency of each letter in a list of lists. If a list is empty, 
    fill it with a random sample from the alphabet, and then count the letters.
    
    Parameters:
    list_of_lists (list): The list of lists

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import csv
import re
from collections import Counter


def task_func(file_path, regex_pattern=r'\(.+?\)|\w+|[\W_]+'):
    """
    Counts matches from a CSV file based on a given regex pattern. 
    By default, it captures content between parentheses as a single match and 
    any word or sequence of non-alphanumeric characters outside as matches in a string.
    
    Parameters:
    - 

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import string
import random
import re


def task_func(elements, pattern, seed=100):
    """
    Replace each character in each element of the Elements list with a random 
    character and format the element into a pattern "%{0}%", where {0} is the
    replaced element. Finally, concatenate all the formatted elements into a 
    single string and search for the regex pattern specified 

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import subprocess
import os
import shutil
from glob import glob


def task_func(src_folder, dst_folder):
    """Compress all files in the specified source folder and move the compressed files to a destination folder.
    This operation is executed as a background process using the 'gzip' command.

    Parameters:
    src_folder (str): The path of the source folder containing the files 

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import numpy as np
from scipy.stats import norm
import matplotlib.pyplot as plt

def task_func(x):
    """
    Draws a plot visualizing a complex distribution created from two Gaussian distributions.
    The real part of the complex distribution is a Gaussian centered at 0 with a standard deviation of 1,
    and the imaginary part is a Gaussian centered at 2 with a standard deviation o

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import pandas as pd
import os

def task_func(original_file_location="test.xlsx", new_file_location="new_test.xlsx", sheet_name="Sheet1"):
    """
    Copies data from an Excel spreadsheet into a new Excel file, then reads the new Excel file and returns its contents.

    Parameters:
    - original_file_location (str): Path to the original Excel file. Defaults to 'test.xlsx'.
    - new_

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import nltk
import re
from collections import Counter


# Constants
STOPWORDS = nltk.corpus.stopwords.words('english')

def task_func(text):
    """
    Calculate the frequency of continuous words in a text string. The function splits the text into words, 
    converts them to lowercase, removes punctuation marks and common stopwords (provided as a constant), 
    and then calculates t

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import numpy as np
from scipy.stats import norm
import matplotlib.pyplot as plt

def task_func(length):
    """
    Create a normal distribution with a given length, plot its histogram alongside the 
    probability density function, and return the distribution and the plot.
    
    Parameters:
    - length (int): The length of the distribution to be generated.
    
    Returns:
    -

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import os
import glob
import shutil

def task_func(directory, archive_dir='archive'):
    """
    Archive all JSON files in a given directory by moving them to a specified archive directory.

    Parameters:
    directory (str): The directory where the JSON files are located.
    archive_dir (str): The directory to which the JSON files will be archived. Defaults to 'archive'.

    Retu

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import numpy as np
import matplotlib.pyplot as plt
import math


def task_func(frequency, sample_size=10000):
    """
    Create a diagram of a sine wave and cosine wave with a given frequency and return the plot.

    Parameters:
    frequency (float): The frequency of the wave. Must be a non-negative float.
    sample_size (int, Optional): A positive integer integer denoting the numb

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import os
import pandas as pd
import numpy as np

def task_func(excel_file_path, file_name, column_name):
    """
    Calculate the mean, median, and standard deviation of the data from a specific column in an Excel file.

    Parameters:
    - excel_file_path (str): The path to the directory containing the Excel file.
    - file_name (str): The name of the Excel file.
    - column_nam

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import pandas as pd
from sklearn.preprocessing import StandardScaler


def task_func(data):
    """Scales numeric columns of a data dictionary using the StandardScaler.

    This function scales the numeric columns of a dataframe using the StandardScaler from scikit-learn.
    Non-numeric columns remain unchanged. If a column contains mixed data types, it tries to convert the entire co

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import pandas as pd
import re
import random


def task_func(s: str, seed: int = 0) -> pd.DataFrame:
    """
    Generate a Pandas DataFrame of products with their ID, quantity, code, price, product, and description
    based on a specified string of product data.

    The input string is expected to be divided into segments by newlines. Each segment is expected to
    be further split 

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import numpy as np
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt


def task_func(P, T, tensor_shape=(3, 3, 3)):
    """
    Calculate the product of a matrix "P" and a 3D tensor "T" with numpy and then apply PCA to reduce the
    dimensionality of the result. The resulting 2D data is then visualized.
    Note: This function only accepts numpy matrices/arrays.

 

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler


def task_func(data: pd.DataFrame) -> (pd.DataFrame, list):
    """
    This function takes a pandas DataFrame and standardizes its features using sklearn's StandardScaler,
    which standardizes features by removing the mean and scaling to unit variance.
    After standardization, it 

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import subprocess
import pandas as pd

def task_func(script_path, output_file_path):
    """
    Executes a script to produce a CSV, reads the CSV, and plots a bar graph from the data.

    This function runs the provided script, which should generate a CSV file at the specified output path.
    The CSV must have exactly two columns. It then reads this CSV into a DataFrame and plots a 

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import json
from enum import Enum

class Color(Enum):
    RED = 1
    GREEN = 2
    BLUE = 3


def task_func(my_obj):
    """
    Serializes an object into a JSON string with support for complex data types like Enum.
    The function uses a custom JSONEncoder to handle Enum types by converting them to their names or values.

    Parameters:
    my_obj (object): The object to be seriali

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import random
import string
import pandas as pd


def task_func(data_list, seed=0):
    """
    Replace a random substring (a sequence of characters between two commas or at the beginning/end of the string)
    in a list of strings with a random string (comprising ascii lowercase characters) with the same length as
    the substituted characters.

    Parameters:
    data_list (list): 

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import re
import pandas as pd


def task_func(df: pd.DataFrame, column_name: str, pattern: str) -> pd.DataFrame:
    """
    Reverse the order of words in a specific column of a pandas DataFrame where the words
    match a user-specified regular expression pattern, using a nested helper function.
    Words are considered to be whitespace-separated strings. This function maintains the
 

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import pandas as pd
from datetime import datetime, timedelta
import random


def task_func(epoch_milliseconds, seed=0):
    """
    Generate user activity logs from a given epoch time to the current time.

    This function iterates from the starting epoch time to the current system
    time, incrementally increasing the time by a random number of seconds (an
    integer in [1, 10]) be

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import pandas as pd
import numpy as np


def task_func(days, random_seed=0):
    """
    Generates a spending report DataFrame for the given number of days.

    This function takes a number of days as input and populates a pandas DataFrame
    with fake expenditure data indexed by date. Each day on or after '2023-01-01'
    has its own row. The DataFrame has five columns: Groceries, E

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import hashlib
import hmac

def task_func(secret, message):
    """
    Generates an HMAC (Hash-based Message Authentication Code) signature for a given message using a secret key.
    The function uses SHA-256 as the hash function to create the HMAC signature.

    Parameters:
    secret (str): The secret key used for HMAC generation.
    message (str): The message for which the HMAC 

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import pandas as pd
import numpy as np


def task_func(column, data):
    """
    Analyze a list of sales data, calculate the sum, the mean, the minimum, the maximum of a given column,
    and return the bar chart plot for the given column without displaying it.

    Parameters:
    column (str): The column to analyze. Expected values are ['Product', 'Quantity Sold', 'Total Sales'].
  

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import pandas as pd
from scipy.spatial.distance import pdist, squareform


def task_func(array):
    """
    Generate a Pandas DataFrame from a 2D list and calculate a distance matrix.

    This function converts a 2D list into a DataFrame, with columns named alphabetically starting from 'A'.
    It uses the `chr()` function, which converts an integer to its corresponding Unicode chara

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import numpy as np
import secrets
import hashlib
import base64

def task_func(num, from_base, to_base, alphabet):
    """
    Converts a number from one base to another, adds a random salt, hashes the result using SHA-256,
    and then encodes the hash in base64 using a custom alphabet. The function also returns the used salt.

    Parameters:
    num (str): The number to be converted,

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import sqlite3
import pandas as pd
import os


def task_func(db_name, table_name, csv_path="data.csv"):
    """
    Read SQLite3 table via pandas and export to a CSV file.

    Parameters:
    - db_name (str): The path to the SQLite3 database.
    - table_name (str): The name of the table to export.
    - csv_path (str, optional): The path where the CSV file will be saved. Defaults to 

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import base64
import pandas as pd


def task_func(df):
    """
    Encodes a dict of list as a Base64 string. The dict is first converted to a Pandas DataFrame.
    Then convert the data franme to CSV format and encoded to bytes, finally encoded it to a Base64 string.

    Parameters:
        df (dict of list): A dictionary where the key 'Word' maps to a list of strings.

    Returns:


Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import pandas as pd
from scipy.spatial import distance
import matplotlib.pyplot as plt


def task_func(a, b):
    """
    Calculate the Euclidean distance between two lists, create a Pandas DataFrame from these lists
    with indices 'A' and 'B', and then draw the values with a line displaying the Euclidean distance.

    Parameters:
    a (list): A list of numbers.
    b (list): Anoth

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import ctypes
import hashlib
import binascii

def task_func(filepath):
    """
    Loads a DLL file from a given filepath, calculates its MD5 and SHA256 hashes,
    and prints these hashes in hexadecimal format. This function is a demonstration
    of file handling, usage of the hashlib library for hash calculations, and binascii
    for hexadecimal conversion. Note that the actual ope

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import numpy as np
import pandas as pd


def task_func(array_length=100):
    '''
    Generate two arrays of random numbers of a given length, calculate their mean, median, and standard deviation,
    then store these results in a Panda DataFrame 'statistics' with keys 'Array1' and 'Array2'.
    Draw a bar chart to compare these statistics with indices 'Mean', 'Median', and 'Standard D

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import unicodedata
import requests

URL = 'https://api.github.com/users/'

def task_func(username):
    """
    Retrieves user information from the GitHub API for a given username, normalizes all string data to ASCII,
    and returns a dictionary of the normalized data. This function demonstrates data retrieval from a web API
    and handling of Unicode data normalization.

    Paramet

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Constants defining the range of random integers and the size of the DataFrame
RANGE = 100
SIZE = 1000


def task_func():
    """
    Generates a DataFrame with two columns, 'X' and 'Y', each filled with random integers within a specified range,
    and plots these points using a scatter plot

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import random
import bisect
from array import array


def task_func(n=10, total=100):
    """
    Generates 'n' random integer numbers such that their sum equals 'total', sorts these numbers,
    and determines the position where a new random number can be inserted to maintain the sorted order.
    The function uses a retry mechanism to ensure the generated numbers sum up to 'total'.



Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import seaborn as sns
import time

def task_func(df, letter):
    """
    Filters rows in a DataFrame based on the starting letter of the values in the 'Word' column.
    It then calculates the lengths of these words and returns a box plot representing the distribution
    of these lengths.

    Parameters:
    - df (pd.DataFrame): The input DataFrame containing a 'Word' column with st

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import pandas as pd
import matplotlib.pyplot as plt
from random import sample

# Constants for column names to use in plots
COLUMNS = ['A', 'B', 'C', 'D', 'E']


def task_func(df: pd.DataFrame, tuples: list, n_plots: int) -> (pd.DataFrame, list):
    '''
    Remove rows from a dataframe based on column values and generate random scatter plots.

    Parameters:
    - df (pd.DataFrame): 

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import numpy as np
import pandas as pd

# Constants
RANGE = (1, 100)

def task_func(L):
    '''
    Generates a DataFrame filled with random integers. The dimensions of the DataFrame (number of rows and columns)
    are determined by multiplying pairs of integers from nested lists within the input list of lists 'L'.
    
    Requirements:
    - numpy
    - pandas

    Parameters:
    L

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

from random import randint
from statistics import mean
import pandas as pd


def task_func(products_list):
    """
    This function takes in a list of product names and generates random sales data for each product over a period of
    12 months. It then calculates the average sales for each product and returns the results as a pandas DataFrame with
    columns: 'Product', 'Month 1', '

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import re
from nltk.corpus import stopwords


def task_func(text: str) -> dict:
    """
    Analyzes a given text string by removing duplicate words and stopwords defined by nltk.corpus ,
    and then returns a frequency distribution of the remaining words.

    Parameters:
    - text (str): The text string to analyze.

    Returns:
    - dict: The frequency distribution of the words i

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import os
import re
import pandas as pd


def task_func(pattern: str, directory: str, output_csv: str) -> pd.DataFrame:
    """
    Searches for files in the specified directory that match a given regex pattern.
    This function walks through the directory, matches filenames against the pattern,
    and saves the matched file paths to a CSV file. It returns a DataFrame of these paths


Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import os
import pandas as pd
from dateutil.parser import parse
OUTPUT_DIR = './output'

def task_func(csv_path=os.path.join(OUTPUT_DIR, 'data.csv'), date_column='date'):
    """
    Read a CSV file, convert a column of date strings into datetime objects,
    and draw a histogram of the year distribution of these dates.

    Parameters:
    - csv_path (str): The path to the CSV file. D

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import matplotlib.pyplot as plt
import seaborn as sns


def task_func(dataframe, target_value='332'):
    """
    Searches a given DataFrame for occurrences of a specified target value and visualizes these occurrences using a heatmap.

    Parameters:
    - dataframe (pd.DataFrame): The input DataFrame to search.
    - target_value (str, optional): The value to search for in the DataFr

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import numpy as np
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA


def task_func(x, y, labels):
    """ 
    Perform Principal Component Analysis (PCA) on "x" as x-values and "y" as y-values and record the results with labels.

    Parameters:
    x (list): List of numpy arrays representing the x-values of the data points.
    y (list): List of numpy arrays repr

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import os
import random
import json

def task_func(directory, n):
    """
    Create n random files in a directory with json content with the key 'number' and a random integer value between 1 and 100, and then reset the cursor to the beginning of each file.

    Parameters:
    - directory (str): The directory in which to generate the files.
    - n (int): The number of files to genera

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import pandas as pd
from collections import Counter

def task_func(df):
    """
    Calculate the frequency of combinations of elements in a DataFrame.
    The function adds a 'combination' column to the DataFrame, which is the combination of items in each row.
    It then calculates the frequency of each combination.
    
    Parameters:
    - df (pandas.DataFrame): The input DataFram

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import numpy as np
from scipy.stats import mode

def task_func(list_of_lists):
    """
    Merges a predefined set of lists into a list and finds the mode of the elements in the list.

    Parameters:
    - list_of_lists (list): The list to be processed.

    Returns:
    - tuple: The mode and count of the mode in the merged list.
        - mode_value (np.array): The value that appears

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import pandas as pd
from sklearn.preprocessing import StandardScaler


def task_func(tuples_list, columns):
    """
    Convert a list of tuples into a Pandas DataFrame, perform a default scaling in each column, and return the transformed DataFrame.
    
    Parameters:
    - tuples_list (list): The list of tuples.
    - columns (list): The list of column names.
    
    Returns:
    -

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import pandas as pd
from sklearn.decomposition import PCA

def task_func(df):
    """
    Perform Principal Component Analysis (PCA) on the DataFrame and record the first two main components.
    
    Parameters:
    - df (DataFrame): The pandas DataFrame.
    
    Returns:
    - df_pca (DataFrame): The DataFrame with the first two principal components named 'PC1' and 'PC2' as columns.

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import base64
import re
from html import unescape
import textwrap

def task_func(raw_string, line_length):
    """
    Decode a raw string from base64, decouple HTML entities, replace multiple spaces with a single space, strip leading and subsequent spaces, and wrap text to a certain line length.

    Parameters:
    - raw_string (str): The base64 encoded string.
    - line_length (int

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import sys
from configparser import ConfigParser

# Constants
PATH_TO_APPEND = '/path/to/whatever'
CONFIG_FILE = '/path/to/config.ini'

def task_func(path_to_append=PATH_TO_APPEND, config_file=CONFIG_FILE):
    """
    Add a specific path to sys.path and update a configuration file with this path.

    Parameters:
    - path_to_append (str): The path to append to sys.path. Default is '

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import pickle
import os

# Constants
FILE_NAME = 'save.pkl'

def task_func(dt):
    """
    Save the date time object "dt" in the pickle file "save.pkl" and then read it back for validation.

    Parameters:
    - dt (datetime): The datetime object to be saved.

    Returns:
    - loaded_dt (datetime): The loaded datetime object from 'save.pkl'.

    Requirements:
    - pickle
    - os

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression


def task_func(df, target_column, target_values=None):
    """
    Replace all elements in DataFrame columns that are not present in the target_values array with zeros, and then perform a linear regression using the target column.

    Parameters:
        df (DataFrame): The input pandas DataFrame

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder

def task_func(num_samples, countries=['Russia', 'China', 'USA', 'India', 'Brazil'], 
           ages=np.arange(18, 60), genders=['Male', 'Female'], rng_seed=None):
    """
    Generate a demographic dataset with information about people from different countries, their age, and gender. 
    Genders ar

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

from collections import Counter
import random
import string

# Constants
LETTERS = string.ascii_letters

def task_func(list_of_lists):
    """
    If you have a nested list, replace each sublist with a random letter and return a count of each letter in the final list.

    Parameters:
    - list_of_lists (list): A nested list.

    Returns:
    - dict: A dictionary containing count of 

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import re
import os
from pathlib import Path
import csv

def task_func(directory: str, pattern: str = r'^(.*?)-\d+\.csv$') -> list:

    """
    Processes CSV files in a directory based on a specified pattern and creates new files with altered names while preserving the content, you've laid out a solid foundation with your initial tests.

    Parameters:
    - directory (str): The path

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import re
import os
import zipfile

def task_func(directory, pattern=r'^(.*?)-\d+\.zip$'):
    """
    Unzip all zip files in a directory whose name matches a certain pattern by splitting the filename the last time "-" occurs and using the prefix part of the filename as the directory to extract.
    
    Parameters:
    - directory (str): The directory where the zip files are located.


Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import os
import re

def task_func(directory):
    """
    Finds all files in the specified directory whose names contain any type of 
    bracket (round, curly, or square).

    Uses an internal constant BRACKET_PATTERN = '[(){}\\[\\]]', which specifies
    the brackets that are looked for.

    
    Parameters:
    directory (str): The directory path to search in.
    
    Returns:
 

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import re
import nltk
nltk.download('stopwords')

from nltk.corpus import stopwords

from collections import Counter

# Constants
STOPWORDS = set(stopwords.words('english'))

def task_func(text, n=2):
    """
    Remove duplicate and stopwords from a string "text."
    Then, generate a count of n-grams (default is bigrams) in the text.

    Parameters:
    - text (str): The text string

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import re
import string

# Constants
PUNCTUATION = string.punctuation

def task_func(text):
    """
    Divide a string into words, remove punctuation marks and convert them to lowercase letters.

    Parameters:
    - text (str): The input string.

    Returns:
    - cleaned_words (list): A list of cleaned words.

    Requirements:
    - re
    - string

    Example:
    >>> task_func

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import binascii
import io
import gzip

def task_func(compressed_hex):
    """
    Uncompress a gzip-compressed hexadecimal string and decrypt the result to UTF-8.
    
    Parameters:
    - compressed_hex (str): The gzip-compressed hexadecimal string.
    
    Returns:
    - decoded_string (str): The decoded and decompressed string in UTF-8 format, or an error message.
    
    Require

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import re
import os
import string
import random

def task_func(input_string, directory='./text_files'):
    """
    Split a multi-line string into separate strings, remove special characters, and save each string as a separate text file.
    
    Parameters:
    - input_string (str): The multi-line string to be split and saved.
    - directory (str): The directory where the text files 

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import sklearn.model_selection as model_selection
import sklearn.svm as svm
import sklearn.datasets as datasets
import sklearn.metrics as metrics

def task_func():
    """
    Perform an SVM classification of the iris dataset and warn if the accuracy is less than 0.9.

    Parameters:
    - None

    Returns:
    tuple: A tuple containing:
        - accuracy (float): The accuracy of th

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

from itertools import zip_longest
from scipy.spatial import distance

def task_func(points):
    """
    Calculate the Euclidean distances between consecutive points in a provided 
    list of 2D coordinates.

    This function takes a list of tuples, where each tuple contains two numbers
    representing a point in 2D space. It computes the Euclidean distance between
    each consecut

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA

def task_func(data, n_components=2):
    """
    Perform PCA (Principal Component Analysis) on the provided DataFrame.

    This function takes a pandas DataFrame, scales the data using sklearn 
    StandardScaler, and then applies PCA to reduce 
    the number of dimensions of t

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import numpy as np
import matplotlib.pyplot as plt

# Constants
ARRAY_SIZE = 10000

def task_func():
    """
    Create a numeric array of random integers, calculate the mean and standard deviation, and draw a histogram of the distribution.

    Note:
        The random integers are generated between 1 and 100. The title of the histogram is "Histogram of Random Integers". 
        The 

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import pandas as pd
from sklearn.linear_model import LinearRegression

def task_func(d, target='z'):
    """
    Perform linear regression to "x," "y," against "z" from a list of dictionaries "d."

    Parameters:
    d (list): A list of dictionaries.
    target (str): The target variable for the regression.

    Returns:
    LinearRegression: A LinearRegression model.

    Requirement

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import zscore

def task_func(df, z_threshold=2):
    """
    Identifies and plots outliers in the 'closing_price' column of a given DataFrame using the Z-Score method.
    
    Parameters:
    df (pandas.DataFrame): The input DataFrame that must contain a column named 'closing_price' with numerical values.
    z_thresh

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import pandas as pd
import sqlite3

def task_func(db_path: str, table_name: str, column_name: str) -> pd.DataFrame:
    """
    Loads data from an SQLite database into a Pandas DataFrame and performs a string replacement operation
    on a specified column. Specifically, replaces all occurrences of the newline character '\n' with the HTML line
    break tag '<br>'.
    
    Requirement

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import numpy as np
import matplotlib.pyplot as plt
import string

# Constants
ALPHABET = list(string.ascii_lowercase)

def task_func(word):
    """
    Draws a bar chart representing the positions of each letter in the given word 
    within the English alphabet using numpy and matplotlib.pyplot.
    
    Parameters:
    word (str): The word whose letters' positions will be plotted. 
 

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import numpy as np
import pandas as pd
import random

def task_func(rows=3, cols=2, min_val=0, max_val=100, seed=0):
    """
    Creates a matrix of specified dimensions with random integers within a given range,
    and then converts it into a pandas DataFrame.
    
    Parameters:
    - rows (int): Number of rows in the matrix. Default is 3.
    - cols (int): Number of columns in the

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import numpy as np
import matplotlib.pyplot as plt
import re
from collections import Counter


def task_func(mystrings, text):
    """
    Replace spaces in given words with underscores, then plots the frequency of each unique word.

    Parameters:
    - mystrings (list of str): List of words/phrases where spaces need to be replaced with underscores.
    - text (str): The text in whic

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import os
import glob
from collections import Counter


def task_func(directory, extensions=[".txt", ".docx", ".xlsx", ".csv"], keep_zero=True):
    """
    Traverses a given directory recursively to count files by specified extensions.

    Parameters:
    - directory (str): The path of the directory to search.
    - extensions (list of str): File extensions to count. Defaults to ['.t

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import numpy as np
from sklearn.preprocessing import MinMaxScaler
import pandas as pd


def task_func(df: pd.DataFrame) -> pd.DataFrame:
    """
    Computes the MinMax-normalized cumulative sum for each numeric column in the given DataFrame.

    Parameters:
    - df (pandas.DataFrame): The input DataFrame containing numerical values.

    Returns:
    - pd.DataFrame: A DataFrame wher

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler


def task_func(records: np.ndarray, random_seed: int = 0) -> pd.DataFrame:
    """
    Randomly shuffle the given array's features, normalize its values, then convert to a DataFrame
    with shuffled feature names.

    Parameters:
    - records (np.ndarray): A 2D numpy array with each row as a rec

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler


def task_func(df):
    """
    Plots the correlation matrix from numeric columns in a DataFrame and returns a DataFrame
    where the numeric columns are standardized to have mean 0 and variance 1.

    Parameters:
    df (pandas.DataFrame): Input DataFrame with c

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import binascii
import string
import random

def task_func(length):
    """
    Generate a random hexadecimal string of a given length and then attempt to decode it in ASCII.
    The resulting ASCII string may contain non-printable characters
    or be shorter than the input length.

    Parameters:
    length (int): The length of the hexadecimal string.

    Returns:
    str: The deco

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import urllib.request
import os
import json
import pandas as pd

# Constants
TARGET_JSON_FILE = "downloaded_file.json"


def task_func(url):
    """
    This function retrieves a JSON file from the given URL using urllib.request.urlretrieve,
    temporarily saving it as 'downloaded_file.json'. It then opens and reads this file,
    converts the JSON content into a pandas DataFrame, and

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import requests
from PIL import Image
import io


def task_func(url):
    """
    Fetches an image from a given URL and returns it as a PIL Image object.

    Parameters:
    - url (str): The URL of the image to download. It should be a valid HTTP or
      HTTPS URL pointing directly to an image file.

    Returns:
    - PIL.Image.Image: A PIL Image object representing the downloaded i

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report


def task_func(csv_file_path, target_column="target", test_size=0.2, n_estimators=100):
    """
    Processes a CSV file to train a Random Forest classifier and generates a formatted classification report.

    P

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import pandas as pd
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler

# Constants
PLOT_TITLE = "Scaled Values"


def task_func(data_dict):
    """
    Scales the values in a given dictionary using MinMaxScaler and plots the scaled data.

    Parameters:
    - data_dict (dict): A dictionary where keys represent column names and values are lists of numerical

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
import numpy as np
import matplotlib.pyplot as plt


def task_func(feature: pd.Series, target: pd.Series) -> (np.ndarray, plt.Axes):
    """
    Train a logistic regression model on one feature and evaluate its perform

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import socket
import select
import queue
from datetime import datetime, timedelta


def task_func(
    server_address="localhost", server_port=12345, buffer_size=1024, run_duration=5
):
    """
    Run a non-blocking echo server that appends the server's current time to received data and sends it back to the client, while handling exceptional conditions for each socket.

    Parameters

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

from datetime import datetime
import random
import matplotlib.pyplot as plt


def task_func(date_str):
    """
    Generates a list of random integers, where the count of integers equals the day of the month in the
    provided date, then generates a line plot of these integers and returns the Axes object of the plot.

    Parameters:
    - date_str (str): The date string in "yyyy-mm-d

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
import matplotlib.pyplot as plt

# Constants
STOP_WORDS = ["a", "an", "the", "in", "on", "at", "and", "or"]


def task_func(file_path, save_path=None):
    """
    This function processes a text dataset from a CSV file, performs text vectorization while excluding specific
    stopwords, and creates a histog

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import itertools
import seaborn as sns
import matplotlib.pyplot as plt

# Constants
SHAPES = [
    "Circle",
    "Square",
    "Triangle",
    "Rectangle",
    "Pentagon",
    "Hexagon",
    "Heptagon",
    "Octagon",
    "Nonagon",
    "Decagon",
]
COLORS = [
    "Red",
    "Blue",
    "Green",
    "Yellow",
    "Black",
    "White",
    "Purple",
    "Orange",
    "Pink",
    "Brown"

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import pandas as pd
from matplotlib import pyplot as plt


def task_func(arr):
    """
    Calculate the sum of each row in a 2D numpy array and plot these sums as a time series.

    This function takes a 2D numpy array and computes the sum of elements in each row. It
    then creates a Pandas DataFrame with these row sums and plots them as a time series,
    using dates starting from

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import numpy as np
import matplotlib.pyplot as plt


def task_func(arr):
    """
    Analyzes the distribution of values in a NumPy array to determine if it is uniform and
    generates a histogram representing this distribution.

    Parameters:
    - arr (numpy.ndarray): A NumPy array containing the values to be analyzed. 
      The array can contain any hashable data type (e.g., int

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import re
from collections import Counter
import matplotlib.pyplot as plt


def task_func(text):
    """
    Analyzes the frequency of words in a given text after lowercasing, removing punctuation, splitting into words,
    and plots the top 10 most common words.

    Parameters:
    - text (str): The input text to be analyzed.

    Returns:
    - list: A list of tuples containing the 

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import numpy as np
from collections import Counter


def task_func(list_of_tuples):
    """
    Computes the sum of numeric values and counts the occurrences of categories in a list of tuples.

    Each tuple in the input list contains a numeric value and a category. This function calculates
    the sum of all the numeric values and also counts how many times each category appears in t

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

from nltk.tokenize import RegexpTokenizer
from string import punctuation
import os


def task_func(text, output_filename):
    """
    Extracts words from the input text that begin with the '$' character and saves them to a specified file,
    excluding any words that are solely composed of punctuation characters.

    This function is useful for processing texts where '$' is used to d

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import subprocess
import shlex
from datetime import datetime

def task_func(script_path: str) -> dict:
    '''
    Run an R script and return the start time, end time, decoded stdout, and decoded stderr as a dictionary.
    
    Requirements:
    - subprocess
    - shlex
    - datetime
    
    Parameters:
    - script_path (str): Path to the R script to be executed.
    
    Returns:


Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

from collections import Counter
from operator import itemgetter
import itertools


def task_func(word_dict):
    """
    Given a dictionary of words as keys and letters as values, count the frequency of each letter in the words.
    
    Parameters:
    word_dict (dict): The dictionary with words as keys and their letters as values.
    
    Returns:
    dict: A dictionary with letters

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import os
import hashlib
import json
from pathlib import Path

def task_func(directory: str) -> str:
    """
    Create SHA256 hashes for all files in the specified directory, including files in subdirectories, 
    and save these hashes in a JSON file named 'hashes.json' in the given directory.

    Parameters:
    - directory (str): The path to the directory containing files to be ha

Prompt:
1. Complete the following starter code consisting of import statements, function signature and docstring.
2. Your output must include only the function body with proper Python code and indentation.
3. Do not include phrases like “Completion:” or any other headings, do not show example usage, do not show test cases, do not provide explanatory text.
4. Your output must be with no additional text, no extra symbols, no code fences (```), and no other content before or after the function body.
5. Stop generating immediately after the return statement or final line of the function body.

Starter Code:

import bs4
import requests
import re
import json

def task_func(url: str, output_path: str) -> list:
    """
    Extracts phone numbers from a given URL or local file and saves them to a specified file in JSON format.

    Parameters:
    - url (str): The URL of the webpage to scrape or the local file path prefixed with 'file://'.
    - output_path (str): The path where the extracted p

In [41]:
# if the above cell has not been run
#file_name = 'logs2/solar_10B_samples_20241127_232205_5243.jsonl'

In [16]:
# count how many full funcs were generated vs. only func bodies without func signatures
from collections import Counter
Counter(is_full_func)

Counter({True: 471, False: 29})

In [17]:
evaluate_functional_correctness(file_name, k=[1], mode='big_code')

Reading samples...


500it [00:00, 31057.88it/s]


Running test suites...


100%|██████████████████████████████████████████████████████████████████| 500/500 [00:32<00:00, 15.33it/s]


Writing results to logs/solar_10B_samples_20250124_235810_7835.jsonl_results.jsonl...


100%|███████████████████████████████████████████████████████████████| 500/500 [00:00<00:00, 87516.25it/s]


{'pass@1': 0.434}

## Getting Model Temperature and Top_p
According to [Mistral docs](https://docs.mistral.ai/api/#tag/chat/operation/chat_completion_v1_chat_completions_post):
* Default top_p is 1
* To find out about the default temperature, one neexs to call the `/models` endpoint to retrieve the appropriate value.

In [70]:
# calling the /models endpoint
import requests

api_key = os.environ["MISTRAL_API_KEY"]

# Correct API endpoint
url = "https://api.mistral.ai/v1/models"

# Headers with the correct authorization format
headers = {
    "Authorization": f"Bearer {api_key}",
    "Accept": "application/json"
}

try:
    response = requests.get(url, headers=headers)
    response.raise_for_status()  # Raise an error for HTTP 4xx/5xx codes
    print("Response:")
    print(response.json())
except requests.exceptions.HTTPError as http_err:
    print(f"HTTP error occurred: {http_err}")
    print(f"Response: {response.text}")
except Exception as e:
    print(f"An error occurred: {e}")


Response:
{'object': 'list', 'data': [{'id': 'ministral-3b-2410', 'object': 'model', 'created': 1736121321, 'owned_by': 'mistralai', 'capabilities': {'completion_chat': True, 'completion_fim': False, 'function_calling': True, 'fine_tuning': True, 'vision': False}, 'name': 'ministral-3b-2410', 'description': 'Official ministral-3b-2410 Mistral AI model', 'max_context_length': 131072, 'aliases': ['ministral-3b-latest'], 'deprecation': None, 'default_model_temperature': 0.3, 'type': 'base'}, {'id': 'ministral-3b-latest', 'object': 'model', 'created': 1736121321, 'owned_by': 'mistralai', 'capabilities': {'completion_chat': True, 'completion_fim': False, 'function_calling': True, 'fine_tuning': True, 'vision': False}, 'name': 'ministral-3b-2410', 'description': 'Official ministral-3b-2410 Mistral AI model', 'max_context_length': 131072, 'aliases': ['ministral-3b-2410'], 'deprecation': None, 'default_model_temperature': 0.3, 'type': 'base'}, {'id': 'ministral-8b-2410', 'object': 'model', 'cr

In [72]:
response.json()

{'object': 'list',
 'data': [{'id': 'ministral-3b-2410',
   'object': 'model',
   'created': 1736121321,
   'owned_by': 'mistralai',
   'capabilities': {'completion_chat': True,
    'completion_fim': False,
    'function_calling': True,
    'fine_tuning': True,
    'vision': False},
   'name': 'ministral-3b-2410',
   'description': 'Official ministral-3b-2410 Mistral AI model',
   'max_context_length': 131072,
   'aliases': ['ministral-3b-latest'],
   'deprecation': None,
   'default_model_temperature': 0.3,
   'type': 'base'},
  {'id': 'ministral-3b-latest',
   'object': 'model',
   'created': 1736121321,
   'owned_by': 'mistralai',
   'capabilities': {'completion_chat': True,
    'completion_fim': False,
    'function_calling': True,
    'fine_tuning': True,
    'vision': False},
   'name': 'ministral-3b-2410',
   'description': 'Official ministral-3b-2410 Mistral AI model',
   'max_context_length': 131072,
   'aliases': ['ministral-3b-2410'],
   'deprecation': None,
   'default_mode

In [79]:
for i in response.json()['data']:
    print(f"Model: {i['id']}. Temperature: {i['default_model_temperature']}")

Model: ministral-3b-2410. Temperature: 0.3
Model: ministral-3b-latest. Temperature: 0.3
Model: ministral-8b-2410. Temperature: 0.3
Model: ministral-8b-latest. Temperature: 0.3
Model: open-mistral-7b. Temperature: 0.7
Model: mistral-tiny. Temperature: 0.7
Model: mistral-tiny-2312. Temperature: 0.7
Model: open-mistral-nemo. Temperature: 0.3
Model: open-mistral-nemo-2407. Temperature: 0.3
Model: mistral-tiny-2407. Temperature: 0.3
Model: mistral-tiny-latest. Temperature: 0.3
Model: open-mixtral-8x7b. Temperature: 0.7
Model: mistral-small. Temperature: 0.7
Model: mistral-small-2312. Temperature: 0.7
Model: open-mixtral-8x22b. Temperature: 0.7
Model: open-mixtral-8x22b-2404. Temperature: 0.7
Model: mistral-small-2402. Temperature: 0.7
Model: mistral-small-2409. Temperature: 0.7
Model: mistral-small-latest. Temperature: 0.7
Model: mistral-medium-2312. Temperature: 0.7
Model: mistral-medium. Temperature: 0.7
Model: mistral-medium-latest. Temperature: 0.7
Model: mistral-large-2402. Temperature

In [2]:
complete_code_prompt = '''
**Role**
You are a Python software programmer.

**Instructions**
As a programmer, you are required to complete the following Python code.

**Code Formatting**
Output the completed code in the following format:
```python
[Code]
```

**Code To Be Completed**
'''
print(complete_code_prompt + 'New line')


**Role**
You are a Python software programmer.

**Instructions**
As a programmer, you are required to complete the following Python code.

**Code Formatting**
Output the completed code in the following format:
```python
[Code]
```

**Code To Be Completed**

New line


In [None]:
complete_code_prompt = '''
**Role**
You are a Python software programmer.

**Instructions**
As a programmer, you are required to complete the following Python code.

**Code Formatting**
Output the completed code in the following format:
```python
[Code]
```

**Code To Be Completed**
{}
'''
print(complete_code_prompt. + 'New line')