Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 5 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,10 @@ WORKDIR /app

# Install system dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
gcc libc6-dev \
build-essential \
python3-dev \
gcc \
g++ \
&& rm -rf /var/lib/apt/lists/*

# Copy only the requirements file first to leverage Docker cache
Expand Down Expand Up @@ -44,4 +47,4 @@ ENV PYTHONUNBUFFERED=1
EXPOSE 8000

# Run the application
ENTRYPOINT ["python", "optillm.py"]
CMD ["optillm"]
29 changes: 24 additions & 5 deletions optillm.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,11 +246,10 @@ def execute_single_approach(approach, system_prompt, initial_query, client, mode
if hasattr(request, 'json'):
data = request.get_json()
messages = data.get('messages', [])
# Copy all parameters except 'model' and 'messages'
# Copy all parameters except 'stream', 'model', 'n' and 'messages'
kwargs = {k: v for k, v in data.items()
if k not in ['model', 'messages', 'optillm_approach']}
if k not in ['model', 'messages', 'stream', 'n', 'optillm_approach']}
response = none_approach(original_messages=messages, client=client, model=model, **kwargs)

# For none approach, we return the response and a token count of 0
# since the full token count is already in the response
return response, 0
Expand Down Expand Up @@ -369,6 +368,21 @@ def generate_streaming_response(final_response, model):
# Yield the final message to indicate the stream has ended
yield "data: [DONE]\n\n"

def extract_contents(response_obj):
    """Collect the first-choice message content from one or more chat responses.

    Accepts either a single response dict or a list of response dicts
    (OpenAI chat-completion shaped) and returns a list holding each
    response's choices[0].message.content. Responses where any part of
    that path is missing or empty are skipped.
    """
    # Normalize to a list so a single response and a batch share one code path.
    batch = response_obj if isinstance(response_obj, list) else [response_obj]

    extracted = []
    for item in batch:
        choices = item.get('choices') or []
        if not choices:
            continue
        message = choices[0].get('message') or {}
        content = message.get('content')
        # Only keep truthy content — empty strings are intentionally dropped,
        # matching the guard chain this replaces.
        if content:
            extracted.append(content)
    return extracted

def parse_conversation(messages):
system_prompt = ""
conversation = []
Expand Down Expand Up @@ -523,8 +537,13 @@ def proxy():
result = responses
else:
result, completion_tokens = execute_single_approach(approaches[0], system_prompt, initial_query, client, model)

logger.debug(f'Direct proxy response: {result}')
return jsonify(result), 200

if stream:
return Response(generate_streaming_response(extract_contents(result), model), content_type='text/event-stream')
else :
return jsonify(result), 200

elif operation == 'AND' or operation == 'OR':
if contains_none:
Expand All @@ -545,7 +564,7 @@ def proxy():
messages = tagged_conversation_to_messages(response)
if messages: # Only take the last message if we have any
response = messages[-1]['content']

if stream:
return Response(generate_streaming_response(response, model), content_type='text/event-stream')
else:
Expand Down
2 changes: 1 addition & 1 deletion optillm/plugins/readurls_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def extract_urls(text: str) -> List[str]:
def fetch_webpage_content(url: str, max_length: int = 100000) -> str:
try:
headers = {
'User-Agent': 'optillm/0.0.19 (https://github.com/codelion/optillm)'
'User-Agent': 'optillm/0.0.20 (https://github.com/codelion/optillm)'
}

response = requests.get(url, headers=headers, timeout=10)
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

setup(
name="optillm",
version="0.0.19",
version="0.0.20",
packages=find_packages(),
py_modules=['optillm'],
package_data={
Expand Down