In [10]:
from dotenv import load_dotenv
from openai import OpenAI
from tenacity import retry, wait_random_exponential, stop_after_attempt

from assistant.tools import fetch_github_repo

In [13]:
# Load variables from a local .env file into the environment before the client
# is constructed (no credentials are passed to OpenAI() explicitly below).
load_dotenv()

# Default chat model; used as the default `model` for the helpers defined later.
GPT_MODEL = 'gpt-3.5-turbo'

# Single shared client reused by every request in this notebook.
# NOTE(review): presumably reads its API key from the environment - confirm.
client = OpenAI()



In [15]:
# Smoke test: send a short hard-coded conversation straight through the client.
# The last user turn ('Where was it played?') only makes sense given the earlier
# turns, so this also shows that context is carried by the `messages` list.
response = client.chat.completions.create(
  model=GPT_MODEL,
  messages=[
    {'role': 'system', 'content': 'You are a helpful assistant.'},
    {'role': 'user', 'content': 'Who won the world series in 2020?'},
    {'role': 'assistant', 'content': 'The Los Angeles Dodgers won the World Series in 2020.'},
    {'role': 'user', 'content': 'Where was it played?'}
  ]
)


ChatCompletion(id='chatcmpl-9hv9UvhuMOjnOM8S3OC4v80wcDYky', choices=[Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content='The 2020 World Series was played at Globe Life Field in Arlington, Texas.', role='assistant', function_call=None, tool_calls=None))], created=1720254824, model='gpt-3.5-turbo-0125', object='chat.completion', service_tier=None, system_fingerprint=None, usage=CompletionUsage(completion_tokens=17, prompt_tokens=53, total_tokens=70))

In [25]:
# Text of the first (and here only) choice returned by the smoke-test call above.
response.choices[0].message.content

'The 2020 World Series was played at Globe Life Field in Arlington, Texas.'

In [31]:
@retry(wait=wait_random_exponential(multiplier=1, max=40), stop=stop_after_attempt(3), reraise=True)
def chat_completion_request(messages, tools=None, tool_choice=None, model=GPT_MODEL):
    """Send a chat completion request, retrying on failure.

    Args:
        messages: List of chat message dicts (``{'role': ..., 'content': ...}``).
        tools: Optional list of tool definitions. ``None`` or an empty list
            means the ``tools`` field is left out of the request entirely.
        tool_choice: Optional tool choice; left out of the request when ``None``.
        model: Model name, defaults to ``GPT_MODEL``.

    Returns:
        The response object returned by ``client.chat.completions.create``.

    Raises:
        Exception: Whatever the client raised on the final attempt. The
            exception is re-raised (not returned) so that the ``@retry``
            decorator actually sees the failure; with ``reraise=True`` the
            original exception surfaces once the attempts are exhausted.
    """
    # Only include optional fields that carry a value, so the request never
    # contains an empty tools list or an explicit null for either field.
    request_kwargs = {'model': model, 'messages': messages}
    if tools:
        request_kwargs['tools'] = tools
    if tool_choice is not None:
        request_kwargs['tool_choice'] = tool_choice

    try:
        return client.chat.completions.create(**request_kwargs)
    except Exception as e:
        print('Unable to generate ChatCompletion response')
        print(f'Exception: {e}')
        # Returning `e` here would look like success to tenacity and hand the
        # caller an exception object in place of a response.
        raise

In [32]:
class Agent:
    """Minimal conversational agent that keeps the chat history in memory.

    Attributes are plain lists that callers may mutate directly:
    ``memory`` holds the chat messages sent with every request and ``tools``
    holds the tool definitions offered to the model.
    """

    def __init__(self, model=GPT_MODEL):
        self.model = model
        self.memory = []  # chat history as a list of {'role', 'content'} dicts
        self.tools = []   # tool definitions forwarded with each request

    def invoke(self, message):
        """Send ``message`` as a user turn and return the assistant's reply text.

        The user message and the assistant reply are both appended to
        ``self.memory``. If the turn fails for any reason, the user message is
        removed again so a later call does not resend an unanswered turn.

        Raises:
            NotImplementedError: If the model asks for a tool call; tool
                execution is not implemented yet.
            Exception: Any error produced by ``chat_completion_request``.
        """
        turn_start = len(self.memory)
        self.memory.append(
            {'role': 'user', 'content': message}
        )
        try:
            return self._complete_turn()
        except BaseException:
            # Roll back to the state before this turn, then propagate.
            del self.memory[turn_start:]
            raise

    def _complete_turn(self):
        """Request a completion for the current memory and record the reply."""
        request_kwargs = {'messages': self.memory, 'model': self.model}
        # Only forward tools when at least one is registered.
        # NOTE(review): the API appears to reject an empty tools list - confirm.
        if self.tools:
            request_kwargs['tools'] = self.tools

        chat_response = chat_completion_request(**request_kwargs)

        # The helper may hand back the exception object instead of raising it;
        # surface it here rather than failing later on `.choices`.
        if isinstance(chat_response, Exception):
            raise chat_response

        choice = chat_response.choices[0]

        if choice.finish_reason in ('tool_calls', 'function_call'):
            # Previously this path fell through and returned None silently.
            raise NotImplementedError(
                'The model requested a tool call, but tool execution is not implemented yet'
            )

        # 'stop' and every remaining finish reason (e.g. 'length'): keep and
        # return whatever text the model produced instead of dropping it.
        chat_response_message = choice.message.content
        self.memory.append(
            {'role': 'assistant', 'content': chat_response_message}
        )
        return chat_response_message
                




In [37]:
# Fresh agent using the default model.
agent = Agent()

# Seed the conversation with the system prompt. This appends to agent.memory in
# place, so the `agent = Agent()` line above must be re-run together with it;
# otherwise a second copy of the prompt is added.
agent.memory.append(
    {'role': 'system', 'content': '''You are an expert at reading code and also developing programs. You will be given some data from a github repo. 
    Primary Objectives: 
    1. You should identify only relevant files and folders 
    For example: .gitattributes, .gitignore might be irrelevant
    2. Convert any notebook code (e.g. .ipynb) into more readable python
    For example: ```{\n   "cell_type": "code",\n   "execution_count": 10,\n   "metadata": {},\n   "outputs": [],\n   "source": [\n    "from dotenv import load_dotenv\\n",\n    "from openai import OpenAI\\n",\n    "from tenacity import retry, wait_random_exponential, stop_after_attempt"\n   ]\n  }```
    Should become: ``` from dotenv import load_dotenv
from openai import OpenAI
from tenacity import retry, wait_random_exponential, stop_after_attempt```
    3. Once the above 2 steps have been done, give me output in the following format:
    
    
     ## Repo Structure
     (insert repo structure here)

     ## File contents
     file1.py
     ```file1.py contents```

     file2.ipynb
     ```file2.ipynb contents```
    
'''}
)

# Register one dummy function tool with the agent. Its own description says it
# is not meant to be called.
# NOTE(review): presumably present only so the tools list is not empty - confirm.
agent.tools.append(dict(
    type='function',
    function=dict(
        name='placeholder_tool',
        description='Placeholder tool not to be used',
        parameters=dict(
            type='object',
            properties=dict(
                placeholder_property=dict(
                    type='string',
                    description='Placeholder property',
                ),
                placeholder_property_2=dict(
                    type='integer',
                    description='Placeholder property 2',
                ),
            ),
            required=['placeholder_property', 'placeholder_property_2'],
        ),
    ),
))


# Fetch the repository through the project helper.
# NOTE(review): this performs network I/O on every run - consider caching.
repo_data = fetch_github_repo("https://github.com/cetyz/coding-ai")

# The whole payload is stringified with str() and sent as one user message.
# NOTE: this rebinds `response`, which earlier held the smoke-test completion
# object; from here on it is whatever agent.invoke() returned.
response = agent.invoke(str(repo_data))
print(response)

# while True:
#     user_input = input('User Message:')
#     if user_input == 'exit':
#         break
#     response = agent.invoke(user_input)
#     print('Assistant:', response)
#     print()

## Relevant Files and Folders
- README.md
- assistant/tools.py
- playground.ipynb
- requirements.txt

## File Contents

### README.md
```
# AI Coding Assistant
 
An OpenAI-powered assistant that will get actual code from your github repository.
Then ask it questions and it will help you.

## Benefits
- No need to copy and paste snippets of your code since the assistant is able to retrieve your actual code
- Holistic project understanding: the assistant can better assist with overall project development since it can see your whole repo

## Weaknesses
- This is still work in progress, I'll let you know
```

### assistant/tools.py
```python
import requests
from typing import Dict, Any

def fetch_github_repo(repo_url: str) -> Dict[str, Any]:
    """
    Fetches all files from a given GitHub repository URL and returns their content.
    """
    # Extract owner and repo from the URL
    parts = repo_url.split('/')
    owner, repo = parts[-2], parts[-1]

    # GitHub API URL to get the repo c

In [34]:


# Example usage: fetch the repository and print its stringified contents.
# NOTE: `repo_data` is bound to a str here, whereas the agent cell stores the
# raw return value of fetch_github_repo under the same name - the type of
# `repo_data` therefore depends on which cell ran last.
repo_data = str(fetch_github_repo("https://github.com/cetyz/coding-ai"))
print(repo_data)

{'.gitattributes': '# Auto detect text files and perform LF normalization\n* text=auto\n', '.gitignore': "# Byte-compiled / optimized / DLL files\n__pycache__/\n*.py[cod]\n*$py.class\n\n# C extensions\n*.so\n\n# Distribution / packaging\n.Python\nbuild/\ndevelop-eggs/\ndist/\ndownloads/\neggs/\n.eggs/\nlib/\nlib64/\nparts/\nsdist/\nvar/\nwheels/\nshare/python-wheels/\n*.egg-info/\n.installed.cfg\n*.egg\nMANIFEST\n\n# PyInstaller\n#  Usually these files are written by a python script from a template\n#  before PyInstaller builds the exe, so as to inject date/other infos into it.\n*.manifest\n*.spec\n\n# Installer logs\npip-log.txt\npip-delete-this-directory.txt\n\n# Unit test / coverage reports\nhtmlcov/\n.tox/\n.nox/\n.coverage\n.coverage.*\n.cache\nnosetests.xml\ncoverage.xml\n*.cover\n*.py,cover\n.hypothesis/\n.pytest_cache/\ncover/\n\n# Translations\n*.mo\n*.pot\n\n# Django stuff:\n*.log\nlocal_settings.py\ndb.sqlite3\ndb.sqlite3-journal\n\n# Flask stuff:\ninstance/\n.webassets-cach