In [17]:
import base64
import json

from dotenv import load_dotenv

load_dotenv()

import os

from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.llms import OpenAI

import requests
import pandas as pd


In [18]:
def qa(file_path, prompt):

    loader = TextLoader(file_path)

    documents = loader.load()
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=50)
    texts = text_splitter.split_documents(documents)

    embeddings = OpenAIEmbeddings()
    docsearch = Chroma.from_documents(texts, embeddings)

    qa = RetrievalQA.from_chain_type(llm=OpenAI(), chain_type="stuff", retriever=docsearch.as_retriever())

    return qa.run(prompt)

In [19]:
prompt = """
Fill in X:

The rockies won X games. They lost X games. They played X games.

On average they score X runs per game. The average score of their opponents was X runs per game.

They won X games at home and X games on the road.

The games they won their win probability was X. The games they lost their win probability was X.

Their next game is 7/5/2023 against X. Their win probability is X. They will likely score X runs.

If you don't know, estimate.

"""
rockies_estimate = qa('data/rockies_schedule.csv', prompt)

rockies_estimate

Number of requested results 4 is greater than number of elements in index 1, updating n_results = 1


'\nThe Rockies won 42 games. They lost 5 games. They played 47 games.\n\nOn average they score 5.7 runs per game. The average score of their opponents was 5.2 runs per game.\n\nThey won 24 games at home and 18 games on the road.\n\nThe games they won their win probability was 0.446. The games they lost their win probability was 0.357.\n\nTheir next game is 7/5/2023 against the Houston Astros. Their win probability is 0.388. They will likely score 4 runs.'

In [20]:
prompt = """

Fill in X:

The astros won X games. They lost X games. They played X games.

On average they score X runs per game. The average score of their opponents was X runs per game.

They won X games at home and X games on the road.

The games they won their win probability was X. The games they lost their win probability was X.

Their next game is 7/5/2023 against X. Their win probability is X. They will likely score X runs.

If you don't know, estimate.

"""
astros_estimate = qa('data/astros_schedule.csv', prompt)

astros_estimate

Number of requested results 4 is greater than number of elements in index 1, updating n_results = 1


' The Astros won 35 games, lost 24 games, and played 59 games. On average they score 5.7 runs per game and the average score of their opponents was 4.7 runs per game. They won 24 games at home and 11 games on the road. The games they won their win probability was 0.624 and the games they lost their win probability was 0.472. Their next game is 7/5/2023 against the Seattle Mariners. Their win probability is 0.589 and they will likely score 5 runs.'

In [21]:
estimate = rockies_estimate + astros_estimate
with open('data/estimate.txt', 'w') as f:
    f.write(estimate)

In [24]:

prompt = """

Use the file provided to pick a winner for this moneyline bet: Moneyline Colorado Rockies (+185) v Houstan Astros  (-225)

Colorado Rockies (+185): If you bet on the Rockies to win, you're taking the underdog. The "+185" means that if you bet $100, you stand to win $185 (in addition to getting your original $100 stake back) if the Rockies do indeed win the game.

Houston Astros (-225): If you bet on the Astros to win, you're taking the favorite. The "-225" means that you would need to wager $225 in order to win $100 (plus you would get your original $225 stake back) if the Astros win the game.

Your response should be either "Rockies" or "Astros"

"""
winner = qa('data/estimate.txt', prompt)

with open('data/output.txt', 'w') as f:
    f.write(winner)

Number of requested results 4 is greater than number of elements in index 1, updating n_results = 1
