In [17]:
import streamlit as st
import pandas as pd
import numpy as np
import requests
import json
import openai
import os
from dotenv import load_dotenv
from dotenv import dotenv_values

# 🔁 1 - Data product design

In [18]:
#Setting titles
st.title("My First Streamlit App")
st.write("Hello, world!")

#Fetching data 
DATE_COLUMN = 'date/time'
DATA_URL = ('https://s3-us-west-2.amazonaws.com/'
         'streamlit-demo-data/uber-raw-data-sep14.csv.gz')

@st.cache_data
def load_data(nrows):
    data = pd.read_csv(DATA_URL, nrows=nrows)
    lowercase = lambda x: str(x).lower()
    data.rename(lowercase, axis='columns', inplace=True)
    data[DATE_COLUMN] = pd.to_datetime(data[DATE_COLUMN])
    return data

# Create a text element and let the reader know the data is loading.
data_load_state = st.text('Loading data...')
# Load 10,000 rows of data into the dataframe.
data = load_data(10000)
# Notify the reader that the data was successfully loaded.
data_load_state.text("Done! (using st.cache_data)")


st.subheader('Raw data')
st.write(data)

st.subheader('Number of pickups by hour')
hist_values = np.histogram(
    data[DATE_COLUMN].dt.hour, bins=24, range=(0,24))[0]
st.bar_chart(hist_values)


hour_to_filter = st.slider('hour', 0, 23, 17)  # min: 0h, max: 23h, default: 17h
filtered_data = data[data[DATE_COLUMN].dt.hour == hour_to_filter]
st.subheader(f'Map of all pickups at {hour_to_filter}:00')
st.map(filtered_data)

2023-05-15 13:59:38.115 No runtime found, using MemoryCacheStorageManager


DeltaGenerator(_root_container=0, _provided_cursor=None, _parent=None, _block_type=None, _form_data=None)

# 🔁2 - Data product design

In [19]:
st.subheader('Throw your messy, queasy fabric in !')

# Create three columns for the file uploaders
col1, col2, col3 = st.columns(3)

# File uploader in the first column
with col1:
    uploaded_file1 = st.file_uploader("Fabric 1")
    if uploaded_file1 is not None:
        dataframe1 = pd.read_csv(uploaded_file1)
        st.write(dataframe1)

# File uploader in the second column
with col2:
    uploaded_file2 = st.file_uploader("Fabric 2")
    if uploaded_file2 is not None:
        dataframe2 = pd.read_csv(uploaded_file2)
        st.write(dataframe2)

# File uploader in the third column
with col3:
    uploaded_file3 = st.file_uploader("Fabric 3")
    if uploaded_file3 is not None:
        dataframe3 = pd.read_csv(uploaded_file3)
        st.write(dataframe3)

In [20]:
# Step 2: Connect to the ChatGPT API
def query_chatgpt(prompt):
    load_dotenv()
    api_key = os.getenv('API_KEY')
    print(api_key)
    url = 'https://api.openai.com/v1/chat/completions'
    headers = {
        'Authorization': f'Bearer {api_key}',
        'Content-Type': 'application/json'
    }
    data = {
        'prompt': prompt,
        'max_tokens': 50,
        'temperature': 0.7,
        'n': 1,
        'stop': '\n'
    }
    response = requests.post(url, headers=headers, data=json.dumps(data))
    response_json = response.json()
    print(response_json)
    answer = response_json['choices'][0]['text']
    return answer

# Step 3: Prepare the prompt
def prepare_prompt(data):
    prompt = f"Tell me what are the two most pertinent columns/variables to understand the dataset and explain to me why:\n\n{data}"
    return prompt

# Step 4: Send the prompt to ChatGPT
def send_prompt_to_chatgpt(prompt):
    answer = query_chatgpt(prompt)
    return answer

# Step 5: Display the response
def display_response(response):
    st.write("ChatGPT's Response:")
    st.write(response)


# Streamlit app code
st.title("Dataset Analysis with ChatGPT")
st.subheader('Throw your messy, queasy fabric in !')

# Create three columns for the file uploaders
col1, col2, col3 = st.columns(3)

# File uploader in the first column
with col1:
    uploaded_file1 = st.file_uploader("Fabric 1")
    if uploaded_file1 is not None:
        df1 = pd.read_csv(uploaded_file1)
        first_10_rows1 = df1.head(10)
        prompt1 = prepare_prompt(first_10_rows1)
        response1 = send_prompt_to_chatgpt(prompt1)
        st.write(first_10_rows1)
        display_response(response1)

# File uploader in the second column
with col2:
    uploaded_file2 = st.file_uploader("Fabric 2")
    if uploaded_file2 is not None:
        df2 = pd.read_csv(uploaded_file2)
        first_10_rows2 = df2.head(10)
        prompt2 = prepare_prompt(first_10_rows2)
        response2 = send_prompt_to_chatgpt(prompt2)
        st.write(first_10_rows2)
        display_response(response2)

# File uploader in the third column
with col3:
    uploaded_file3 = st.file_uploader("Fabric 3")
    if uploaded_file3 is not None:
        df3 = pd.read_csv(uploaded_file3)
        first_10_rows3 = df3.head(10)
        prompt3 = prepare_prompt(first_10_rows3)
        response3 = send_prompt_to_chatgpt(prompt3)
        st.write(first_10_rows3)
        display_response(response3)


In [21]:
query_chatgpt('Hello')

None
{'error': {'message': '', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}


KeyError: 'choices'