# ChatGPT Market Data (Redfin) Streamlit App
[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/10zfuRePoOvMmWPkEajcq5ehIbw5LhLHx?usp=sharing)
## Overview
| Detail Tag            | Information                                                                                        |
|-----------------------|----------------------------------------------------------------------------------------------------|
| Originally Created By | Ariel Herrera arielherrera@analyticsariel.com |
| External References   | Open AI API |
| Input Datasets        | Source name |
| Output Datasets       | Source name |
| Input Data Source     | Pandas DataFrame |
| Output Data Source    | String |

## History
| Date         | Developed By  | Reason                                                |
|--------------|---------------|-------------------------------------------------------|
| 1st May 2023 | Ariel Herrera | Create notebook. |

## Getting Started
1. Copy this notebook -> File -> Save a Copy in Drive
2. Directions

## Useful Resources
- [Google Colab Cheat Sheet](https://towardsdatascience.com/cheat-sheet-for-google-colab-63853778c093)

## <font color="blue">Install Packages</font>

In [1]:
!pip install -q streamlit langchain openai

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.9/8.9 MB[0m [31m24.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m727.0/727.0 kB[0m [31m28.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m71.9/71.9 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m164.8/164.8 kB[0m [31m10.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m184.3/184.3 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m4.8/4.8 MB[0m [31m17.2 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m82.1/82.1 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m17.3 MB/s[0m et

## <font color="blue">Streamlit App</font>


In [2]:
%%writefile app.py
import sqlite3
import streamlit as st
import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy.pool import StaticPool
from langchain import OpenAI, SQLDatabase, SQLDatabaseChain


#####################################
#            FUNCTIONS              #
#####################################
@st.cache_data()
def load_data(url):
    """
    read the csv located at `url` and return it as a dataframe
    (wrapped in st.cache_data)
    """
    return pd.read_csv(url)

def prepare_data(df):
    """
    rename columns in place: spaces become underscores, then lowercase;
    returns the same dataframe
    """
    def _normalize(label):
        return label.replace(' ', '_').lower()

    df.columns = list(map(_normalize, df.columns))
    return df


#####################################
#        LOCALS & CONSTANTS         #
#####################################
# name of the sql table the data is written to
table_name = 'statesdb'
# database string passed to sqlite3.connect
uri = "file::memory:?cache=shared"
# states pre-selected in the "Filter on States" widget
default_region_list = [
    'Alabama',
    'Arizona',
    'Arkansas',
    'California',
    'Florida',
    'Georgia',
    'Louisiana',
    'Mississippi',
    'Nevada',
    'New Mexico',
    'North Carolina',
    'Oklahoma',
    'South Carolina',
    'Tennessee',
    'Texas',
]

#####################################
#            HOME PAGE              #
#####################################
st.title('ChatGPT Property Data Streamlit :house:')
st.subheader('Market Data')

# fetch the csv through load_data
url = "https://raw.githubusercontent.com/analyticsariel/public-data/main/202303_redfin_states_sfh.csv"
df = load_data(url)

# state filter, pre-populated with default_region_list (the "smile states")
with st.expander("Filters", expanded=False):
    st.markdown("_Default to smile states_ :smile:")
    region_options = list(df['region'].unique())
    region_list_filter = st.multiselect(
        'Filter on States',
        region_options,
        default_region_list,
    )

# keep only the selected states, sorted by period_begin descending
in_selected_states = df['region'].isin(region_list_filter)
df = (
    df.loc[in_selected_states]
    .sort_values(by=['period_begin'], ascending=False)
    .reset_index(drop=True)
)

# show the first rows of the filtered data
st.dataframe(df.head())

# OpenAI api key, entered as a password field
openai_api_key = st.text_input(
    label="API key",
    type='password',
    placeholder='1234567890',
    help='Enter your OpenAI api key.',
    disabled=False,
)

# free-text question about the dataset
user_q = st.text_input(
    label="User Query",
    help="Enter a question based on the dataset",
    placeholder="Which state has the lowest median dom?",
)

# commit data to sql
data = prepare_data(df)
# uri=True makes sqlite3 interpret `uri` as a SQLite URI; without it the
# string is taken as a literal filename and the table is written to an
# on-disk file instead of the intended shared in-memory database
conn = sqlite3.connect(uri, uri=True)
# replace any table left over from a previous run of the script
data.to_sql(table_name, conn, if_exists='replace', index=False)

# create db engine
# `creator` hands the engine the already-open `conn`, so the engine sees the
# table written above
eng = create_engine(
    url='sqlite:///file:memdb1?mode=memory&cache=shared', 
    poolclass=StaticPool, # single connection for requests
    creator=lambda: conn)
db = SQLDatabase(engine=eng)

# build the OpenAI client and sql chain once a key is supplied,
# then answer the user's question if one was entered
if openai_api_key:
    llm = OpenAI(
        openai_api_key=openai_api_key,
        temperature=0,  # creative scale
        max_tokens=300,
    )
    db_chain = SQLDatabaseChain(llm=llm, database=db, verbose=True)

    if user_q:
        # run query and display result
        st.write(db_chain.run(user_q))

Writing app.py


## Run streamlit in background

In [3]:
!streamlit run /content/app.py &>/content/logs.txt &

## Expose the port 8501
Then just click the `url` shown in the output.

A `logs.txt` file will be created (by the `streamlit run` cell above).

In [4]:
!npx localtunnel --port 8501

[K[?25hnpx: installed 22 in 4.944s
your url is: https://eighty-taxes-show-34-138-154-50.loca.lt
^C


# End Notebook