# <b><ins><font size=7>AI-Builders Showcase</font></ins></b>

## <font color='2CA2EE' size=6> Import Libraries and Access API Key</font>

In [21]:
from bs4 import BeautifulSoup
from bs4.element import Comment
from urllib.request import urlopen, Request
import urllib.parse
from langchain.prompts import PromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain.chains import LLMChain, SequentialChain
from langchain.callbacks import get_openai_callback
import pandas as pd
from datetime import datetime
import openai
import os

Replace the `sk-...` placeholder below with your own OpenAI API key before running the notebook.

In [2]:
# Name of the OpenAI chat model used as the default by get_completion().
GPT_MODEL = 'gpt-3.5-turbo-16k'

# Keep a key that is already exported in the environment; only fall back to
# the placeholder below (replace it locally and never commit a real key).
# The previous unconditional assignment overwrote a valid exported key.
os.environ.setdefault('OPENAI_API_KEY', 'sk-...')  # OpenAI API key
openai.api_key = os.environ['OPENAI_API_KEY']

## <font color='2CA2EE' size=6>Web Scraping and Prompt Functions</font>

### <font size=5>• Web Scraping</font>

In [3]:
def tag_visible(element):
    """Tell whether a text node counts as visible page text.

    Args:
        element: A text node whose ``parent.name`` gives the enclosing tag.

    Returns:
        False when the node sits inside a non-rendered tag (style, script,
        head, title, meta, or the document root) or is an HTML comment;
        True otherwise.
    """
    non_rendered_parents = ('style', 'script', 'head', 'title', 'meta', '[document]')
    inside_non_rendered = element.parent.name in non_rendered_parents
    # The parent check runs first; the comment check only runs when it passes.
    return not (inside_non_rendered or isinstance(element, Comment))

In [4]:
def text_from_html(body):
    """Extract the visible text of an HTML document as one string.

    Args:
        body: HTML markup handed to BeautifulSoup (parsed with
            ``html.parser`` and ``from_encoding="utf-8"``).

    Returns:
        Every text node accepted by ``tag_visible``, stripped of surrounding
        whitespace and joined with single spaces.
    """
    soup = BeautifulSoup(body, 'html.parser', from_encoding="utf-8")
    # find_all is the supported spelling; findAll is a deprecated alias.
    text_nodes = soup.find_all(string=True)
    return " ".join(node.strip() for node in text_nodes if tag_visible(node))

In [5]:
# URLs containing non-ASCII characters make urlopen raise UnicodeEncodeError;
# this helper percent-encodes such a URL so it can be requested.
def url_encodeErrror_debug(url):
    """Return ``url`` with unsafe characters percent-encoded per component.

    Each part of the URL is encoded with its own set of characters left
    untouched, so structural delimiters (``:`` before a port, ``=`` and ``&``
    in the query string, ...) keep their meaning. ``%`` is always preserved
    so escapes already present in the URL are not encoded a second time.

    Args:
        url: The URL to sanitise.

    Returns:
        The re-assembled URL. Non-ASCII path, query and fragment characters
        are percent-encoded as UTF-8; a non-ASCII host name is converted with
        the ``idna`` codec.

    Raises:
        UnicodeError: If a non-ASCII host name cannot be IDNA-encoded.
    """
    path_safe = "/%:@!$&'()*+,;="
    query_safe = path_safe + "?"
    userinfo_safe = ":%!$&'()*+,;="

    parts = urllib.parse.urlsplit(url)

    netloc = parts.netloc
    if not netloc.isascii():
        # Percent-escapes are not valid in a host name, so the host is
        # IDNA-encoded while optional credentials and the port are handled
        # separately.
        userinfo, at_sign, hostport = netloc.rpartition('@')
        host, colon, port = hostport.partition(':')
        netloc = (
            urllib.parse.quote(userinfo, safe=userinfo_safe)
            + at_sign
            + host.encode('idna').decode('ascii')
            + colon
            + port
        )

    return urllib.parse.urlunsplit((
        parts.scheme,
        netloc,
        urllib.parse.quote(parts.path, safe=path_safe),
        urllib.parse.quote(parts.query, safe=query_safe),
        urllib.parse.quote(parts.fragment, safe=query_safe),
    ))

In [6]:
def parse_text(url, max_chars=5000):
    """Download a page and return the beginning of its visible text.

    Args:
        url: Address of the page to fetch.
        max_chars: Maximum number of characters returned (defaults to 5000,
            the limit that used to be hard-coded).

    Returns:
        The text produced by ``text_from_html`` for the downloaded page,
        truncated to ``max_chars`` characters.
    """
    headers = {'User-Agent': 'Mozilla/5.0'}
    try:
        page = urlopen(Request(url, headers=headers))
    except UnicodeError:
        # A URL with non-ASCII characters fails while being *encoded* for the
        # request (UnicodeEncodeError); the former UnicodeDecodeError handler
        # never matched it. UnicodeError is the parent of both, so anything
        # caught before is still caught.
        safe_url = url_encodeErrror_debug(url)
        page = urlopen(Request(safe_url, headers=headers))

    # Close the HTTP response as soon as its body has been parsed.
    with page:
        return text_from_html(page)[:max_chars]
    

### <font size=5>• Template Import</font>

In [7]:
def import_summary_template(file):
    """Read the summary prompt template from disk.

    Args:
        file: Path of the template text file.

    Returns:
        The complete file content as a string.
    """
    # Decode explicitly as UTF-8 so the template text does not depend on the
    # platform's default encoding.
    with open(file, 'r', encoding='utf-8') as template_file:
        return template_file.read()

In [8]:
def import_translate_template(file):
    """Read the translation prompt template from disk.

    Args:
        file: Path of the template text file.

    Returns:
        The complete file content as a string.
    """
    # Decode explicitly as UTF-8 so the template text does not depend on the
    # platform's default encoding.
    with open(file, 'r', encoding='utf-8') as template_file:
        return template_file.read()

### <font size=5>• Prompting</font>

In [9]:
def get_completion(summary_temp, translate_temp, parsed_text, model=GPT_MODEL):
    """Run the summarise-then-translate chain on one piece of text.

    Args:
        summary_temp: Prompt template text with a ``parsed_text`` variable.
        translate_temp: Prompt template text with a ``summary`` variable.
        parsed_text: Text to be summarised.
        model: Name of the chat model to use (defaults to ``GPT_MODEL``).

    Returns:
        The result of calling the sequential chain; it is configured to
        expose the ``summary`` and ``translated_summary`` outputs.
    """
    # One chat model instance is shared by both stages.
    chat_model = ChatOpenAI(temperature=0.9, model=model)

    # Stage 1: parsed_text -> summary
    summarise_step = LLMChain(
        llm=chat_model,
        prompt=PromptTemplate(input_variables=["parsed_text"], template=summary_temp),
        output_key="summary",
    )

    # Stage 2: summary -> translated_summary
    translate_step = LLMChain(
        llm=chat_model,
        prompt=PromptTemplate(input_variables=["summary"], template=translate_temp),
        output_key="translated_summary",
    )

    pipeline = SequentialChain(
        chains=[summarise_step, translate_step],
        input_variables=["parsed_text"],
        output_variables=["summary", "translated_summary"],
    )
    chain_inputs = {"parsed_text": parsed_text}
    return pipeline(chain_inputs)


## <font color='2CA2EE' size=6>Main Program</font>

In [12]:
# Load the prompt template used for summarising.
summary_file = './template/summary_prompt.txt'
summary_template = import_summary_template(summary_file)

# Load the prompt template used for translating.
translate_file = './template/translate_prompt.txt'
translate_template = import_translate_template(translate_file)

In [13]:
# Load the CSV file that lists the blog pages to process.
csv_file = "./csv/ai_builders_2023.csv"
df = pd.read_csv(csv_file)

# Column-oriented copy of the table: {column name: {row label: value}}.
dict_df = df.to_dict()

# Values of the 'Blog URL' column, in row order.
list_blogURL = [*dict_df['Blog URL'].values()]

In [40]:
# Summarise every blog page, translate each summary, and add up token usage.
dict_ENGsummary = {}
dict_THsummary = {}
total_cb = prompt_cb = completion_cb = 0

for i, blog_url in enumerate(list_blogURL):
    parsed_text = parse_text(blog_url)

    # The callback records the tokens consumed by the calls made inside it.
    with get_openai_callback() as cb:
        response = get_completion(summary_template, translate_template, parsed_text)

    # Results are keyed by the URL's position in list_blogURL.
    dict_ENGsummary[i] = response['summary']
    dict_THsummary[i] = response['translated_summary']

    total_cb += cb.total_tokens
    prompt_cb += cb.prompt_tokens
    completion_cb += cb.completion_tokens

# Attach both result sets to the table as two new columns.
dict_df.update({
    'English Summary': dict_ENGsummary,
    'Thai Summary': dict_THsummary,
})

In [41]:
# Rebuild a DataFrame from the column dictionary and save it as a CSV file.
summary_df = pd.DataFrame(data=dict_df)
summary_df.to_csv(path_or_buf='./csv/ai_builders_2023_summary.csv')

In [42]:
# Append this run's token usage and estimated cost to the log file.
callback = './callback.txt'
now = datetime.now()
dt_string = now.strftime("%d/%m/%Y %H:%M:%S")

# Prompt and completion tokens are billed at different rates, so the cost is
# computed per category instead of pricing every token at the prompt rate.
# NOTE(review): USD per 1K tokens for gpt-3.5-turbo-16k - confirm against the
# current OpenAI price list before relying on this figure.
PROMPT_USD_PER_1K = 0.003
COMPLETION_USD_PER_1K = 0.004
cost_usd = (prompt_cb / 1000) * PROMPT_USD_PER_1K + (completion_cb / 1000) * COMPLETION_USD_PER_1K

# The with-statement closes the file; no explicit close() call is needed.
with open(callback, 'a') as c:
    c.write(f"""
{dt_string}\n
Total Tokens used: {total_cb}
    Prompt Tokens: {prompt_cb}
    Completion Tokens: {completion_cb}
Cost (USD): ${cost_usd}\n
""")
    c.write('----------------------------------------')