# Newsletter

In [3]:
!pip install --quiet --requirement requirements.txt

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
sagemaker 2.203.0 requires uvicorn==0.22.0, but you have uvicorn 0.23.2 which is incompatible.[0m[31m
[0m

In [61]:
import os
import pprint
from typing import Any, Dict, List

import boto3
import hdbscan
import html2text
import pandas as pd
import streamlit as st
from streamlit_gsheets import GSheetsConnection

from langchain import LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.llms.bedrock import Bedrock
from langchain.prompts import PromptTemplate
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.output_parsers import PandasDataFrameOutputParser
from langchain.output_parsers import PydanticOutputParser
from langchain_core.pydantic_v1 import BaseModel, Field, validator

# Turn off LangChain's global verbose and debug output for this session.
from langchain.globals import set_verbose
from langchain.globals import set_debug
set_verbose(False)
set_debug(False)

# OPENAI Keys
# OPENAI_API_KEY=xxx
# load_dotenv() loads environment variables from a .env file
# (presumably where the key above is kept — confirm).
from dotenv import load_dotenv
load_dotenv()

True

## Import Google Sheet

In [62]:
# Google Sheet that holds the articles (columns such as ArticleTitle / ArticleContent are used below).
url = "https://docs.google.com/spreadsheets/d/1-d9zHiwDtVUvP9v7ZW3O2Ys6f24Yz_W8zKaNb5uy8dY/edit#gid=0"
# Create a connection object.
conn = st.connection("gsheets", type=GSheetsConnection)
# Read worksheet "0" of the sheet into `df`.
# NOTE(review): ttl="24h" presumably caches the read for 24 hours — confirm against the streamlit_gsheets docs.
df = conn.read(spreadsheet=url, worksheet="0", ttl="24h")

2024-01-25 17:41:31.343 No runtime found, using MemoryCacheStorageManager


### Clean dataframe

In [63]:
def html_to_text(text):
    """Convert an HTML fragment to text using html2text with ``ignore_links`` enabled.

    Args:
        text: HTML string. Any non-string value (float NaN from an empty
            cell, ``None``, ...) is treated as an empty document instead of
            being handed to html2text.

    Returns:
        The string returned by ``HTML2Text.handle``.
    """
    # Missing cells do not arrive as strings (NaN is a float, but None can
    # also occur); normalise all of them to an empty document.
    if not isinstance(text, str):
        text = ""
    converter = html2text.HTML2Text()
    converter.ignore_links = True
    return converter.handle(text)

# Plain-text version of every article body, stored in a new column.
df['ArticleContentTxt'] = df['ArticleContent'].apply(html_to_text)

## French title and summary of each blog post

In [41]:
#model = ChatOpenAI(temperature=0)

In [64]:
# Bedrock model identifier handed to the LangChain wrapper below.
modelId = "anthropic.claude-v2:1"
# boto3 client for the "bedrock-runtime" service in us-west-2.
bedrock_client = boto3.client(
    service_name="bedrock-runtime",
    region_name="us-west-2",
)
# LangChain Bedrock LLM; `model` is the object the chains in later cells pipe prompts into.
model = Bedrock(
    model_id=modelId,
    model_kwargs={
        "max_tokens_to_sample": 4096,
        "stop_sequences": [],
        "temperature": 0.5,  # Use a lower value to decrease randomness in the response.
        "top_p": 1,  # Use a lower value to ignore less probable options.
        "top_k": 250,  # Specify the number of token choices the model uses to generate the next token.
    },
    client=bedrock_client,
)

In [65]:
def transform_df_columns_to_list_dicts(dataframe: pd.DataFrame, column_names: List[str]) -> List[dict]:
    """Return one dict per row of ``dataframe``, restricted to ``column_names``.

    Columns are looked up by label, so names that are not valid Python
    identifiers (e.g. ``"Article Title"``) work as well.

    Args:
        dataframe: Source frame.
        column_names: Labels of the columns to copy into each dict.

    Returns:
        A list with ``len(dataframe)`` dicts mapping each requested column
        name to that row's value, in row order.

    Raises:
        KeyError: If a requested column does not exist in ``dataframe``.
    """
    names = list(column_names)
    if not names:
        # No columns requested: still one (empty) dict per row.
        return [{} for _ in range(len(dataframe))]
    # Select by label instead of attribute access on itertuples() rows, which
    # silently renames columns whose labels are not valid identifiers.
    selected = [dataframe[name] for name in names]
    return [dict(zip(names, values)) for values in zip(*selected)]

def transform_list_of_dicts_to_dataframe(dicts):
    """Build a DataFrame with one row per dict in ``dicts``.

    Args:
        dicts: Iterable of dicts. Keys become columns; a key missing from a
            given dict yields a missing value in that row.

    Returns:
        A DataFrame whose columns are the union of all keys, ordered by first
        appearance, and whose rows follow the order of ``dicts``.
    """
    records = list(dicts)
    # dict.fromkeys de-duplicates while keeping first-seen order, so the column
    # order is deterministic (a set would make it vary between runs).
    column_names = list(dict.fromkeys(key for record in records for key in record))
    # Build the frame in one call rather than appending row by row through the
    # private DataFrame._append, which copies the frame on every iteration.
    return pd.DataFrame(records, columns=column_names)


# Extract title/URL records from `df`, then rebuild a DataFrame from those records.
new_dicts = transform_df_columns_to_list_dicts(df, ['ArticleTitle', 'ArticleURL'])
new_df = transform_list_of_dicts_to_dataframe(new_dicts)

### With Pydantic

In [72]:
# Data structure
# Fields the model's answer is parsed into: the original title, its French
# translation, and a French summary. The Field descriptions are runtime text.
# NOTE(review): documented with `#` comments on purpose — a class docstring
# would presumably end up in the schema/format instructions sent to the model;
# verify before adding one.
class RssItemTitles(BaseModel):
    ArticleTitle: str = Field(description="Title of the article")
    ArticleTitleFrench: str = Field(description="Title of the article translated in French")
    #ArticleContentTxt: str = Field(description="Content of the article")
    ArticleSummaryFrench: str = Field(description="Summary of content of the article translated in French")
    
        

# Prompt asking for a French translation of the title and a one-sentence French summary.
# {format_instructions} is pre-filled from the parser below; {ArticleTitle} and
# {ArticleContentTxt} are supplied per article when the chain is invoked.
translate_template = """
You will be acting as a Solutions Architect, working for Amazon Web Services. You are a specialist of Cloud Computing technologies and AWS. You write articles on AWS Blog post. 

First, translate in French, the title of this article.
Secondly, create an engaging summary of one sentence in french based of the content of this article.

{format_instructions}

<title>
{ArticleTitle}
</title>

<content>
{ArticleContentTxt}
</content>

"""

# Parser that turns the model output into RssItemTitles objects.
parser = PydanticOutputParser(pydantic_object=RssItemTitles)
prompt_translate = PromptTemplate.from_template(
    template=translate_template,
    partial_variables={"format_instructions": parser.get_format_instructions()},
)
# Pipeline: prompt -> model -> parser.
chain = prompt_translate | model | parser
#chain.invoke({"ArticleTitle":"Automating CloudFront Continuous Deployment with a CI/CD Pipeline", "ArticleContentTxt":"In November 2022, Amazon Web Services (AWS) announced the launch of Amazon CloudFront continuous deployment, extending the functionality of your existing CloudFront distributions by allowing you to test and validate configuration changes to a percentage of live traffic before extending to your wider audience. Previously, customers had to do the heavy lifting of changing DNS records and creating separate domains for testing to then override DNS settings on clients for utilizing that endpoint once ready for production. Additionally, changes on lower environments need a hard change to production which can negatively affect production traffic. These efforts create inconsistencies, headaches, and significant overhead between environments that are hard to manage and maintain."})
# Run the chain on one input dict per row of `df` (title + plain-text content) and display the parsed results.
results = chain.batch(transform_df_columns_to_list_dicts(df, ['ArticleTitle','ArticleContentTxt']))
results

[RssItemTitles(ArticleTitle='Amazon Titan Image Generator Demo - Image Playground | Amazon Web Services', ArticleTitleFrench="Démonstration du générateur d'images Titan d'Amazon - terrain de jeu d'images | Amazon Web Services", ArticleSummaryFrench="Le générateur d'images Titan d'Amazon permet aux créateurs de contenu une idéation et une itération rapides résultant en une génération d'images à haute efficacité."),
 RssItemTitles(ArticleTitle='Amazon ECS and AWS Fargate now integrate with Amazon EBS', ArticleTitleFrench='Amazon ECS et AWS Fargate sont désormais intégrés avec Amazon EBS', ArticleSummaryFrench="Amazon ECS et AWS Fargate permettent maintenant de provisionner et attacher facilement des volumes EBS aux tâches Amazon ECS s'exécutant sur Fargate et EC2 à l'aide des API Amazon ECS."),
 RssItemTitles(ArticleTitle='Effective data sorting with Amazon DynamoDB', ArticleTitleFrench='Un tri efficace des données avec Amazon DynamoDB', ArticleSummaryFrench='Comment trier efficacement l

### With DataFrame

In [56]:
# NOTE(review): this cell rebinds `parser` and `chain`, which the Pydantic cell also defines.
# NOTE(review): the recorded output of this cell ends in
# "OutputParserException: Invalid operation: do_something", and the query it logged
# contains a "Third step" that df_query below does not — the saved output appears to
# come from an earlier version of this cell.
parser = PandasDataFrameOutputParser(dataframe=df)

df_template = """
You will be acting as a Solutions Architect, working for Amazon Web Services. you are a specialist of Cloud technologies and AWS.

{format_instructions}

{query}

"""
# Set up the prompt.
prompt = PromptTemplate(
    template=df_template,
    input_variables=["query"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
)

# Pipeline: prompt -> model -> DataFrame output parser.
chain = prompt | model | parser
#df_query = "translate in French, the content of each line of the column ArticleTitle."
df_query = "First step: for each row of the dataframe.\n Second step: get the text of the column ArticleContentTxt.\n"
parser_output = chain.invoke({"query": df_query })
parser_output

[32;1m[1;3m[chain/start][0m [1m[1:chain:RunnableSequence] Entering Chain run with input:
[0m{
  "query": "First step: for each row of the dataframe.\n Second step: get the text of the column ArticleContentTxt.\n Third step: Summarize this text."
}
[32;1m[1;3m[chain/start][0m [1m[1:chain:RunnableSequence > 2:prompt:PromptTemplate] Entering Prompt run with input:
[0m{
  "query": "First step: for each row of the dataframe.\n Second step: get the text of the column ArticleContentTxt.\n Third step: Summarize this text."
}
[36;1m[1;3m[chain/end][0m [1m[1:chain:RunnableSequence > 2:prompt:PromptTemplate] [1ms] Exiting Prompt run with output:
[0m{
  "lc": 1,
  "type": "constructor",
  "id": [
    "langchain",
    "prompts",
    "base",
    "StringPromptValue"
  ],
  "kwargs": {
    "text": "\nYou will be acting as a Solutions Architect, working for Amazon Web Services. you are a specialist of Cloud technologies and AWS.\n\nThe output should be formatted as a string as the operat

OutputParserException: Invalid operation: do_something. Please check the format instructions.