# Newsletter

In [3]:
!pip install --quiet --requirement requirements.txt

[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
sagemaker 2.203.0 requires uvicorn==0.22.0, but you have uvicorn 0.23.2 which is incompatible.[0m[31m
[0m

In [11]:
import os
import pprint
from typing import Any, Dict, List

import boto3
import hdbscan
import html2text
import pandas as pd
import streamlit as st
from streamlit_gsheets import GSheetsConnection

from langchain import LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.llms.bedrock import Bedrock
from langchain.output_parsers import PandasDataFrameOutputParser
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts import PromptTemplate
from langchain_core.pydantic_v1 import BaseModel, Field, validator

## Import Google Sheet

In [6]:
# Location of the Google Sheet that is read into `df`.
url = "https://docs.google.com/spreadsheets/d/1-d9zHiwDtVUvP9v7ZW3O2Ys6f24Yz_W8zKaNb5uy8dY/edit#gid=0"

# Open the Streamlit "gsheets" connection, then read worksheet "0" of that sheet.
# NOTE(review): ttl="24h" presumably sets how long the read result is cached - confirm.
conn = st.connection("gsheets", type=GSheetsConnection)
df = conn.read(
    spreadsheet=url,
    worksheet="0",
    ttl="24h",
)

2024-01-25 16:11:27.510 
  command:

    streamlit run /home/ec2-user/anaconda3/envs/pytorch_p310/lib/python3.10/site-packages/ipykernel_launcher.py [ARGUMENTS]
2024-01-25 16:11:27.528 No runtime found, using MemoryCacheStorageManager
2024-01-25 16:11:27.530 No runtime found, using MemoryCacheStorageManager


### Clean dataframe

In [7]:
def html_to_text(text):
    """Convert an HTML string to text using html2text with ``ignore_links`` enabled.

    Args:
        text: HTML content of one cell. A ``float`` (how pandas represents a
            missing cell, i.e. NaN) or ``None`` is treated as an empty string.

    Returns:
        The string produced by ``html2text.HTML2Text().handle`` for the input.
    """
    # Missing cells must not reach html2text, which expects a string.
    # Floats were already mapped to ""; None is now handled the same way.
    if text is None or isinstance(text, float):
        text = ""
    converter = html2text.HTML2Text()
    converter.ignore_links = True
    return converter.handle(text)

# Add a text version of every article body next to the original HTML column.
df['ArticleContentTxt'] = df['ArticleContent'].map(html_to_text)

## French title and summary of each blog post

In [8]:
# Bedrock runtime client (us-west-2) and the LangChain wrapper around the
# "anthropic.claude-v2:1" model used by the chains in this notebook.
modelId = "anthropic.claude-v2:1"
bedrock_client = boto3.client(service_name="bedrock-runtime", region_name="us-west-2")
model = Bedrock(
    client=bedrock_client,
    model_id=modelId,
    model_kwargs={
        "max_tokens_to_sample": 4096,
        "stop_sequences": [],
        "temperature": 0.5,  # lower values make the response less random
        "top_p": 1,  # lower values ignore the less probable options
        "top_k": 250,  # how many token choices are considered for the next token
    },
)

In [9]:
def transform_df_columns_to_list_dicts(dataframe: pd.DataFrame, column_names: List[str]) -> List[dict]:
    """Return one dict per row of ``dataframe``, restricted to ``column_names``.

    Args:
        dataframe: Source frame.
        column_names: Columns to copy into each dict; they become the dict keys.

    Returns:
        A list with one ``{column_name: value}`` dict per row, in row order.
        With an empty ``column_names`` each row yields an empty dict.

    Raises:
        KeyError: If a requested column does not exist in ``dataframe``.
    """
    names = list(column_names)
    # Select only the requested columns and iterate plain tuples (name=None).
    # Namedtuple attribute lookup would break on column names that are not
    # valid Python identifiers (spaces, keywords, leading underscores).
    selected = dataframe[names]
    return [
        dict(zip(names, row[1:]))  # row[0] is the index value
        for row in selected.itertuples(index=True, name=None)
    ]

def transform_list_of_dicts_to_dataframe(dicts):
    """Build a DataFrame with one row per dict in ``dicts``.

    Args:
        dicts: Iterable of dicts mapping column name to value. Dicts may have
            different keys; a key missing from a dict becomes a missing value
            in that row.

    Returns:
        A DataFrame with a 0..n-1 index whose columns are the union of all
        keys, in order of first appearance. Column dtypes are inferred by
        pandas. An empty input gives an empty DataFrame.
    """
    # Materialise once so one-shot iterables (generators) are supported.
    records = list(dicts)
    # A single constructor call replaces the row-by-row private `_append`
    # loop, which was quadratic and took its column order from a set.
    return pd.DataFrame(records)


# Extract the title/URL pairs as a list of dicts, then rebuild a DataFrame from them.
new_dicts = transform_df_columns_to_list_dicts(
    df,
    column_names=['ArticleTitle', 'ArticleURL'],
)
new_df = transform_list_of_dicts_to_dataframe(dicts=new_dicts)

### With Pydantic

In [10]:
# Data structure the model's answer is parsed into (one instance per article).
# It is passed as `pydantic_object` to PydanticOutputParser, which also derives
# the prompt's format instructions from it.
# NOTE(review): documented with comments rather than a class docstring on purpose -
# a docstring would presumably surface as the schema description and could change
# the format instructions sent to the model; confirm before adding one.
class RssItemTitles(BaseModel):
    # Article title; the field description does not say which language is expected.
    ArticleTitle: str = Field(description="Title of the article")
    # Title translated to French.
    ArticleTitleFrench: str = Field(description="Title of the article in french")
    # Summary of the article, written in French.
    ArticleSummaryFrench: str = Field(description="Summary of the article in french")
    
        

# Prompt used for every article. Placeholders:
#   {format_instructions} - output-format instructions taken from the output parser
#   {ArticleTitle}        - title of the article
#   {ArticleContentTxt}   - text version of the article body
translate_template = """
You will be acting as a Solutions Architect, working for Amazon Web Services. you are a specialist of Cloud technologies and AWS.

First, translate in French, the title of this article coming from the AWS Blog.
Secondly, create an engaging summary of two sentences in french based of the content of this article coming from the AWS Blog.

{format_instructions}

<title>
{ArticleTitle}
</title>

<content>
{ArticleContentTxt}
</content>

"""

# The parser supplies the format instructions embedded in the prompt and turns
# each model reply into an RssItemTitles object.
parser = PydanticOutputParser(pydantic_object=RssItemTitles)
translate_format_instructions = parser.get_format_instructions()
prompt_translate = PromptTemplate.from_template(
    template=translate_template,
    partial_variables={"format_instructions": translate_format_instructions},
)

# prompt -> Bedrock model -> parser
chain = prompt_translate | model | parser

# One input dict per row of df; for a single article, chain.invoke() takes one such dict.
translate_inputs = transform_df_columns_to_list_dicts(df, ['ArticleTitle', 'ArticleContentTxt'])
results = chain.batch(translate_inputs)

[RssItemTitles(ArticleTitle='Amazon Titan Image Generator Demo - Image Playground | Amazon Web Services', ArticleTitleFrench="Démonstration du générateur d'images Titan d'Amazon - terrain de jeu d'images | Amazon Web Services", ArticleSummaryFrench="Le générateur d'images Titan d'Amazon permet aux créateurs de contenu une idéation et une itération rapides résultant en une génération d'images à haute efficacité. Vous pouvez accéder au modèle de base du générateur d'images Titan d'Amazon dans Amazon Bedrock, qui vous aide à construire et à mettre à l'échelle facilement des applications d'IA générative avec de nouvelles capacités de génération et d'édition d'images."),
 RssItemTitles(ArticleTitle='Amazon ECS and AWS Fargate now integrate with Amazon EBS', ArticleTitleFrench="Amazon ECS et AWS Fargate s'intègrent désormais avec Amazon EBS", ArticleSummaryFrench="Amazon ECS et AWS Fargate permettent maintenant de provisionner et attacher facilement des volumes EBS aux tâches ECS sur Fargate

### With DataFrame

In [25]:
# Variant of the chain whose reply is parsed by a PandasDataFrameOutputParser bound to df.
parser = PandasDataFrameOutputParser(dataframe=df)

df_template = """
You will be acting as a Solutions Architect, working for Amazon Web Services. you are a specialist of Cloud technologies and AWS.

{format_instructions}

{query}

"""

# "query" is provided at invoke time; the format instructions are filled in up front.
prompt = PromptTemplate(
    input_variables=["query"],
    partial_variables={"format_instructions": parser.get_format_instructions()},
    template=df_template,
)
chain = prompt | model | parser

df_query = {"query": "translate in French, each line of the column ArticleTitle."}
parser_output = chain.invoke(df_query)
parser_output

{'ArticleTitle': 0     Amazon Titan Image Generator Demo - Image Play...
 1     Amazon ECS and AWS Fargate now integrate with ...
 2           Effective data sorting with Amazon DynamoDB
 3     AWS re:Invent 2023 - Creating an interactive A...
 4     Automating CloudFront Continuous Deployment wi...
 5     How to leverage Application Load Balancer’s ad...
 6     How Wonder Dynamics is accelerating creativity...
 7     Host the Whisper Model on Amazon SageMaker: ex...
 8     Amazon EKS extended support for Kubernetes ver...
 9              OpenSearch Expands Leadership Beyond AWS
 10    Unlocking cloud-based quality of experience (Q...
 11    Power neural search with AI/ML connectors in A...
 12    Better understand sports fan data with Fan360 ...
 13                   Create Fan360 data products on AWS
 14    adidas: Building a streaming analytics applica...
 15    The journey to IPv6 on Amazon EKS: Interoperab...
 16    Announcing the Amazon Titan Multimodal Embeddi...
 17            