In [245]:
import pandas as pd
import wikipediaapi
# import notion
import openai
import json
import requests

from vars import openai_key, notion_token, notion_database_id

# Configure the openai module with the API key imported from the local `vars` module.
openai.api_key = openai_key

In [246]:
# Allowed category labels: the first column of a header-less CSV, as a plain list.
categorias = (
    pd.read_csv('resources/cat.csv', header=None, index_col=0)
    .index
    .tolist()
)

In [247]:
def get_completion(prompt, model="gpt-3.5-turbo", temperature=0):
    """Send `prompt` as a single user message to the OpenAI chat API and return the reply text.

    Args:
        prompt: Text used as the content of the only (user) message.
        model: Chat model identifier forwarded to the API.
        temperature: Sampling temperature forwarded to the API.

    Returns:
        The "content" entry of the first choice's message.
    """
    user_turn = {"role": "user", "content": prompt}
    completion = openai.ChatCompletion.create(
        model=model,
        messages=[user_turn],
        temperature=temperature,
    )
    first_choice = completion.choices[0]
    return first_choice.message["content"]

In [248]:
def import_wiki_page(page_name, language='es'):
	"""Look up a Wikipedia page and return the pieces the rest of the notebook uses.

	Args:
		page_name: Title passed to the Wikipedia client's ``page()`` lookup.
		language: Value passed to the ``wikipediaapi.Wikipedia`` constructor.

	Returns:
		A 5-tuple ``(page_name, exists, summary, url, sections)``: the title
		echoed back unchanged, followed by the page's ``exists()`` result and
		its ``summary``, ``fullurl`` and ``sections`` attributes.
	"""
	article = wikipediaapi.Wikipedia(language).page(page_name)
	found = article.exists()
	# Attributes are read in the same order as before: summary, fullurl, sections.
	return page_name, found, article.summary, article.fullurl, article.sections

In [265]:
def _parse_json_reply(raw):
	"""Decode the JSON object contained in a chat reply.

	Chat models often wrap JSON in Markdown fences or add a sentence around it,
	which makes a bare ``json.loads`` fail. Only the span from the first ``{``
	to the last ``}`` is parsed; when no such span exists the raw text is
	parsed as-is so the caller still gets the usual ``json.JSONDecodeError``.
	"""
	start, end = raw.find('{'), raw.rfind('}')
	candidate = raw[start:end + 1] if 0 <= start < end else raw
	return json.loads(candidate)


def get_summary(page_name, exists, summary):
	"""Summarise, classify and tag a Wikipedia article through the chat model.

	Args:
		page_name: Article title, interpolated into the prompt and the error text.
		exists: Whether the page exists; when falsy no API call is made.
		summary: Article text embedded in the prompt between triple backticks.

	Returns:
		When ``exists`` is truthy, a 3-tuple ``(resumen, categoria, keywords)``
		taken from the JSON object returned by the model.
		When ``exists`` is falsy, a single message string instead of a tuple, so
		callers must check ``exists`` before unpacking the result.

	Raises:
		json.JSONDecodeError: if the reply contains no parseable JSON.
		KeyError: if the JSON lacks one of the three expected keys.

	Reads the module-level ``categorias`` list to build the prompt.
	"""
	if exists:
		prompt = f"""
		Tu tarea es generar un resumen corto de un Artículo de wikipedia sobre {page_name} delimitado en triple backticks en no más de 40 palabras
		Conserva el tono informativo e impersonal del artículo.
		Omite información de poca relevancia.
		Clasifíca el artículo en una de las siguientes categorías: {categorias}
		Deriva una lista de entre 2 y 5 keywords principales del artículo
		El formato de salida SIEMPRE debe ser JSON con los siguientes valores de llave:	resumen, categoria, keywords
		Artículo: ```{summary}```
		"""

		# Tolerate fenced or chatty replies instead of crashing on json.loads.
		response = _parse_json_reply(get_completion(prompt))

		return response['resumen'], response['categoria'], response['keywords']

	else:
		return f'La página {page_name} no existe en wikipedia'

In [256]:
def get_section_summary(page_name, section):
	"""Ask the chat model for a short plain-text summary of one article section.

	Args:
		page_name: Article title, interpolated into the prompt.
		section: Section text, embedded in the prompt between triple backticks.

	Returns:
		Whatever ``get_completion`` returns for the assembled prompt.
	"""
	instructions = f"""
	Tu tarea es generar un resumen corto de una sección de un Artículo de wikipedia sobre {page_name} delimitada en triple backticks en no más de 40 palabras
	Conserva el tono informativo e impersonal de la sección.
	Omite información de poca relevancia, no incluyas información de otras secciones.
	El formato de salida debe ser texto plano.
	Artículo: ```{section}```
	"""
	return get_completion(instructions)

In [262]:
def _notion_text_block(block_type, content):
    """Build one Notion block of `block_type` holding a single plain-text rich_text run."""
    return {
        "object": "block",
        "type": block_type,
        block_type: {
            "rich_text": [
                {
                    "type": "text",
                    "text": {
                        "content": content
                    }
                }
            ]
        }
    }


# Create a new page in Notion
def createPage(databaseID, headers, page_name, summary, category, keywords, url, sections):
    """Create a page in a Notion database and fill it with section summaries.

    Two HTTP calls are made: a POST that creates the page with its properties,
    then a PATCH that appends the body blocks (title heading, overall summary,
    and a heading plus summarised paragraph for each kept section).

    Args:
        databaseID: Id of the parent Notion database.
        headers: HTTP headers sent with both requests.
        page_name: Used for the "Título" property, the top heading and the
            section-summary prompts.
        summary: Text of the first paragraph block.
        category: Name for the "Categoría" select property.
        keywords: Iterable of names for the "Tags" multi-select property.
        url: Value for the "URL" property.
        sections: Iterable of objects exposing ``.title`` and ``.text``.

    Returns:
        None. Both raw response bodies are printed.

    Raises:
        requests.HTTPError: if the page-creation request returns an error
            status. Previously this surfaced as ``KeyError: 'id'``.
    """
    createUrl = 'https://api.notion.com/v1/pages'
    newPageData = {
        "parent": {"database_id": databaseID},
        "object": "page",
        "properties": {
            "Título": {"title": [{"text": {"content": page_name}}]},
            "Tags": {"multi_select": [{"name": keyword} for keyword in keywords]},
            "Categoría": {"select": {"name": category}},
            "Revisada": {"checkbox": False},
            "URL": {"url": url},
        },
    }

    res_insert = requests.request("POST", createUrl, headers=headers, data=json.dumps(newPageData))
    print(res_insert.content)
    # Stop with the real HTTP error; an error body has no usable page id, and
    # continuing would also spend chat-API calls on a page that was never created.
    res_insert.raise_for_status()

    newPageID = json.loads(res_insert.content)["id"]

    # Body of the page: title heading followed by the overall summary.
    children = [
        _notion_text_block("heading_1", page_name),
        _notion_text_block("paragraph", summary),
    ]

    # Boilerplate sections that are not worth summarising.
    excluded_titles = ['Referencias', 'Véase también', 'Enlaces externos', 'Notas', 'Bibliografía']

    # NOTE(review): a section whose own text is empty is still sent to the
    # model as-is — confirm whether such sections should be skipped.
    for section in sections:
        if section.title in excluded_titles:
            continue
        children.append(_notion_text_block("heading_2", section.title))
        children.append(
            _notion_text_block("paragraph", get_section_summary(page_name, section.text))
        )

    updateURL = f'https://api.notion.com/v1/blocks/{newPageID}/children'
    res_update = requests.request(
        "PATCH", updateURL, headers=headers, data=json.dumps({"children": children})
    )
    print(res_update.content)

In [266]:
# Headers required by every Notion API request.
headers = {
    "Authorization": "Bearer " + notion_token,
    "Content-Type": "application/json",
    "Notion-Version": "2022-06-28"
}

page_name, exists, summary, url, sections = import_wiki_page('ChatGPT')

# get_summary returns a single message string (not a 3-tuple) for a missing
# page, so only unpack its result and create the Notion page when it exists.
if exists:
    resumen, categoria, keywords = get_summary(page_name, exists, summary)

    createPage(notion_database_id, headers, page_name, resumen, categoria, keywords, url, sections)
else:
    print(get_summary(page_name, exists, summary))

b'{"object":"page","id":"4b54291b-4a74-4814-86b9-cd1f6bb51422","created_time":"2023-05-04T17:37:00.000Z","last_edited_time":"2023-05-04T17:37:00.000Z","created_by":{"object":"user","id":"075b3e55-e840-436e-9df8-ec1fc55e920a"},"last_edited_by":{"object":"user","id":"075b3e55-e840-436e-9df8-ec1fc55e920a"},"cover":null,"icon":null,"parent":{"type":"database_id","database_id":"a0911710-5246-4070-8eb3-400075b24127"},"archived":false,"properties":{"Created time":{"id":"NPI%3F","type":"created_time","created_time":"2023-05-04T17:37:00.000Z"},"Revisada":{"id":"Q_NY","type":"checkbox","checkbox":false},"Categor\xc3\xada":{"id":"SPty","type":"select","select":{"id":"b466bee3-7ed0-41f4-bc7e-2a3a8dc5b9f8","name":"Tecnolog\xc3\xada","color":"pink"}},"Tags":{"id":"Zw%3CQ","type":"multi_select","multi_select":[{"id":"e2c7defb-0590-49f7-a58f-e56e223fa58a","name":"ChatGPT","color":"yellow"},{"id":"ac3bb675-c905-422a-a8f7-47661f5e3ca3","name":"inteligencia artificial","color":"red"},{"id":"8ddc0403-04f9