In [1]:
from coveopush import CoveoPush
from coveopush import Document
from coveopush import CoveoPermissions
from coveopush import CoveoConstants
from bs4 import BeautifulSoup
import requests
import json
from config import sourceId, orgId, apiKey


In [2]:
# Push API configuration.
# The source id, organization id and API key are imported from the local
# `config` module; the client is pointed at the QA Push API endpoint.
push = CoveoPush.Push(
    p_SourceId=sourceId,
    p_OrganizationId=orgId,
    p_ApiKey=apiKey,
    p_Endpoint=CoveoConstants.Constants.PushApiEndpoint.QA_PUSH_API_URL,
)
# Identity that pushData passes in the allowed-permissions list of every document.
user_email = "jmeng@coveo.com"
# NOTE(review): the empty second argument is presumably the security provider
# name (left blank here) -- confirm against the PermissionIdentity signature.
my_permissions = CoveoPermissions.PermissionIdentity(CoveoConstants.Constants.PermissionIdentityType.User, "", user_email)
# Passed as the third argument of SetAllowedAndDeniedPermissions in pushData.
allowAnonymous = True

In [3]:
# SCRAPE PokemonDB
def requestAndParse(URL, timeout=10):
    """Fetch ``URL`` over HTTP and return the response body parsed as HTML.

    Args:
        URL: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely on a stalled
            connection.

    Returns:
        A ``BeautifulSoup`` tree built from the response content with the
        built-in ``html.parser``.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection errors or on timeout.
    """
    page = requests.get(URL, timeout=timeout)
    # Fail loudly on an error response instead of parsing an error page, which
    # would silently produce no matching elements for the cells that follow.
    page.raise_for_status()
    return BeautifulSoup(page.content, 'html.parser')

# Download and parse the national Pokedex page once; everything below works
# from this in-memory tree.
soup = requestAndParse("https://pokemondb.net/pokedex/national")


# Every <div class="infocard-list"> is treated as one generation:
# processGenerations numbers these elements from 1 in the order found here.
generations = soup.find_all('div', class_="infocard-list")

In [14]:
def processGenerations():
    """Extract the Pokemon records for every scraped generation.

    Reads the module-level ``generations`` collection and numbers its entries
    from 1 in iteration order.

    Returns:
        A list with one element per generation, each being whatever
        ``processGeneration`` returns for that generation.
    """
    return [
        processGeneration(generationBlock, generationNumber)
        for generationNumber, generationBlock in enumerate(generations, start=1)
    ]

def processGeneration(generationData, generationNumber):
    """Turn one generation's markup into a list of Pokemon records.

    Args:
        generationData: Parsed element for one generation; must support
            ``find_all``.
        generationNumber: Number attached to every record of this generation.

    Returns:
        The result of ``extractAllPokemonData`` applied to the generation's
        ``div.infocard`` elements.
    """
    return extractAllPokemonData(
        generationData.find_all('div', class_="infocard"),
        generationNumber,
    )

In [15]:
def extractDataFromCard(infoCard, generationNumber):
    """Build the record for a single Pokemon info card.

    Args:
        infoCard: Parsed card element; must support ``find`` and ``find_all``.
        generationNumber: Generation the card belongs to.

    Returns:
        A dict with the keys ``"name"`` (text of the ``a.ent-name`` link),
        ``"type"`` (texts of the ``a.itype`` links, in the order ``find_all``
        returns them) and ``"generation"``.
    """
    nameLink = infoCard.find('a', class_="ent-name")
    record = {"name": nameLink.text}
    record["type"] = [
        typeLink.text for typeLink in infoCard.find_all('a', class_='itype')
    ]
    record["generation"] = generationNumber
    return record

In [16]:
def extractAllPokemonData(infoCards, generationNumber):
    """Convert every info card of one generation into a record.

    Args:
        infoCards: Iterable of parsed card elements.
        generationNumber: Generation number forwarded to each record.

    Returns:
        A list of the ``extractDataFromCard`` results, in the order the cards
        were given.
    """
    return [extractDataFromCard(card, generationNumber) for card in infoCards]

In [33]:
def pushData(generationData, limit=5):
    """Push scraped Pokemon records to the Coveo source, one document each.

    Uses the module-level ``push`` client, ``my_permissions`` and
    ``allowAnonymous`` settings.

    Args:
        generationData: Iterable of generations, each an iterable of dicts
            with the keys ``"name"``, ``"type"`` and ``"generation"``.
        limit: Maximum number of documents to push across all generations.
            Defaults to 5, the cap that used to be hard-coded; pass ``None``
            to push every record.

    Returns:
        None.
    """
    pushedCount = 0
    for generation in generationData:
        for pokemon in generation:
            # Stop as soon as the cap is reached rather than walking the
            # remaining records without doing anything with them.
            if limit is not None and pushedCount >= limit:
                return
            print(pokemon)
            pokemonName = pokemon["name"]
            mydoc = Document(f"file://folder/pokemon/{pokemonName}")
            # NOTE(review): the extension is ".json" while "connectortype" is
            # set to "CSV" below -- confirm this combination is intended.
            mydoc.FileExtension = ".json"
            mydoc.Title = pokemonName
            mydoc.AddMetadata("connectortype", "CSV")
            mydoc.SetData(pokemonName)
            mydoc.AddMetadata("type", pokemon["type"])
            mydoc.AddMetadata("generation", pokemon["generation"])
            mydoc.SetAllowedAndDeniedPermissions([my_permissions], [], allowAnonymous)
            push.AddSingleDocument(mydoc)
            pushedCount += 1

In [34]:
# Run the pipeline: build the per-generation records from the scraped page,
# then push them. pushData only uploads the first 5 records it encounters.
extractedData = processGenerations()
pushData(extractedData)

{'name': 'Bulbasaur', 'type': ['Grass', 'Poison'], 'generation': 1}
{'name': 'Ivysaur', 'type': ['Grass', 'Poison'], 'generation': 1}
{'name': 'Venusaur', 'type': ['Grass', 'Poison'], 'generation': 1}
{'name': 'Charmander', 'type': ['Fire'], 'generation': 1}
{'name': 'Charmeleon', 'type': ['Fire'], 'generation': 1}
