# VideoGames API
---

## Obtención de los datos

In [None]:
import os
import requests
import pandas as pd

# API key for RAWG, taken from the environment; None when the variable is unset.
RAWG_API_KEY = os.environ.get("RAWG_API_KEY")

In [None]:
# Base URL of the RAWG API; resource paths such as "/games" are appended to it.
ENDPOINT = "https://api.rawg.io/api"

In [None]:
# Request one page of the games listing, asking for 40 results per page.
try:
    params = {"key": RAWG_API_KEY, "page_size": 40}
    # An explicit timeout keeps a stalled connection from hanging the cell.
    res = requests.get(f"{ENDPOINT}/games", params=params, timeout=30)
    res.raise_for_status()
except requests.RequestException as e:
    # Covers connection errors, timeouts and the HTTPError raised by
    # raise_for_status(); anything else is a programming error and surfaces.
    print(e)

In [None]:
# Decode the JSON body of the most recent response.
data = res.json()

In [None]:
# Number of entries in the "results" list of the listing response.
len(data["results"])

In [None]:
# Inspect the "next" field of the listing response
# (presumably the URL of the following page; confirm against the API docs).
data["next"]

In [None]:
# Take the id of the first listed game so the detail endpoint can be tried with it.
# NOTE(review): this name shadows the builtin `id` for the rest of the session.
id = data["results"][0].get("id")
id

In [None]:
# Request the detail record of the single game selected above.
try:
    # Only the API key is sent; "page_size" is a listing parameter and is
    # not needed when fetching one game.
    params = {"key": RAWG_API_KEY}
    # An explicit timeout keeps a stalled connection from hanging the cell.
    res = requests.get(f"{ENDPOINT}/games/{id}", params=params, timeout=30)
    res.raise_for_status()
except requests.RequestException as e:
    # Covers connection errors, timeouts and the HTTPError raised by
    # raise_for_status(); anything else is a programming error and surfaces.
    print(e)

In [None]:
from dataclasses import dataclass, asdict

In [None]:
@dataclass
class Game:
    """Flat record holding the fields kept for one game.

    Field order matters: instances are built positionally in this order.
    """

    # Identifier of the game, as returned under the "id" key.
    id: int
    # Title of the game.
    name: str
    # Release date as returned under "released" (kept as text, not parsed).
    released: str
    # Value of the "rating" key.
    rating: float
    # Text taken from the "description_raw" key.
    description: str
    # Value of the "playtime" key.
    playtime: float
    # Genre names joined with commas into a single string.
    genres: str
    # Name of the first listed publisher, or a placeholder when there is none.
    publisher: str
    # Value of the "background_image" key, or a placeholder.
    image: str

In [None]:
def get_game_data(res) -> Game:
    """Build a ``Game`` record from a game-detail response.

    Args:
        res: Response object whose ``json()`` method returns the game-detail
            payload as a dict.

    Returns:
        A ``Game`` filled from the payload. ``genres`` is the comma-joined
        list of genre names (empty string when there are none), ``publisher``
        is the name of the first publisher or ``"no publisher detected"``,
        and ``image`` falls back to ``"no image"``.
    """
    data = res.json()

    # "genres" may be missing or null, and an entry may lack a name; skip
    # those instead of failing while iterating or inside ``str.join``.
    genre_names = [genre.get("name") for genre in data.get("genres") or []]
    genres = ",".join(name for name in genre_names if name)

    # A missing, null or empty publisher list all mean "no publisher".
    publishers = data.get("publishers")
    publisher = publishers[0].get("name") if publishers else "no publisher detected"

    # ``dict.get`` applies its default only when the key is absent; ``or``
    # also covers a key that is present with a null (or empty) value.
    image = data.get("background_image") or "no image"

    return Game(
        id=data.get("id"),
        name=data.get("name"),
        released=data.get("released"),
        rating=data.get("rating"),
        description=data.get("description_raw"),
        playtime=data.get("playtime"),
        genres=genres,
        publisher=publisher,
        image=image,
    )

In [None]:
# Crawl listing pages 1-20 (40 games per page) and fetch the detail record
# of every listed game. The crawl is best-effort: failures are printed and
# skipped so one bad request does not discard the rest.
games = []
for i in range(1, 21):
    print(f"Getting data for page {i}")
    try:
        params = {"key": RAWG_API_KEY, "page_size": 40, "page": i}
        res = requests.get(f"{ENDPOINT}/games", params=params, timeout=30)
        res.raise_for_status()
        ids = [game.get("id") for game in res.json()["results"]]
    except Exception as e:
        # Without the listing there is nothing to fetch for this page.
        print(e)
        continue

    for game_id in ids:
        # Each game has its own try block so that a single failing detail
        # request skips only that game, not the remainder of the page.
        try:
            detail = requests.get(
                f"{ENDPOINT}/games/{game_id}",
                params={"key": RAWG_API_KEY},
                timeout=30,
            )
            detail.raise_for_status()
            games.append(get_game_data(detail))
        except Exception as e:
            print(f"Skipping game {game_id}: {e}")

In [None]:
# Turn each Game into a dict and build a table with one row per game.
df = pd.DataFrame([asdict(game) for game in games])

In [None]:
# (rows, columns) of the collected table.
df.shape

In [None]:
# Preview the first rows of the collected table.
df.head()

In [None]:
# Save the table as UTF-8 CSV in the working directory, without the index column.
df.to_csv("video-games-db.csv", index=False, encoding="utf-8")

## Procesado y obtención de embeddings

In [None]:
import pandas as pd

In [None]:
# Reload the CSV written by the data-collection step. The same relative path
# is used for writing and reading, so this works from any working directory
# rather than only when the file sits under /content.
df = pd.read_csv("video-games-db.csv", encoding="utf-8")
df.head()

In [None]:
# Normalise every object-dtype (text) column: lowercase it and remove
# parenthesised asides.
# NOTE(review): this also rewrites non-descriptive columns such as "image"
# and "released"; confirm that is intended.
for col in df.select_dtypes("object").columns:
    df[col] = (
        df[col]
        .str.lower()
        # `[^()]*` stops at the nearest closing parenthesis, so each aside is
        # removed on its own. The greedy `.*` used before deleted everything
        # between the first "(" and the last ")" on a line, including the
        # ordinary text sitting between two separate asides.
        .str.replace(r"\([^()]*\)", "", regex=True)
    )

In [None]:
# Preview the table after text normalisation.
df.head()

In [None]:
# Split the comma-joined genre string of each row into a list of genre names.
df["genres"] = df["genres"].str.split(",")

In [None]:
# Preview the table with genres stored as lists.
df.head()

In [None]:
# Smallest and largest value of the "released" column.
# NOTE(review): the column was read from CSV without date parsing, so this
# presumably compares strings rather than dates; confirm.
df["released"].min(), df["released"].max()

### Embeddings

In [None]:
from sentence_transformers import SentenceTransformer

In [None]:
model = SentenceTransformer("all-MiniLM-L6-v2")  # Download the embedding model

In [None]:
# Encode one sample sentence as a smoke test, requesting a NumPy array back.
embedding = model.encode("Texto de prueba", convert_to_numpy=True)

In [None]:
# Shape of the sample embedding.
embedding.shape

In [None]:
# Work on a copy so the embedding column is not added to `df` itself.
df_embed = df.copy()

In [None]:
# Descriptions that were empty when the CSV was written are read back as NaN
# (a float). Replace them with "" so the encoder only ever receives strings.
texts = df_embed["description"].fillna("").to_list()

In [None]:
# Encode all descriptions in one call, requesting a NumPy array back.
embeddings = model.encode(texts, convert_to_numpy=True)

In [None]:
# Shape of the embeddings array.
embeddings.shape

In [None]:
# Store the embeddings as a column of plain Python lists
# (presumably one vector per row, in the same order as `texts`).
df_embed["embedding"] = embeddings.tolist()

In [None]:
# Preview the table with the embedding column attached.
df_embed.head()

In [None]:
# Save the processed table, embeddings included, as Parquet without the index.
df_embed.to_parquet("video-games-db-processed.parquet", index=False)