# Imports

In [None]:
# System
import os
from pathlib import Path
from dotenv import load_dotenv
import importlib
import sys

# Data Management
from typing import Optional, Dict, Any
from pydantic import BaseModel, Field
import time
import random
import json

# Data Science
import pandas as pd
import polars as pl
import duckdb

# API Interactions
import requests
from tqdm.notebook import tqdm

In [None]:
# Path setup for the project-local modules
import importlib
import sys
from pathlib import Path

# Add project root to path so the local `src` package can be imported.
# The membership check keeps this cell idempotent: re-running it no longer
# appends the same directory to sys.path again.
# NOTE(review): assumes the kernel's working directory sits one level below
# the project root - confirm.
project_root = str(Path.cwd().parent)
if project_root not in sys.path:
    sys.path.append(project_root)

# Import your modules
from src import steam_api_manager
from src import mongo_manager

# Configurations

In [None]:
# Load Environment Variables
load_dotenv()

# Reload the project modules so source edits are picked up without restarting
# the kernel.
importlib.reload(steam_api_manager)
# The name `mongo_manager` is rebound to a MongoManager instance at the end of
# this cell, so the module is resolved through its import path instead of the
# bare name; reloading via the bare name raises TypeError on the second run.
mongo_manager_module = importlib.reload(importlib.import_module("src.mongo_manager"))

# Get fresh instances of your classes
steam_api = steam_api_manager.SteamAPIManager()
mongo_manager = mongo_manager_module.MongoManager()

# Layer (Bronze): MongoDB

## Extract & Load Data

1. Get App Names

In [None]:
# Ask the Steam API manager for the app names; the result is handed to
# mongo_manager.update_app_names in the next step.
app_names = steam_api.get_app_names()

2. Update App Names

In [None]:
# Pass the fetched app names to the Mongo manager's update routine.
mongo_manager.update_app_names(app_names)

3. Check App Names

In [None]:
# Initialize names collection dataframe (appid and name only, no _id)
df_names = pl.LazyFrame(
    mongo_manager.database.names.find({}, {'_id': 0, 'appid': 1, 'name': 1})
)

# Sanity check: keep only the rows whose name is an empty string
has_empty_name = pl.col('name').str.len_chars() == 0
df_filtered = df_names.filter(has_empty_name)

# Materialize the lazy query so the notebook displays the offending rows
df_filtered.collect()

## Get App Details

Compare with MongoDB for upsert logic: 

1. For now, the details of all apps always have to be updated.
2. The upsert logic has to be changed later.

In [None]:
# All appids from names collection
df_names = pl.LazyFrame(mongo_manager.database.names.find({}, {'_id': 0, 'appid': 1}))
# All appids from details collection
df_details = pl.LazyFrame(mongo_manager.database.details.find({}, {'_id': 0, 'appid': 1}))
# All appids from no_details collection
df_no_details = pl.LazyFrame(mongo_manager.database.no_details.find({}, {'_id': 0, 'appid': 1}))

# Only combine the collections that actually returned documents. A frame built
# from an empty result is expected to have no 'appid' column, so feeding it to
# a vertical concat or to the join below would fail (e.g. on the first run,
# when the details collection is still empty).
processed_frames = [
    frame for frame in (df_details, df_no_details) if frame.collect().height > 0
]

if processed_frames:
    # Union of the appids that were already processed (with or without details)
    df_details = pl.concat(processed_frames, how='vertical')
    # Perform EXCEPT logic to find appids that need to be updated
    df_except = df_names.join(df_details, on='appid', how='anti')
else:
    # Nothing processed yet: every appid from the names collection is pending
    df_except = df_names

update_detail_appids = df_except.collect().to_series().to_list()

print(f"{len(update_detail_appids) = }")


Get app details and load to mongodb

In [None]:
# Walk through the pending appids with a progress bar: fetch each app's
# details from the Steam API manager and hand them to the Mongo manager.
detail_progress = tqdm(update_detail_appids, desc="Updating Collection: details")
for appid in detail_progress:
    app_details = steam_api.get_app_details(appid)
    mongo_manager.update_app_details(appid, app_details)

## Get App Tags

Compare with MongoDB for upsert logic:
1. Only update appids whose type in the app details is one of: ['game', 'dlc', 'demo', 'series', 'episode', 'music', 'mod']

In [None]:
# All appids from details collection, with their type and the nested
# release_date.coming_soon flag flattened into a top-level column
details_cursor = mongo_manager.database.details.find(
    {}, {'_id': 0, 'appid': 1, 'type': 1, 'release_date.coming_soon': 1}
)
coming_soon = pl.col("release_date").struct.field("coming_soon")
df_details = pl.LazyFrame(details_cursor).select([
    pl.col("appid"),
    pl.col("type"),
    coming_soon.alias("release_date.coming_soon"),
])

# All appids from tags collection
df_tags = pl.LazyFrame(mongo_manager.database.tags.find({}, {'_id': 0, 'appid': 1}))

# Keep the appids of apps typed 'game' that are not flagged as coming soon
is_game = pl.col('type') == 'game'
is_released = ~pl.col('release_date.coming_soon')
df_details_filtered = df_details.filter(is_game & is_released).select('appid')

# With no tags stored yet every filtered appid is pending; otherwise perform
# EXCEPT logic (anti join) to drop the appids that already have tags
if df_tags.collect().height == 0:
    update_tag_appids = df_details_filtered.collect().to_series().to_list()
else:
    df_except = df_details_filtered.join(df_tags, on='appid', how='anti')
    update_tag_appids = df_except.collect().to_series().to_list()

print(f"{len(update_tag_appids) = }")

Get app tags and load to mongodb

In [None]:
# Walk through the pending appids with a progress bar: fetch each app's tags
# from the Steam API manager and hand them to the Mongo manager.
tag_progress = tqdm(update_tag_appids, desc="Updating Collection: tags")
for appid in tag_progress:
    app_tags = steam_api.get_app_tags(appid)
    mongo_manager.update_app_tags(appid, app_tags)

## Get App Reviews

Compare with MongoDB for upsert logic: 
1. Only update appids whose type in the app details is one of: ['game', 'dlc', 'demo', 'series', 'episode', 'music', 'mod']

In [None]:
# All appids from details collection, together with their type and the
# nested release_date.coming_soon flag flattened into a top-level column
df_details = (
    pl.LazyFrame(mongo_manager.database.details.find(
        {}, {'_id': 0, 'appid': 1, 'type': 1, 'release_date.coming_soon': 1}
    ))
    .with_columns([
        pl.col("release_date").struct.field("coming_soon").alias("release_date.coming_soon")
    ])
    .select(["appid", "type", "release_date.coming_soon"])
)

# All appids from reviews collection
df_reviews = pl.LazyFrame(mongo_manager.database.reviews.find({}, {'_id': 0, 'appid': 1}))

# Filter appids from details collection where detail types are used.
# The predicate has to go through .filter(); without it the parenthesised
# expression is parsed as a call on the LazyFrame and raises TypeError.
# Only the appid column is kept so the list conversion below yields appids.
df_details_filtered = (
    df_details
    .filter(
        (pl.col('type') == 'game') &
        ~(pl.col('release_date.coming_soon'))
    )
    .select(['appid'])
)

# Perform EXCEPT logic to find appids that need to be updated; when the
# reviews collection returned nothing, every filtered appid is pending
if df_reviews.collect().shape[0] > 0:
    df_except = df_details_filtered.join(df_reviews, on='appid', how='anti')
    update_review_appids = df_except.collect().to_series().to_list()
else:
    update_review_appids = df_details_filtered.collect().to_series().to_list()

print(f"{len(update_review_appids) = }")

Get app reviews and load to mongodb

In [None]:
# Walk through the pending appids with a progress bar: fetch each app's
# reviews from the Steam API manager and hand them to the Mongo manager.
review_progress = tqdm(update_review_appids, desc="Updating Collection: reviews")
for appid in review_progress:
    app_reviews = steam_api.get_app_reviews(appid)
    mongo_manager.update_app_reviews(appid, app_reviews)