# In [None]:
from bs4 import BeautifulSoup
import json
import requests 
import pandas as pd
from sqlalchemy import create_engine
import re

## Web Scrape
#
# For each search category this script:
#   1. scrapes the first GAMES_PER_CATEGORY top-seller results from the Steam
#      store search page (title, price, app id);
#   2. fetches every game's store page (description) and its review summary
#      from the appreviews endpoint;
#   3. writes both result sets to SQL tables named "<category>_games_details"
#      and "<category>_games_reviews", replacing any existing tables.

# define the categories to search for
categories = ["action", "adventure", "role-playing", "sports", "race"]

# Number of search results stored per category; categories that return fewer
# results are skipped entirely.
GAMES_PER_CATEGORY = 20

# Seconds to wait for any single HTTP response. Without a timeout, requests
# waits indefinitely on a stalled connection.
REQUEST_TIMEOUT = 30

# Columns of the per-category reviews table, in output order.
REVIEW_COLUMNS = (
    "app_id",
    "review_score",
    "review_score_desc",
    "total_positive",
    "total_negative",
    "total_review",
    "description",
)

# Maps an output column to the key it is read from in the "query_summary"
# object of the appreviews response.
SUMMARY_KEYS = {
    "review_score": "review_score",
    "review_score_desc": "review_score_desc",
    "total_positive": "total_positive",
    "total_negative": "total_negative",
    "total_review": "total_reviews",
}


def _fetch_review_row(app_id):
    """Collect the description and review summary for one game.

    Always returns a dict with exactly the keys in REVIEW_COLUMNS. Any value
    that could not be retrieved is left as None, so every column gets one
    entry per game and the resulting DataFrame columns stay the same length.

    Args:
        app_id: Value of the search result's "data-ds-appid" attribute, or
            None when the result did not carry one.

    Returns:
        dict mapping each name in REVIEW_COLUMNS to the scraped value or None.
    """
    row = dict.fromkeys(REVIEW_COLUMNS)
    row["app_id"] = app_id

    if app_id is None:
        # Nothing to look up; keep the placeholder row so this table stays
        # aligned row-for-row with the games_details table.
        print("Skipping review lookup for a search result without an app_id")
        return row

    ## Store page (description)
    try:
        page = requests.get(
            f"https://store.steampowered.com/app/{app_id}",
            timeout=REQUEST_TIMEOUT,
        )
    except requests.RequestException as error:
        print(f"Failed to retrieve game data for app_id {app_id}: {error}")
        return row
    if page.status_code != 200:
        # Print an error message if the game page was not found
        print(f"Failed to retrieve game data for app_id {app_id} with status code {page.status_code}")
        return row

    # Parse the HTML content of the game page with BeautifulSoup
    description_div = BeautifulSoup(page.content, 'html.parser').find(
        "div", {"id": "game_area_description"}
    )
    if description_div is None:
        # find() returns None when the element is absent; the description
        # stays None instead of raising AttributeError.
        print(f"No description found for app_id {app_id}")
    else:
        # Remove the "About This Game" heading and unnecessary spacing
        row["description"] = (
            description_div.text.strip()
            .replace("About This Game", "")
            .replace('\r\n\t\t\t\t\t\t\t', '')
        )

    ## API Request (review summary)
    try:
        reviews = requests.get(
            f"https://store.steampowered.com/appreviews/{app_id}?json=1",
            timeout=REQUEST_TIMEOUT,
        )
    except requests.RequestException as error:
        print(f"Failed to retrieve review data for app_id {app_id}: {error}")
        return row
    if reviews.status_code != 200:
        # Print an error message if the query was not successful
        print(f"Failed to retrieve review data for app_id {app_id} with status code {reviews.status_code}")
        return row

    try:
        # Parse the response as a JSON dictionary
        data = reviews.json()
    except ValueError:
        print(f"Review data for app_id {app_id} is not valid JSON")
        return row

    # Check if the query was successful
    if not isinstance(data, dict) or data.get("success") != 1:
        print(f"Review query for app_id {app_id} was not successful")
        return row

    summary = data.get("query_summary") or {}
    for column, key in SUMMARY_KEYS.items():
        row[column] = summary.get(key)
    return row


# Created once and shared by every category; building a new engine per
# iteration would allocate a fresh connection pool each time.
# NOTE(review): USER:PASSWORD look like placeholders - confirm real
# credentials are supplied before running.
engine = create_engine("mysql+mysqldb://USER:PASSWORD@isba-dev-01.cmv8g4d5f073.us-east-1.rds.amazonaws.com/sql_project?charset=utf8")

for category in categories:
    search_filter = "topsellers"
    url = "https://store.steampowered.com/search/"
    # params: english language; per the original author's note, ndl=1 matches
    # the URL produced when the search is opened through Google
    params = {"term" : category, "supportedlang" : "english", "filter" : search_filter, "ndl" : "1"}

    response = requests.get(url, params=params, timeout=REQUEST_TIMEOUT)
    soup = BeautifulSoup(response.text, "html.parser")
    # find all games in top sellers
    games = soup.find_all("a", attrs={"data-gpnav": "item"})
    if len(games) < GAMES_PER_CATEGORY:
        print(f"Not enough games found for category {category}")
        continue

    # dictionary for game details
    games_details = {
        "title": [],
        "price": [],
        "app_id": [],
    }
    # keep only the first GAMES_PER_CATEGORY results
    for game in games[:GAMES_PER_CATEGORY]:
        title = game.find("span", attrs={"class": "title"})
        price = game.find("div", attrs={"class": "col search_price responsive_secondrow"})
        # Missing elements are stored as None so all three lists keep the
        # same length and the DataFrame can be built.
        games_details["title"].append(title.text if title is not None else None)
        games_details["price"].append(price.text.strip() if price is not None else None)
        games_details["app_id"].append(game.get("data-ds-appid"))

    # One row per scraped game, in the same order as games_details.
    games_reviews = {column: [] for column in REVIEW_COLUMNS}
    for app_id in games_details["app_id"]:
        for column, value in _fetch_review_row(app_id).items():
            games_reviews[column].append(value)

    # converting games_details to df
    df1 = pd.DataFrame(games_details)

    df1.to_sql(f"{category}_games_details", engine, if_exists='replace', index=False)

    # converting games_reviews to df
    df2 = pd.DataFrame(games_reviews)

    df2.to_sql(f"{category}_games_reviews", engine, if_exists='replace', index=False)