In [2]:
from dotenv import load_dotenv
import os
import requests
import time
from pymongo import MongoClient

# Load variables from a local .env file (if present) into the process environment.
load_dotenv()

# IGDB API credentials
CLIENT_ID = os.getenv("CLIENT_ID")
ACCESS_TOKEN = os.getenv("ACCESS_TOKEN")

# Fail fast when a credential is missing: os.getenv returns None for unset
# variables, which would otherwise be sent as the literal text "None" in the
# headers below and only surface later as an API error.
missing_credentials = [
    name
    for name, value in (("CLIENT_ID", CLIENT_ID), ("ACCESS_TOKEN", ACCESS_TOKEN))
    if not value
]
if missing_credentials:
    raise RuntimeError(
        "Missing IGDB credential(s): "
        + ", ".join(missing_credentials)
        + ". Set them in the environment or in a .env file."
    )

# IGDB API Endpoint
url = "https://api.igdb.com/v4/games"

# Headers for API requests
headers = {
    "Client-ID": CLIENT_ID,
    "Authorization": f"Bearer {ACCESS_TOKEN}",
    "Accept": "application/json"
}

In [5]:
# MongoDB setup
# The connection string carries a username and password, so it is read from the
# environment (for example the .env file loaded by load_dotenv) instead of being
# stored in the notebook, where it would be shared with every copy of the file.
# NOTE(review): any password that was previously committed in this cell should
# be rotated.
MONGO_URI = os.getenv("MONGO_URI")
if not MONGO_URI:
    raise RuntimeError(
        "MONGO_URI is not set. Add the MongoDB connection string to the "
        "environment or to the .env file."
    )
client = MongoClient(MONGO_URI)
db = client["games_database"]  # Database name
collection = db["raw_data"]  # Collection name

# Fetch and store data in batches
batch_size = 500  # rows requested per API call; also used as the query's `limit`
total_rows = 130000  # upper bound on the offsets that will be requested
request_timeout = 30  # seconds to wait for the API before giving up on a request

# IGDB API Query Template
# `limit` is filled in from batch_size right here so the page size and the loop
# step cannot drift apart; `{offset}` stays a placeholder for .format() below.
query_template = f"""
fields name, genres.name, platforms.name, rating, first_release_date, follows, hypes, themes.name, game_modes.name, player_perspectives.name,
involved_companies.company.name;
limit {batch_size};
offset {{offset}};
"""

# NOTE(review): insert_many adds new documents on every call, so re-running this
# cell presumably stores the same games a second time - confirm, and clear the
# collection (or upsert on the game id) before a re-run if that matters.
fetch_succeeded = True

for offset in range(0, total_rows, batch_size):
    query = query_template.format(offset=offset)

    try:
        # A timeout keeps a stalled connection from hanging the notebook forever.
        response = requests.post(url, headers=headers, data=query, timeout=request_timeout)
    except requests.RequestException as exc:
        print(f"Error: request failed at offset {offset}: {exc}")
        fetch_succeeded = False
        break

    if response.status_code != 200:
        print(f"Error: {response.status_code}, {response.text}")
        fetch_succeeded = False
        break  # Stop fetching if an error occurs

    games = response.json()

    if not games:
        # An empty page means the offset is past the last available game, so
        # requesting the remaining offsets would only return more empty pages.
        print(f"No games found at offset {offset}")
        break

    # Process and insert data into MongoDB
    collection.insert_many(games)
    print(f"Inserted {len(games)} games (offset {offset}) into MongoDB")

    # Respect IGDB's rate limit (4 requests/sec)
    time.sleep(0.25)

# Only report success when the loop was not cut short by an error.
if fetch_succeeded:
    print("Data fetching and insertion completed!")
else:
    print("Data fetching stopped early because of an error.")


Inserted 500 games (offset 0) into MongoDB
Inserted 500 games (offset 500) into MongoDB
Inserted 500 games (offset 1000) into MongoDB
Inserted 500 games (offset 1500) into MongoDB
Inserted 500 games (offset 2000) into MongoDB
Inserted 500 games (offset 2500) into MongoDB
Inserted 500 games (offset 3000) into MongoDB
Inserted 500 games (offset 3500) into MongoDB
Inserted 500 games (offset 4000) into MongoDB
Inserted 500 games (offset 4500) into MongoDB
Inserted 500 games (offset 5000) into MongoDB
Inserted 500 games (offset 5500) into MongoDB
Inserted 500 games (offset 6000) into MongoDB
Inserted 500 games (offset 6500) into MongoDB
Inserted 500 games (offset 7000) into MongoDB
Inserted 500 games (offset 7500) into MongoDB
Inserted 500 games (offset 8000) into MongoDB
Inserted 500 games (offset 8500) into MongoDB
Inserted 500 games (offset 9000) into MongoDB
Inserted 500 games (offset 9500) into MongoDB
Inserted 500 games (offset 10000) into MongoDB
Inserted 500 games (offset 10500) int

In [8]:
# Report how many documents the collection holds.
row_count = collection.count_documents({})
print(f"Number of rows: {row_count}")

# Gather every distinct top-level field name across all documents.
# The projection drops `_id` so it is not counted as a column.
cursor = collection.find({}, {"_id": 0})
unique_columns = {field for document in cursor for field in document}

print(f"Number of unique columns: {len(unique_columns)}")



Number of rows: 130000
Number of unique columns: 11
