In [13]:
import requests
import os
import pandas as pd


from dotenv import load_dotenv
load_dotenv()
api_key = os.environ.get("ACCESS_TOKEN")
client_id = os.environ.get("IGDB_CLIENT")

import nltk
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
import string


# Download NLTK stopwords if not already downloaded
nltk.download('stopwords')
nltk.download('punkt')
import re



[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/bradleywise/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     /Users/bradleywise/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


## Using IGDB Database API

In [49]:
# Endpoint URL
url = 'https://api.igdb.com/v4/games'

# Headers containing your API key
headers = {
    'Client-ID': client_id,
    'Authorization': f'Bearer {api_key}'
}

In [51]:
url = 'https://api.igdb.com/v4/platforms'
# Initialize list to store all platforms
all_platforms = []

# Query parameters
params = {
    'fields': 'id,name',
    'limit': 50,  # Limiting the number of results per request
    'offset': 0   # Initial offset
}

while True:
    # Making the GET request
    response = requests.get(url, headers=headers, params=params)

    # Checking if the request was successful
    if response.status_code == 200:
        # Parsing and appending the platforms to the list
        data = response.json()
        all_platforms.extend(data)

        # Check if there are more platforms to fetch
        if len(data) < 50:
            break  # Exit the loop if no more platforms are available
        else:
            params['offset'] += 50  # Increment the offset for the next request
    else:
        print(f"Error: {response.status_code} - {response.text}")
        break

sorted_platforms = sorted(all_platforms, key=lambda x: x['id'])
sorted_platforms


[{'id': 3, 'name': 'Linux'},
 {'id': 4, 'name': 'Nintendo 64'},
 {'id': 5, 'name': 'Wii'},
 {'id': 6, 'name': 'PC (Microsoft Windows)'},
 {'id': 7, 'name': 'PlayStation'},
 {'id': 8, 'name': 'PlayStation 2'},
 {'id': 9, 'name': 'PlayStation 3'},
 {'id': 11, 'name': 'Xbox'},
 {'id': 12, 'name': 'Xbox 360'},
 {'id': 13, 'name': 'DOS'},
 {'id': 14, 'name': 'Mac'},
 {'id': 15, 'name': 'Commodore C64/128/MAX'},
 {'id': 16, 'name': 'Amiga'},
 {'id': 18, 'name': 'Nintendo Entertainment System'},
 {'id': 19, 'name': 'Super Nintendo Entertainment System'},
 {'id': 20, 'name': 'Nintendo DS'},
 {'id': 21, 'name': 'Nintendo GameCube'},
 {'id': 22, 'name': 'Game Boy Color'},
 {'id': 23, 'name': 'Dreamcast'},
 {'id': 24, 'name': 'Game Boy Advance'},
 {'id': 25, 'name': 'Amstrad CPC'},
 {'id': 26, 'name': 'ZX Spectrum'},
 {'id': 27, 'name': 'MSX'},
 {'id': 29, 'name': 'Sega Mega Drive/Genesis'},
 {'id': 30, 'name': 'Sega 32X'},
 {'id': 32, 'name': 'Sega Saturn'},
 {'id': 33, 'name': 'Game Boy'},
 {'i

In [59]:
url = 'https://api.igdb.com/v4/games'
all_games = []

params = {
    'fields': 'name,first_release_date',
    'filter[platforms][eq]': 18,  # Platform ID for NES
    'limit': 50,  # Limiting the number of results per request
    'offset': 0  # Initial offset
}
# Continuously call the API until all games are retrieved
while True:
    # Making the GET request
    response = requests.get(url, headers=headers, params=params)

    # Checking if the request was successful
    if response.status_code == 200:
        # Parsing and appending the games to the list
        data = response.json()
        all_games.extend(data)

        # Check if there are more games to fetch
        if len(data) < 50:
            break  # Exit the loop if no more games are available
        else:
            params['offset'] += 50  # Increment the offset for the next request
    else:
        print(f"Error: {response.status_code} - {response.text}")
        break

In [63]:
# all_games

## Wikipedia API

In [14]:
url = 'https://en.wikipedia.org/wiki/List_of_Nintendo_Entertainment_System_games'
bestsellers_url = 'https://en.wikipedia.org/wiki/List_of_best-selling_Nintendo_Entertainment_System_video_games'

In [15]:
data = pd.read_html(url)
all_titles = data[1]
level_0 = all_titles.columns.get_level_values(0)
level_1 = all_titles.columns.get_level_values(1)
all_titles.columns = level_1
all_titles = all_titles.rename(columns={'Title [7][8]' : 'Title', 'Publisher(s) [7]' : 'Publisher(s)'})

In [5]:
# all_titles.Title

In [16]:
# bestsellers = pd.read_html(bestsellers_url)[2]
# bestsellers

In [17]:
all_titles.sort_values(by='Title')[0:10]

Unnamed: 0,Title,Developer(s),Publisher(s),First released,JP,NA,PAL
0,'89 Dennou Kyuusei Uranai,Micronics,Jingukan Polaris,"December 10, 1988JP","December 10, 1988",Unreleased,Unreleased
4,10-Yard Fight,Tose,IremJP NintendoNA/PAL,"August 30, 1985JP","August 30, 1985","October 18, 1985","December 6, 1986"
6,1942,Micronics,Capcom,"December 11, 1985JP","December 11, 1985",November 1986,Unreleased
7,1943: The Battle of Midway 1943: The Battle of...,Capcom,Capcom,"June 20, 1988JP","June 20, 1988",October 1988,Unreleased
8,1999: Hore Mita Koto Ka! Seikimatsu,Coconuts Japan,Coconuts Japan,"September 18, 1992JP","September 18, 1992",Unreleased,Unreleased
2,4 Nin Uchi Mahjong,Hudson Soft,Nintendo,"November 2, 1984JP","November 2, 1984",Unreleased,Unreleased
5,720°,Beam Software,Mindscape,November 1989NA,Unreleased,November 1989,Unreleased
3,8 Eyes,Thinking Rabbit,SETAJP TaxanNA,"September 27, 1988JP","September 27, 1988",January 1990,Unreleased
160,A Boy and His Blob: Trouble on Blobolonia Fush...,Imagineering,JalecoJP Absolute EntertainmentNA NintendoPAL,January 1990NA,"November 29, 1990",January 1990,1991
866,A Nightmare on Elm Street,Rare,LJN,October 1990NA,Unreleased,October 1990,Unreleased


In [18]:
def remove_stopwords(text):
    tokens = word_tokenize(text)
    stop_words = set(stopwords.words('english'))
    punctuation = set(string.punctuation)
    stop_words.update(punctuation)
    contraction_pattern = re.compile(r"\b\w+(?:['’`]\w+)?")
    filtered_tokens = [word for word in tokens if word.lower() not in stop_words and word not in punctuation and contraction_pattern.fullmatch(word) is not None]

    # filtered_tokens = [word for word in tokens if word.lower() not in stop_words]
    return ' '.join(filtered_tokens)

In [19]:
from collections import Counter
results = Counter()
all_titles['Processed_Title'] = all_titles['Title'].apply(remove_stopwords)
all_titles['Processed_Title'].str.lower().str.split().apply(results.update)
# print(results)

0       None
1       None
2       None
3       None
4       None
        ... 
1381    None
1382    None
1383    None
1384    None
1385    None
Name: Processed_Title, Length: 1386, dtype: object

In [21]:
len(results)

2374

In [9]:
NES_Titles = all_titles.sort_values(by='Title')
NES_Titles.to_csv('../data/nes_titles.csv', index=False)