# NES Image Retrieval

### Purpose: Use webscraping to retrieve all NES images using URL cover art results from theGamesDB.net API. 

In [81]:
import pandas as pd
import os
import urllib
from urllib.request import Request, urlopen
import numpy as np
import PIL
from PIL import Image

In [82]:
# Create dataframe from NES Picture link csv
nes_df = pd.read_csv("piclists/nes_piclist.csv")
nes_df.head()

Unnamed: 0,id,GameTitle,ReleaseDate,thumb,Platform
0,5.0,Donkey Kong,1/1/82,boxart/original/front/5-2.jpg,NES
1,76.0,Bionic Commando,12/6/88,boxart/original/front/76-1.jpg,NES
2,112.0,Super Mario Bros. 3,2/12/90,boxart/original/front/112-2.jpg,NES
3,113.0,The Legend of Zelda,7/1/87,boxart/original/front/113-1.png,NES
4,121.0,Kirby's Adventure,3/26/93,boxart/original/front/121-1.jpg,NES


In [83]:
# Count records
nes_df.Platform.value_counts()

NES    1627
Name: Platform, dtype: int64

In [84]:
# Remove rows with empty thumb backlink
nes_df['thumb'].replace('', np.nan, inplace=True)

nes_df.dropna(subset=['thumb'], inplace=True)

nes_df.count()

id             1625
GameTitle      1625
ReleaseDate    1606
thumb          1625
Platform       1625
dtype: int64

In [85]:
# Concatenate base url with thumb links
base_url = "http://thegamesdb.net/banners/"
nes_df['URL'] = base_url + nes_df['thumb'].astype(str)
nes_df.head()

Unnamed: 0,id,GameTitle,ReleaseDate,thumb,Platform,URL
0,5.0,Donkey Kong,1/1/82,boxart/original/front/5-2.jpg,NES,http://thegamesdb.net/banners/boxart/original/...
1,76.0,Bionic Commando,12/6/88,boxart/original/front/76-1.jpg,NES,http://thegamesdb.net/banners/boxart/original/...
2,112.0,Super Mario Bros. 3,2/12/90,boxart/original/front/112-2.jpg,NES,http://thegamesdb.net/banners/boxart/original/...
3,113.0,The Legend of Zelda,7/1/87,boxart/original/front/113-1.png,NES,http://thegamesdb.net/banners/boxart/original/...
4,121.0,Kirby's Adventure,3/26/93,boxart/original/front/121-1.jpg,NES,http://thegamesdb.net/banners/boxart/original/...


In [86]:
# create list from URLs
nes_piclist = nes_df["URL"].tolist()
nes_piclist

['http://thegamesdb.net/banners/boxart/original/front/5-2.jpg',
 'http://thegamesdb.net/banners/boxart/original/front/76-1.jpg',
 'http://thegamesdb.net/banners/boxart/original/front/112-2.jpg',
 'http://thegamesdb.net/banners/boxart/original/front/113-1.png',
 'http://thegamesdb.net/banners/boxart/original/front/121-1.jpg',
 'http://thegamesdb.net/banners/boxart/original/front/123-1.jpg',
 'http://thegamesdb.net/banners/boxart/original/front/125-1.jpg',
 'http://thegamesdb.net/banners/boxart/original/front/130-1.jpg',
 'http://thegamesdb.net/banners/boxart/original/front/133-1.jpg',
 'http://thegamesdb.net/banners/boxart/original/front/135-1.png',
 'http://thegamesdb.net/banners/boxart/original/front/140-1.jpg',
 'http://thegamesdb.net/banners/boxart/original/front/171-1.png',
 'http://thegamesdb.net/banners/boxart/original/front/189-1.jpg',
 'http://thegamesdb.net/banners/boxart/original/front/205-1.jpg',
 'http://thegamesdb.net/banners/boxart/original/front/208-2.jpg',
 'http://theg

In [87]:
# check link quality in list

print(nes_piclist[3])

http://thegamesdb.net/banners/boxart/original/front/113-1.png


In [89]:
print(nes_piclist)

['http://thegamesdb.net/banners/boxart/original/front/5-2.jpg', 'http://thegamesdb.net/banners/boxart/original/front/76-1.jpg', 'http://thegamesdb.net/banners/boxart/original/front/112-2.jpg', 'http://thegamesdb.net/banners/boxart/original/front/113-1.png', 'http://thegamesdb.net/banners/boxart/original/front/121-1.jpg', 'http://thegamesdb.net/banners/boxart/original/front/123-1.jpg', 'http://thegamesdb.net/banners/boxart/original/front/125-1.jpg', 'http://thegamesdb.net/banners/boxart/original/front/130-1.jpg', 'http://thegamesdb.net/banners/boxart/original/front/133-1.jpg', 'http://thegamesdb.net/banners/boxart/original/front/135-1.png', 'http://thegamesdb.net/banners/boxart/original/front/140-1.jpg', 'http://thegamesdb.net/banners/boxart/original/front/171-1.png', 'http://thegamesdb.net/banners/boxart/original/front/189-1.jpg', 'http://thegamesdb.net/banners/boxart/original/front/205-1.jpg', 'http://thegamesdb.net/banners/boxart/original/front/208-2.jpg', 'http://thegamesdb.net/bann

In [90]:
paginated_nes_urls = {}
url_list = []
count = 0
for url in nes_piclist:
    if count % 30 == 0:
        paginated_nes_urls[count // 30] = url_list
        url_list = []
    url_list.append(url)
    count += 1
paginated_nes_urls[(count // 30) + 1] = url_list
url_list = []
paginated_nes_urls

{0: [],
 1: ['http://thegamesdb.net/banners/boxart/original/front/5-2.jpg',
  'http://thegamesdb.net/banners/boxart/original/front/76-1.jpg',
  'http://thegamesdb.net/banners/boxart/original/front/112-2.jpg',
  'http://thegamesdb.net/banners/boxart/original/front/113-1.png',
  'http://thegamesdb.net/banners/boxart/original/front/121-1.jpg',
  'http://thegamesdb.net/banners/boxart/original/front/123-1.jpg',
  'http://thegamesdb.net/banners/boxart/original/front/125-1.jpg',
  'http://thegamesdb.net/banners/boxart/original/front/130-1.jpg',
  'http://thegamesdb.net/banners/boxart/original/front/133-1.jpg',
  'http://thegamesdb.net/banners/boxart/original/front/135-1.png',
  'http://thegamesdb.net/banners/boxart/original/front/140-1.jpg',
  'http://thegamesdb.net/banners/boxart/original/front/171-1.png',
  'http://thegamesdb.net/banners/boxart/original/front/189-1.jpg',
  'http://thegamesdb.net/banners/boxart/original/front/205-1.jpg',
  'http://thegamesdb.net/banners/boxart/original/front

In [62]:
# Test open save a single image before looping through all

# fd = urlopen(Request('http://thegamesdb.net/banners/boxart/original/front/5-2.jpg', headers={'User-Agent': 'Chrome'}))
# im = Image.open(fd)
# im.thumbnail([64,64])
# im = im.convert("RGB")
# im.save(f'tiles/{url[52:]}',format='JPEG')

In [91]:
# test nes_piclist

my_list_len = len(nes_piclist)

for i in range(0, 2):
     print(nes_piclist[i])

http://thegamesdb.net/banners/boxart/original/front/5-2.jpg
http://thegamesdb.net/banners/boxart/original/front/76-1.jpg


In [105]:


for i in range(48, 56):
    for url in paginated_nes_urls[i]:
        try:
            fd = urlopen(Request(url, headers={'User-Agent': 'Chrome'}))
            im = Image.open(fd)
            im.thumbnail([64,64])
            im = im.convert("RGB")
            im.save(f'tiles/{url[52:]}',format='JPEG')
        except KeyError:
            print('err')
        except ConnectionResetError:
            print('err1')
    print(i)
print('DONE')



48
49
50
51
52
53
54


  " Skipping tag %s" % (size, len(data), tag))


55
DONE
