# Atari Image Retrieval

### Purpose: Use webscraping to retrieve all Atari images using URL cover art results from theGamesDB.net API. 

In [1]:
import pandas as pd
import os
import urllib
from urllib.request import Request, urlopen
import numpy as np
import PIL
from PIL import Image

In [3]:
# Create dataframe from NES Picture link csv
atari_df = pd.read_csv("piclists/atari_2600_piclist.csv")
atari_df.head()

Unnamed: 0,id,GameTitle,ReleaseDate,thumb,Platform
0,206,Commando,5/4/88,boxart/original/front/206-2.jpg,2600
1,207,DigDug,1/1/82,boxart/original/front/207-1.jpg,2600
2,10128,HERO,1/1/84,boxart/original/front/10128-1.jpg,2600
3,1292,Ghostbusters,1984,boxart/original/front/1292-2.jpg,2600
4,1341,Amidar,1/1/82,boxart/original/front/1341-1.jpg,2600


In [4]:
# Count records
atari_df.Platform.value_counts()

2600    801
Name: Platform, dtype: int64

In [5]:
# Remove rows with empty thumb backlink
atari_df['thumb'].replace('', np.nan, inplace=True)

atari_df.dropna(subset=['thumb'], inplace=True)

atari_df.count()

id             797
GameTitle      797
ReleaseDate    690
thumb          797
Platform       797
dtype: int64

In [6]:
# Concatenate base url with thumb links
base_url = "http://thegamesdb.net/banners/"
atari_df['URL'] = base_url + atari_df['thumb'].astype(str)
atari_df.head()

Unnamed: 0,id,GameTitle,ReleaseDate,thumb,Platform,URL
0,206,Commando,5/4/88,boxart/original/front/206-2.jpg,2600,http://thegamesdb.net/banners/boxart/original/...
1,207,DigDug,1/1/82,boxart/original/front/207-1.jpg,2600,http://thegamesdb.net/banners/boxart/original/...
2,10128,HERO,1/1/84,boxart/original/front/10128-1.jpg,2600,http://thegamesdb.net/banners/boxart/original/...
3,1292,Ghostbusters,1984,boxart/original/front/1292-2.jpg,2600,http://thegamesdb.net/banners/boxart/original/...
4,1341,Amidar,1/1/82,boxart/original/front/1341-1.jpg,2600,http://thegamesdb.net/banners/boxart/original/...


In [7]:
# create list from URLs
atari_piclist = atari_df["URL"].tolist()
atari_piclist

['http://thegamesdb.net/banners/boxart/original/front/206-2.jpg',
 'http://thegamesdb.net/banners/boxart/original/front/207-1.jpg',
 'http://thegamesdb.net/banners/boxart/original/front/10128-1.jpg',
 'http://thegamesdb.net/banners/boxart/original/front/1292-2.jpg',
 'http://thegamesdb.net/banners/boxart/original/front/1341-1.jpg',
 'http://thegamesdb.net/banners/boxart/original/front/1342-1.jpg',
 'http://thegamesdb.net/banners/boxart/original/front/1343-1.jpg',
 'http://thegamesdb.net/banners/boxart/original/front/1344-1.jpg',
 'http://thegamesdb.net/banners/boxart/original/front/1345-1.jpg',
 'http://thegamesdb.net/banners/boxart/original/front/1346-1.jpg',
 'http://thegamesdb.net/banners/boxart/original/front/1347-1.jpg',
 'http://thegamesdb.net/banners/boxart/original/front/1348-1.jpg',
 'http://thegamesdb.net/banners/boxart/original/front/1349-1.jpg',
 'http://thegamesdb.net/banners/boxart/original/front/1350-1.jpg',
 'http://thegamesdb.net/banners/boxart/original/front/1351-1.jp

In [8]:
# check link quality in list

print(atari_piclist[3])

http://thegamesdb.net/banners/boxart/original/front/1292-2.jpg


In [10]:
print(atari_piclist)

['http://thegamesdb.net/banners/boxart/original/front/206-2.jpg', 'http://thegamesdb.net/banners/boxart/original/front/207-1.jpg', 'http://thegamesdb.net/banners/boxart/original/front/10128-1.jpg', 'http://thegamesdb.net/banners/boxart/original/front/1292-2.jpg', 'http://thegamesdb.net/banners/boxart/original/front/1341-1.jpg', 'http://thegamesdb.net/banners/boxart/original/front/1342-1.jpg', 'http://thegamesdb.net/banners/boxart/original/front/1343-1.jpg', 'http://thegamesdb.net/banners/boxart/original/front/1344-1.jpg', 'http://thegamesdb.net/banners/boxart/original/front/1345-1.jpg', 'http://thegamesdb.net/banners/boxart/original/front/1346-1.jpg', 'http://thegamesdb.net/banners/boxart/original/front/1347-1.jpg', 'http://thegamesdb.net/banners/boxart/original/front/1348-1.jpg', 'http://thegamesdb.net/banners/boxart/original/front/1349-1.jpg', 'http://thegamesdb.net/banners/boxart/original/front/1350-1.jpg', 'http://thegamesdb.net/banners/boxart/original/front/1351-1.jpg', 'http://th

In [11]:
# Break list into pages of 30
paginated_atari_urls = {}
url_list = []
count = 0
for url in atari_piclist:
    if count % 30 == 0:
        paginated_atari_urls[count // 30] = url_list
        url_list = []
    url_list.append(url)
    count += 1
paginated_atari_urls[(count // 30) + 1] = url_list
url_list = []
paginated_atari_urls

{0: [],
 1: ['http://thegamesdb.net/banners/boxart/original/front/206-2.jpg',
  'http://thegamesdb.net/banners/boxart/original/front/207-1.jpg',
  'http://thegamesdb.net/banners/boxart/original/front/10128-1.jpg',
  'http://thegamesdb.net/banners/boxart/original/front/1292-2.jpg',
  'http://thegamesdb.net/banners/boxart/original/front/1341-1.jpg',
  'http://thegamesdb.net/banners/boxart/original/front/1342-1.jpg',
  'http://thegamesdb.net/banners/boxart/original/front/1343-1.jpg',
  'http://thegamesdb.net/banners/boxart/original/front/1344-1.jpg',
  'http://thegamesdb.net/banners/boxart/original/front/1345-1.jpg',
  'http://thegamesdb.net/banners/boxart/original/front/1346-1.jpg',
  'http://thegamesdb.net/banners/boxart/original/front/1347-1.jpg',
  'http://thegamesdb.net/banners/boxart/original/front/1348-1.jpg',
  'http://thegamesdb.net/banners/boxart/original/front/1349-1.jpg',
  'http://thegamesdb.net/banners/boxart/original/front/1350-1.jpg',
  'http://thegamesdb.net/banners/boxar

In [20]:
# # Test open save a single image before looping through all

# fd = urlopen(Request('http://thegamesdb.net/banners/boxart/original/front/206-2.jpg', headers={'User-Agent': 'Chrome'}))
# im = Image.open(fd)
# im.thumbnail([180,180])
# im = im.convert("RGB")
# im.save(f'tiles/{url[52:]}',format='JPEG')

In [21]:
# test links in atari_piclist

my_list_len = len(atari_piclist)

for i in range(0, 2):
     print(atari_piclist[i])

http://thegamesdb.net/banners/boxart/original/front/206-2.jpg
http://thegamesdb.net/banners/boxart/original/front/207-1.jpg


In [22]:
# Loop through all pages to retrieve, resize, and save cover art images to tiles folder

for i in range(1, 28):
    for url in paginated_atari_urls[i]:
        try:
            fd = urlopen(Request(url, headers={'User-Agent': 'Chrome'}))
            im = Image.open(fd)
            im.thumbnail([180,180])
            im = im.convert("RGB")
            im.save(f'tiles/{url[52:]}',format='JPEG')
        except KeyError:
            print('err')
        except ConnectionResetError:
            print('err1')
    print(i)
print('DONE')



1
2
3
4
5
6
7
8
9
10
11
12


  'to RGBA images')


13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
DONE
