In [1]:
from urllib.parse import urljoin

import numpy as np
import pandas as pd
import requests
from bs4 import BeautifulSoup

In [2]:
import os

# Create the output directory that later cells write into (data/fish.csv and
# the villager images). exist_ok=True makes this cell safe to re-run.
os.makedirs("data", exist_ok=True)

# Fish Table

Goal: read the "Fish Types" table from https://stardewvalleywiki.com/Fish into a pandas DataFrame.

Request the Fish page; `<Response [200]>` indicates a successful request.

In [3]:
# Fetch the Fish page. The timeout stops the cell from hanging indefinitely
# if the server does not answer, and raise_for_status() fails loudly on a
# 4xx/5xx response instead of letting an error page flow into the parser.
fish_page = requests.get("https://stardewvalleywiki.com/Fish", timeout=30)
fish_page.raise_for_status()
fish_page

<Response [200]>

In [4]:
# Parse the raw response body of the Fish page using the 'html.parser' backend.
soup = BeautifulSoup(fish_page.content, 'html.parser')

In [5]:
# All <table> elements carrying the "wikitable" class.
tables = soup.find_all('table', class_='wikitable')

# Fail with a readable message (rather than a bare IndexError) if the page
# layout changed and no such table was found.
assert tables, "no 'wikitable' tables found on the Fish page"

# The first match is the one parsed below.
# NOTE(review): assumed to be the "Fish Types" table - confirm against the page.
table1 = tables[0]

Compile the contents of the table into a list of lists, and convert into a dataframe.

In [6]:
# For every <tr> in the table, collect the text of each <td> it contains.
# A <td> that itself wraps a nested <table> is skipped; find_all() also
# returns that nested table's own cells, so those are collected individually.
all_rows = [
    [cell.get_text() for cell in row.find_all('td') if not cell.find('table')]
    for row in table1.find_all('tr')
]

# Keep only the rows that produced more than 5 cells; shorter rows
# (including rows with no <td> at all) are discarded.
data = [cells for cells in all_rows if len(cells) > 5]

In [8]:
# Positions, within each scraped row, of the cells to keep:
# 1 and 2, followed by 27 through 34.
desired_cols = [1, 2, *range(27, 35)]

# Label the 35 scraped cells 0..34, then keep only the desired positions.
fish_df = pd.DataFrame(data, columns=list(range(35))).loc[:, desired_cols]

fish_df.head()

Unnamed: 0,1,2,27,28,29,30,31,32,33,34
0,Pufferfish\n,Inflates when threatened.\n,Ocean\n,12pm – 4pm\n,Summer\n,Sun\n,1-37\n,80 floater\n,29\n,"Specialty Fish Bundle Abigail (loved gift) ""..."
1,Anchovy\n,A small silver fish found in the ocean.\n,Ocean\n,Anytime\n,Spring Fall\n,Any\n,1-17\n,30 dart\n,13\n,\n
2,Tuna\n,A large fish that lives in the ocean.\n,Ocean\n,6am – 7pm\n,Summer Winter\n,Any\n,12-61\n,70 smooth\n,26\n,Ocean Fish Bundle Fish Taco\n
3,Sardine\n,A common ocean fish.\n,Ocean\n,6am – 7pm\n,Spring Fall Winter\n,Any\n,1-13\n,30 dart\n,13\n,Ocean Fish Bundle Dish o' The Sea\n
4,Bream\n,A fairly common river fish that becomes activ...,River (Town+Forest)\n,6pm – 2am\n,All Seasons\n,Any\n,12-31\n,35 smooth\n,14\n,Night Fishing Bundle Baked Fish\n


## Data Cleaning

Remove the '\n' newline characters from every string:

In [9]:
def remove_newline(text: str) -> str:
    """Return ``text`` with every newline character removed.

    All occurrences of '\\n' are dropped, not only a trailing one.
    """
    pieces = text.split('\n')
    return ''.join(pieces)
    
# Apply remove_newline to every cell. DataFrame.applymap is deprecated in
# recent pandas (2.1+), so map each column element-wise instead; this form
# behaves the same and works on both old and new pandas versions.
fish_df = fish_df.apply(lambda column: column.map(remove_newline))
fish_df.head()

Unnamed: 0,1,2,27,28,29,30,31,32,33,34
0,Pufferfish,Inflates when threatened.,Ocean,12pm – 4pm,Summer,Sun,1-37,80 floater,29,"Specialty Fish Bundle Abigail (loved gift) ""..."
1,Anchovy,A small silver fish found in the ocean.,Ocean,Anytime,Spring Fall,Any,1-17,30 dart,13,
2,Tuna,A large fish that lives in the ocean.,Ocean,6am – 7pm,Summer Winter,Any,12-61,70 smooth,26,Ocean Fish Bundle Fish Taco
3,Sardine,A common ocean fish.,Ocean,6am – 7pm,Spring Fall Winter,Any,1-13,30 dart,13,Ocean Fish Bundle Dish o' The Sea
4,Bream,A fairly common river fish that becomes activ...,River (Town+Forest),6pm – 2am,All Seasons,Any,12-31,35 smooth,14,Night Fishing Bundle Baked Fish


Rename the columns from their default numeric labels to the column names used on the website, and convert the 'Base XP' column from strings to numbers:

In [10]:
# Column headings, listed in the same order as desired_cols.
site_cols = [
    "Name",
    "Description",
    "Location",
    "Time",
    "Seasons",
    "Weather",
    "Size (inches)",
    "Difficult and Behavior",
    "Base XP",
    "Used In",
]

# Numeric column label -> heading.
col_map = {number: heading for number, heading in zip(desired_cols, site_cols)}

# Apply the headings and index the frame by fish name.
fish_df = fish_df.rename(columns=col_map).set_index("Name")

# Parse "Base XP" as a number; errors='coerce' turns any value that cannot
# be parsed into NaN instead of raising.
fish_df["Base XP"] = pd.to_numeric(fish_df["Base XP"], errors='coerce')

fish_df.head()

Unnamed: 0_level_0,Description,Location,Time,Seasons,Weather,Size (inches),Difficult and Behavior,Base XP,Used In
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Pufferfish,Inflates when threatened.,Ocean,12pm – 4pm,Summer,Sun,1-37,80 floater,29,"Specialty Fish Bundle Abigail (loved gift) ""..."
Anchovy,A small silver fish found in the ocean.,Ocean,Anytime,Spring Fall,Any,1-17,30 dart,13,
Tuna,A large fish that lives in the ocean.,Ocean,6am – 7pm,Summer Winter,Any,12-61,70 smooth,26,Ocean Fish Bundle Fish Taco
Sardine,A common ocean fish.,Ocean,6am – 7pm,Spring Fall Winter,Any,1-13,30 dart,13,Ocean Fish Bundle Dish o' The Sea
Bream,A fairly common river fish that becomes activ...,River (Town+Forest),6pm – 2am,All Seasons,Any,12-31,35 smooth,14,Night Fishing Bundle Baked Fish


In [11]:
# Write the cleaned table (indexed by "Name") to data/fish.csv.
# Assumes the data/ directory exists.
fish_df.to_csv("data/fish.csv")

# Villager Images

Goal: download the thumbnail images of all the "villagers" on https://stardewvalleywiki.com/Villagers

In [12]:
# Fetch the Villagers page. The timeout stops the cell from hanging if the
# server does not answer, and raise_for_status() fails loudly on a 4xx/5xx
# response instead of parsing an error page.
villager_page = requests.get("https://stardewvalleywiki.com/Villagers", timeout=30)
villager_page.raise_for_status()

# Parse the response body using the 'html.parser' backend.
soup2 = BeautifulSoup(villager_page.content, 'html.parser')

Download the image from every element with the "thumb" class; each one contains a villager's thumbnail image and a link to that villager's character page.

In [13]:
# Every element on the Villagers page that carries the "thumb" class.
thumbs = soup2.find_all(class_='thumb')

for thumb in thumbs:
    img_html = thumb.find('img')
    src = img_html.get('src') if img_html is not None else None
    if not src:
        # No <img> inside this element, or the <img> has no src attribute:
        # there is nothing to download, so move on instead of crashing.
        continue

    # File name = everything after the last '/' in the src attribute.
    img_name = src[src.rfind('/') + 1:]

    # Resolve src against the page it came from. Unlike string concatenation,
    # this also works if src is already an absolute URL, and it uses the same
    # https host as the page request.
    url = urljoin("https://stardewvalleywiki.com/Villagers", src)

    # Fail loudly on a bad response rather than saving an error page as an image.
    response = requests.get(url, timeout=30)
    response.raise_for_status()

    # The context manager closes the file even if the write raises.
    with open("data/" + img_name, 'wb') as f:
        f.write(response.content)

### Some scraped Villager images:
![title](data/Abigail.png)
![title](data/Alex.png)
![title](data/Bouncer.png)