# Extracting data from Steam 

## Initial Setup

In [1]:
from bs4 import BeautifulSoup
import requests

## Connect to Steam webpage

In [2]:
# Fetch the Steam "Action" tag page. requests applies no timeout unless one
# is passed, so set one to keep a stalled connection from hanging the notebook.
r = requests.get("https://store.steampowered.com/tags/en/Action/", timeout=30)
# Last expression of the cell: displays the HTTP status code.
r.status_code

200

In [3]:
# Keep the response body of the request above; it is the markup handed to
# BeautifulSoup for parsing.
html = r.content

In [4]:
# Parse the downloaded markup, selecting the lxml parser explicitly.
soup = BeautifulSoup(markup=html, features="lxml")

In [5]:
# Write a pretty-printed, UTF-8 encoded copy of the parsed page to steam.html.
pretty_page = soup.prettify('utf-8')
with open('steam.html', 'wb') as out_file:
    out_file.write(pretty_page)

## What can we scrape from this webpage?
## 1) Try extracting the names of the top games from this page.
## 2) What tags contain the prices?  Can you extract the price information?
## 3) Get all of the header tags on the page
## 4) Can you get the text from each span tag with class equal to "top_tag"?
## 5) Under the "Narrow by Tag" section, there is a collection of tags (e.g. "Indie", "Adventure", etc.).  Write code to return these tags.
## 6) What else can be scraped from this webpage or others on the site?

## Now it's your turn!

## Task#1: Titles

In [154]:
# Find every <div> whose class attribute is the flyout-menu class string.
divs = soup.find_all(
    'div',
    class_='popup_block_new flyout_tab_flyout responsive_slidedown',
)
#divs

In [37]:
# Task 1: Names
# Take the first <a> element inside each flyout block (None if a block has none).
links = [block.a for block in divs]
links

[<a class="popup_menu_item" href="https://store.steampowered.com/?snr=1_241_4_action_12">
 										Home									</a>,
 <a class="popup_menu_item" href="https://store.steampowered.com/search/?filter=topsellers&amp;snr=1_241_4_action_12">
                                         Top Sellers                                    </a>,
 <a class="popup_menu_item" href="https://store.steampowered.com/genre/Free%20to%20Play/?snr=1_241_4_action_12">
 														Free to Play													</a>]

In [18]:
# Follow the second menu link (index 1); in the recorded output of the
# previous cell this is the "Top Sellers" search page. Index the link
# directly instead of building a throwaway list of every href, and pass a
# timeout so a stalled connection cannot hang the notebook.
top_sellers_url = links[1].get('href')
response = requests.get(top_sellers_url, timeout=30)
# Last expression of the cell: displays the HTTP status code.
response.status_code


200

In [19]:
# Keep the body of the second response; it is the markup parsed into soup1.
html1=response.content

In [21]:
# Parse the second page's markup, selecting the lxml parser explicitly.
soup1 = BeautifulSoup(markup=html1, features="lxml")

In [23]:
# Write a pretty-printed, UTF-8 encoded copy of the parsed page to
# top_sellers.html.
pretty_top_sellers = soup1.prettify('utf-8')
with open('top_sellers.html', 'wb') as out_file:
    out_file.write(pretty_top_sellers)

In [153]:
# Find the <div> element(s) whose id is "search_resultsRows".
divs1 = soup1.find_all('div', id='search_resultsRows')
#divs1

In [32]:
# Collect every <span class="title"> and keep only its text.
title_spans = soup1.find_all('span', class_='title')
names = [span.text for span in title_spans]
names

['Farming Simulator 22',
 'CS:GO - Operation Riptide',
 'Battlefield™ 2042',
 'Forza Horizon 5',
 'Farming Simulator 22 - Year 1 Bundle',
 'Farming Simulator 22 - Year 1 Season Pass',
 'DEATH STRANDING',
 'Expansion - Hearts of Iron IV: No Step Back',
 'Back 4 Blood',
 'Valve Index VR Kit',
 'Football Manager 2022',
 'Life is Strange: True Colors Ultimate Edition',
 'Inscryption',
 'CS:GO Prime Status Upgrade',
 'DayZ',
 'Pillars of Eternity Collection',
 'New World',
 'FIFA 22',
 'FIFA 22',
 'Satisfactory',
 'Myth of Empires',
 'ICARUS',
 'Life is Strange: True Colors',
 'Ruined King: A League of Legends Story™',
 'Tavern Master',
 'Gunfire Reborn',
 'EA Play',
 'Trackmania United Forever',
 'Barotrauma',
 'Age of Empires IV',
 'OUTRIDERS',
 'Terraria',
 'Phasmophobia',
 'Arma 3',
 'The Forgotten City',
 'Battlefield™ 2042',
 'The Elder Scrolls V: Skyrim Anniversary Upgrade',
 '7 Days to Die',
 'Kingdom Come: Deliverance Royal Edition',
 'ELDEN RING',
 'ELDEN RING',
 'Melvor Idle',
 '

## Task#2: Prices

In [76]:
# prices = soup1.find_all('div',{'class':'col search_price discounted responsive_secondrow'})
# prices = [price.text if price!=0 else print('0') for price in prices]
# prices
# #len(prices) : 22

In [69]:
#[div for div in divs1][0]

In [77]:
# prices = soup1.find_all('div',{'class':'col search_price responsive_secondrow'})
# prices = [price.text if price!=0 else None for price in prices]
# prices
# #len(prices) :28. 28+22 = 50

In [95]:
def _final_price(text):
    """Return the last euro price found in one price cell's text.

    Texts shorter than 7 characters are returned unchanged, as before.
    Longer texts are reduced to the segment before the final '€' plus the
    '€' sign (presumably a discounted cell lists the old price followed by
    the new one -- confirm against the saved HTML).

    Texts that contain no '€' at all (for example a free-to-play label, if
    the page shows one) are returned unchanged; previously they raised
    IndexError when they were 7 or more characters long.
    """
    if len(text) < 7 or '€' not in text:
        return text
    return text.split('€')[-2] + '€'


# One combined price/discount cell per search result.
price_cells = soup1.find_all(
    'div',
    {'class': "col search_price_discount_combined responsive_secondrow"},
)
# Drop newlines and surrounding whitespace before extracting the price.
prices = [_final_price(cell.text.replace("\n", "").strip()) for cell in price_cells]
prices


['39,99€',
 '12,75€',
 '59,99€',
 '59,99€',
 '77,98€',
 '37,99€',
 '17,99€',
 '19,99€',
 '41,99€',
 '1 079,--€',
 '54,99€',
 '63,98€',
 '19,99€',
 '12,75€',
 '39,99€',
 '87,28€',
 '39,99€',
 '35,99€',
 '35,99€',
 '20,99€',
 '22,49€',
 '22,49€',
 '38,99€',
 '29,99€',
 '13,49€',
 '12,59€',
 '3,99€',
 '29,99€',
 '24,99€',
 '59,99€',
 '19,79€',
 '4,99€',
 '11,59€',
 '6,99€',
 '24,99€',
 '59,99€',
 '19,99€',
 '22,99€',
 '13,59€',
 '59,99€',
 '59,99€',
 '8,19€',
 '19,99€',
 '20,99€',
 '9,99€',
 '16,19€',
 '11,99€',
 '14,99€',
 '5,09€',
 '29,99€']

## Task#3: Headings

In [35]:
# Task 3
# Collect every <h2> element of the parsed page.
headings = soup1.find_all(name='h2')
headings

[<h2 class="pageheader full">
 			Top Sellers		</h2>]

## Task#4: Get Reviews for each game

In [151]:
# for i in soup1.find_all('div',{'class':"col search_reviewscore responsive_secondrow"}):
#     print(i.span)

def _review_percent(span):
    """Return the review percentage (e.g. '92%') for one review-score cell.

    `span` is the first <span> inside the cell, or None when the cell has
    none. The percentage is read from the part of the 'data-tooltip-html'
    attribute that follows the first '<br>' (assumes the tooltip has the
    form '<summary><br>NN% of ...' -- confirm against the saved HTML).

    The digits are taken up to the '%' sign instead of slicing a fixed
    three characters, so '100%' and single-digit values come out intact.

    Returns the string 'None' (the placeholder this notebook already uses)
    when there is no span, no tooltip attribute, or no leading percentage.
    """
    if span is None:
        return 'None'
    tooltip = span.get('data-tooltip-html')
    if not tooltip:
        return 'None'
    detail = tooltip.partition('<br>')[2]
    number, sign, _ = detail.partition('%')
    number = number.strip()
    if sign and number.isdigit():
        return number + '%'
    return 'None'


# One review-score cell per search result; cells without reviews have no span.
review_cells = soup1.find_all(
    'div',
    {'class': "col search_reviewscore responsive_secondrow"},
)
reviews = [_review_percent(cell.span) for cell in review_cells]
reviews

['None',
 'None',
 '26%',
 '79%',
 'None',
 'None',
 '92%',
 'None',
 '63%',
 'None',
 '92%',
 '89%',
 '96%',
 '88%',
 '69%',
 '86%',
 '71%',
 '80%',
 '80%',
 '97%',
 '72%',
 'None',
 '89%',
 '84%',
 '91%',
 '93%',
 'None',
 '93%',
 '92%',
 '86%',
 '63%',
 '98%',
 '97%',
 '90%',
 '96%',
 '26%',
 '64%',
 '87%',
 '80%',
 'None',
 'None',
 '93%',
 '86%',
 '82%',
 '92%',
 '78%',
 '88%',
 '86%',
 '96%',
 '94%']

## Task#5 : Find All the Narrow By Tags

In [162]:
# Collect every <span class="tab_filter_control_label"> and keep each
# one's string.
label_spans = soup1.find_all('span', class_='tab_filter_control_label')
tags = [span.string for span in label_spans]
tags


['Special Offers',
 'Indie',
 'Action',
 'Adventure',
 'Casual',
 'Simulation',
 'Strategy',
 'RPG',
 'Singleplayer',
 'Early Access',
 'Free to Play',
 '2D',
 'Atmospheric',
 'Violent',
 'Sports',
 'Massively Multiplayer',
 'Multiplayer',
 'Puzzle',
 'Story Rich',
 '3D',
 'Fantasy',
 'Pixel Graphics',
 'Colorful',
 'Racing',
 'Nudity',
 'Gore',
 'Sexual Content',
 'Exploration',
 'Cute',
 'Anime',
 'First-Person',
 'Funny',
 'Sci-fi',
 'Arcade',
 'Shooter',
 'Horror',
 'Family Friendly',
 'Retro',
 'Relaxing',
 'Great Soundtrack',
 'Open World',
 'Action-Adventure',
 'Platformer',
 'Co-op',
 'Survival',
 'Female Protagonist',
 'Combat',
 'Difficult',
 'Third Person',
 'VR',
 'Comedy',
 'Old School',
 'Stylized',
 'PvP',
 'FPS',
 'Visual Novel',
 'Choices Matter',
 'Online Co-Op',
 'Realistic',
 'Controller',
 'Physics',
 'Top-Down',
 'Dark',
 'Character Customization',
 'Mystery',
 'Sandbox',
 'Cartoony',
 "Shoot 'Em Up",
 'Multiple Endings',
 'Psychological Horror',
 'Tactical',
 'De

## Task#6: What Else?

In [163]:
# Let's make a pandas DataFrame

