## Where are the Steam users located around the world?

#### There are now >20 million concurrent users logged in to Steam.  Adding a location to the user's account is optional. We decided to scrape user volunteered information from gamedb to get a represenation of where steam's users are active around the globe.

In [1]:
#import dependencies
import pandas as pd
import pymongo
from bs4 import BeautifulSoup
from splinter import Browser

In [2]:
#connect to mongodb port
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(conn)

In [3]:
executable_path = {'executable_path': './chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

In [4]:
# Visit the GameDB site
url = 'https://gamedb.online/players?country=US'
browser.visit(url)

In [5]:
# Scrape page into Soup
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [6]:
# Determine element that contains sought user by country info
countries = soup.find('optgroup', id='countries-group')
for value in countries.stripped_strings:
     print(value)


No Country (360,941)
Afghanistan (1,203)
Aland Islands (231)
Albania (641)
Algeria (829)
American Samoa (338)
Andorra (216)
Angola (201)
Anguilla (82)
Antarctica (2,437)
Antigua and Barbuda (129)
Argentina (3,947)
Armenia (449)
Aruba (159)
Australia (20,016)
Austria (2,858)
Azerbaijan (515)
Bahamas (421)
Bahrain (612)
Bangladesh (716)
Barbados (133)
Belarus (1,630)
Belgium (3,381)
Belize (102)
Benin (43)
Bermuda (149)
Bhutan (167)
Bolivia (216)
Bonaire, Sint Eustatius and Saba (122)
Bosnia and Herzegovina (1,015)
Botswana (173)
Bouvet Island (71)
Brazil (13,053)
British Indian Ocean Territory (112)
British Virgin Islands (355)
Brunei (293)
Bulgaria (1,770)
Burkina Faso (120)
Burundi (53)
Cambodia (211)
Cameroon (71)
Canada (31,686)
Cape Verde (70)
Cayman Islands (114)
Central African Republic (141)
Chad (793)
Chile (1,788)
China (7,522)
Christmas Island (1,050)
Cocos (Keeling) Islands (503)
Colombia (986)
Comoros (15)
Cook Islands (286)
Costa Rica (528)
Croatia (1,821)
Cuba (483)
Curaç

In [7]:
# Clean country & number of users values by removing unwanted chracters from objects
country = [x.text.split('(') for x in countries]
c1 = pd.DataFrame(country)
c2 = c1.rename(columns={0: "Country", 1: "Users"}).drop(columns=[2])
c2

Unnamed: 0,Country,Users
0,\nNo Country,"360,941)\n"
1,\nAfghanistan,"1,203)\n"
2,\nAland Islands,231)\n
3,\nAlbania,641)\n
4,\nAlgeria,829)\n
...,...,...
246,\nWallis and Futuna,130)\n
247,\nWestern Sahara,162)\n
248,\nYemen,250)\n
249,\nZambia,204)\n


In [8]:
countries_steam_users = c2.replace({"\n":""}, regex=True)

In [9]:
countries_steam_users['Users'] = countries_steam_users['Users'].str.replace(')','').astype(object)

In [17]:
# Save DF to csv file and display
countries_steam_users.to_csv('countries_steam_users.csv', index = False)
countries_steam_users

Unnamed: 0,Country,Users
0,No Country,360941
1,Afghanistan,1203
2,Aland Islands,231
3,Albania,641
4,Algeria,829
...,...,...
246,Wallis and Futuna,130
247,Western Sahara,162
248,Yemen,250
249,Zambia,204


In [11]:
# Determine element that contains sought user by US state info
us_states = soup.find('select', id='state')
for value in us_states.stripped_strings:
     print(value)

All States (183,628)
No State (70,200)
---
Alabama (1,275)
Alaska (1,052)
American Samoa (56)
Arizona (2,818)
Arkansas (732)
California (16,607)
Colorado (2,815)
Connecticut (936)
Delaware (267)
District Of Columbia (319)
Federated State Of Micronesia (249)
Florida (7,084)
Georgia (2,644)
Guam (51)
Hawaii (588)
Idaho (716)
Illinois (3,825)
Indiana (1,968)
Iowa (865)
Kansas (871)
Kentucky (1,216)
Louisiana (977)
Maine (633)
Marshall Islands (11)
Maryland (1,734)
Massachusetts (2,432)
Michigan (3,714)
Minnesota (2,188)
Mississippi (403)
Missouri (1,918)
Montana (407)
Nebraska (570)
Nevada (1,210)
New Hampshire (576)
New Jersey (2,156)
New Mexico (530)
New York (6,569)
North Carolina (2,655)
North Dakota (241)
Northern Mariana Islands (18)
Ohio (3,919)
Oklahoma (1,046)
Oregon (2,954)
Palau (27)
Pennsylvania (4,089)
Puerto Rico (82)
Rhode Island (334)
South Carolina (989)
South Dakota (202)
Tennessee (1,754)
Texas (9,461)
Utah (1,335)
Vermont (262)
Virgin Islands (204)
Virginia (2,526)
Was

In [12]:
# Clean US state & number of users values by removing unwanted chracters from objects
state = [x.text.split('(') for x in us_states]
s1 = pd.DataFrame(state)
us_states_steam_users = s1.rename(columns={0: "State", 1: "Users"})
us_states_steam_users

Unnamed: 0,State,Users
0,All States,"183,628)"
1,No State,"70,200)"
2,---,
3,Alabama,"1,275)"
4,Alaska,"1,052)"
...,...,...
57,Virginia,"2,526)"
58,Washington,"5,603)"
59,West Virginia,505)
60,Wisconsin,"1,998)"


In [13]:
us_states_steam_users['Users'] = us_states_steam_users['Users'].str.replace(')','').astype(object)

In [14]:
# Save DF to csv file and display
us_states_steam_users.to_csv('us_states_steam_users.csv', encoding='utf-8-sig', index = False)
us_states_steam_users

Unnamed: 0,State,Users
0,All States,183628
1,No State,70200
2,---,
3,Alabama,1275
4,Alaska,1052
...,...,...
57,Virginia,2526
58,Washington,5603
59,West Virginia,505
60,Wisconsin,1998


In [15]:
# Determine element that contains sought user by continent info
continents = soup.find('optgroup', label='Continents')
for value in continents.stripped_strings:
     print(value)

Africa (16,795)
Antarctica (4,003)
Asia (71,312)
Europe (202,041)
North America (225,778)
South America (24,335)
Australia (28,990)


In [16]:
browser.quit()