In [1]:
# Import dependencies
import pandas as pd

In [2]:
# Read csv file into a DataFrame
games = pd.read_csv("bgg_dataset.csv", delimiter=';')
games.head()

Unnamed: 0,ID,Name,Year Published,Min Players,Max Players,Play Time,Min Age,Users Rated,Rating Average,BGG Rank,Complexity Average,Owned Users,Mechanics,Domains
0,174430.0,Gloomhaven,2017.0,1,4,120,14,42055,879,1,386,68323.0,"Action Queue, Action Retrieval, Campaign / Bat...","Strategy Games, Thematic Games"
1,161936.0,Pandemic Legacy: Season 1,2015.0,2,4,60,13,41643,861,2,284,65294.0,"Action Points, Cooperative Game, Hand Manageme...","Strategy Games, Thematic Games"
2,224517.0,Brass: Birmingham,2018.0,2,4,120,14,19217,866,3,391,28785.0,"Hand Management, Income, Loans, Market, Networ...",Strategy Games
3,167791.0,Terraforming Mars,2016.0,1,5,120,12,64864,843,4,324,87099.0,"Card Drafting, Drafting, End Game Bonuses, Han...",Strategy Games
4,233078.0,Twilight Imperium: Fourth Edition,2017.0,3,6,480,14,13468,870,5,422,16831.0,"Action Drafting, Area Majority / Influence, Ar...","Strategy Games, Thematic Games"


In [3]:
# Checking for null values
games.isnull().sum()

ID                       16
Name                      0
Year Published            1
Min Players               0
Max Players               0
Play Time                 0
Min Age                   0
Users Rated               0
Rating Average            0
BGG Rank                  0
Complexity Average        0
Owned Users              23
Mechanics              1598
Domains               10159
dtype: int64

In [4]:
# Review the games with no ID
noID= games[games["ID"].isnull()]
noID

Unnamed: 0,ID,Name,Year Published,Min Players,Max Players,Play Time,Min Age,Users Rated,Rating Average,BGG Rank,Complexity Average,Owned Users,Mechanics,Domains
10776,,Ace of Aces: Jet Eagles,1990.0,2,2,20,10,110,626,10778,2,,,
10835,,Die Erben von Hoax,1999.0,3,8,45,12,137,605,10837,2,,,
11152,,Rommel in North Africa: The War in the Desert ...,1986.0,2,2,0,12,53,676,11154,4,,,
11669,,Migration: A Story of Generations,2012.0,2,4,30,12,49,720,11671,200,,,
12649,,Die Insel der steinernen Wachter,2009.0,2,4,120,12,49,673,12651,3,,,
12764,,Dragon Ball Z TCG (2014 edition),2014.0,2,2,20,8,33,703,12766,250,,,
13282,,Dwarfest,2014.0,2,6,45,12,82,613,13284,175,,,
13984,,Hus,,2,2,40,0,38,628,13986,2,,,
14053,,Contrario 2,2006.0,2,12,0,14,37,630,14055,100,,,
14663,,Warage: Extended Edition,2017.0,2,6,90,10,49,764,14665,3,,,


In [5]:
# Drop rows that have more than 2 missing values
games.dropna(thresh=len(games.columns)-2, inplace =True)

In [6]:
# Drop rows where Owned Users is null
games.dropna(subset=['Owned Users'], inplace=True)

In [7]:
# Change numbers to int to remove decimals

games['ID']=games['ID'].astype(int)
games['Year Published']=games['Year Published'].astype(int)
games['Owned Users']=games['Owned Users'].astype(int)
games.head()

Unnamed: 0,ID,Name,Year Published,Min Players,Max Players,Play Time,Min Age,Users Rated,Rating Average,BGG Rank,Complexity Average,Owned Users,Mechanics,Domains
0,174430,Gloomhaven,2017,1,4,120,14,42055,879,1,386,68323,"Action Queue, Action Retrieval, Campaign / Bat...","Strategy Games, Thematic Games"
1,161936,Pandemic Legacy: Season 1,2015,2,4,60,13,41643,861,2,284,65294,"Action Points, Cooperative Game, Hand Manageme...","Strategy Games, Thematic Games"
2,224517,Brass: Birmingham,2018,2,4,120,14,19217,866,3,391,28785,"Hand Management, Income, Loans, Market, Networ...",Strategy Games
3,167791,Terraforming Mars,2016,1,5,120,12,64864,843,4,324,87099,"Card Drafting, Drafting, End Game Bonuses, Han...",Strategy Games
4,233078,Twilight Imperium: Fourth Edition,2017,3,6,480,14,13468,870,5,422,16831,"Action Drafting, Area Majority / Influence, Ar...","Strategy Games, Thematic Games"


In [14]:
# Change comma separated numbers to decimals
games['Rating Average'] = games['Rating Average'].apply(lambda x: x.replace(',','.'))
games['Complexity Average'] = games['Complexity Average'].apply(lambda x: x.replace(',','.'))
games.head()

Unnamed: 0,ID,Name,Year Published,Min Players,Max Players,Play Time,Min Age,Users Rated,Rating Average,BGG Rank,Complexity Average,Owned Users,Mechanics,Domains
0,174430,Gloomhaven,2017,1,4,120,14,42055,8.79,1,3.86,68323,"Action Queue, Action Retrieval, Campaign / Bat...","Strategy Games, Thematic Games"
1,161936,Pandemic Legacy: Season 1,2015,2,4,60,13,41643,8.61,2,2.84,65294,"Action Points, Cooperative Game, Hand Manageme...","Strategy Games, Thematic Games"
2,224517,Brass: Birmingham,2018,2,4,120,14,19217,8.66,3,3.91,28785,"Hand Management, Income, Loans, Market, Networ...",Strategy Games
3,167791,Terraforming Mars,2016,1,5,120,12,64864,8.43,4,3.24,87099,"Card Drafting, Drafting, End Game Bonuses, Han...",Strategy Games
4,233078,Twilight Imperium: Fourth Edition,2017,3,6,480,14,13468,8.7,5,4.22,16831,"Action Drafting, Area Majority / Influence, Ar...","Strategy Games, Thematic Games"


In [15]:
games.dtypes

ID                     int32
Name                  object
Year Published         int32
Min Players            int64
Max Players            int64
Play Time              int64
Min Age                int64
Users Rated            int64
Rating Average        object
BGG Rank               int64
Complexity Average    object
Owned Users            int32
Mechanics             object
Domains               object
dtype: object

In [16]:
games.isnull().sum()

ID                        0
Name                      0
Year Published            0
Min Players               0
Max Players               0
Play Time                 0
Min Age                   0
Users Rated               0
Rating Average            0
BGG Rank                  0
Complexity Average        0
Owned Users               0
Mechanics              1581
Domains               10136
dtype: int64

In [17]:
# Move mechanics column into it's own DataFrame. 
mechanics = games[['ID', 'Mechanics']]
mechanics.head()

Unnamed: 0,ID,Mechanics
0,174430,"Action Queue, Action Retrieval, Campaign / Bat..."
1,161936,"Action Points, Cooperative Game, Hand Manageme..."
2,224517,"Hand Management, Income, Loans, Market, Networ..."
3,167791,"Card Drafting, Drafting, End Game Bonuses, Han..."
4,233078,"Action Drafting, Area Majority / Influence, Ar..."


In [18]:
# Drop Mechanics field from games DF
games = games.drop('Mechanics', axis=1)
games.head()

Unnamed: 0,ID,Name,Year Published,Min Players,Max Players,Play Time,Min Age,Users Rated,Rating Average,BGG Rank,Complexity Average,Owned Users,Domains
0,174430,Gloomhaven,2017,1,4,120,14,42055,8.79,1,3.86,68323,"Strategy Games, Thematic Games"
1,161936,Pandemic Legacy: Season 1,2015,2,4,60,13,41643,8.61,2,2.84,65294,"Strategy Games, Thematic Games"
2,224517,Brass: Birmingham,2018,2,4,120,14,19217,8.66,3,3.91,28785,Strategy Games
3,167791,Terraforming Mars,2016,1,5,120,12,64864,8.43,4,3.24,87099,Strategy Games
4,233078,Twilight Imperium: Fourth Edition,2017,3,6,480,14,13468,8.7,5,4.22,16831,"Strategy Games, Thematic Games"


In [19]:
# Create a demo games Dataframe to test the database import
games_demo = games.head(20)
games_demo

Unnamed: 0,ID,Name,Year Published,Min Players,Max Players,Play Time,Min Age,Users Rated,Rating Average,BGG Rank,Complexity Average,Owned Users,Domains
0,174430,Gloomhaven,2017,1,4,120,14,42055,8.79,1,3.86,68323,"Strategy Games, Thematic Games"
1,161936,Pandemic Legacy: Season 1,2015,2,4,60,13,41643,8.61,2,2.84,65294,"Strategy Games, Thematic Games"
2,224517,Brass: Birmingham,2018,2,4,120,14,19217,8.66,3,3.91,28785,Strategy Games
3,167791,Terraforming Mars,2016,1,5,120,12,64864,8.43,4,3.24,87099,Strategy Games
4,233078,Twilight Imperium: Fourth Edition,2017,3,6,480,14,13468,8.7,5,4.22,16831,"Strategy Games, Thematic Games"
5,291457,Gloomhaven: Jaws of the Lion,2020,1,4,120,14,8392,8.87,6,3.55,21609,"Strategy Games, Thematic Games"
6,182028,Through the Ages: A New Story of Civilization,2015,2,4,120,14,23061,8.43,7,4.41,26985,Strategy Games
7,220308,Gaia Project,2017,1,4,150,12,16352,8.49,8,4.35,20312,Strategy Games
8,187645,Star Wars: Rebellion,2016,2,4,240,14,23081,8.42,9,3.71,34849,Thematic Games
9,12333,Twilight Struggle,2005,2,2,180,13,40814,8.29,10,3.59,56219,"Strategy Games, Wargames"


In [20]:
# Save games demo DF
games_demo.to_csv('games_demo.csv', sep=';', index=False)

In [21]:
# Create a demo mechanics DataFrame to test DB import
mechanics_demo = mechanics.head(20)
mechanics_demo

Unnamed: 0,ID,Mechanics
0,174430,"Action Queue, Action Retrieval, Campaign / Bat..."
1,161936,"Action Points, Cooperative Game, Hand Manageme..."
2,224517,"Hand Management, Income, Loans, Market, Networ..."
3,167791,"Card Drafting, Drafting, End Game Bonuses, Han..."
4,233078,"Action Drafting, Area Majority / Influence, Ar..."
5,291457,"Action Queue, Campaign / Battle Card Driven, C..."
6,182028,"Action Points, Auction/Bidding, Auction: Dutch..."
7,220308,"End Game Bonuses, Hexagon Grid, Income, Modula..."
8,187645,"Area Majority / Influence, Area Movement, Area..."
9,12333,"Action/Event, Advantage Token, Area Majority /..."


In [22]:
# Save mechanics demo to csv
mechanics_demo.to_csv('mechanics_demo.csv', sep=';', index=False)