# Import libraries

In [1]:
import json
import os
from pymongo import MongoClient
from bson.objectid import ObjectId
import pandas as pd
import numpy as np

# Setup MongoDB

In [4]:
## Connect to remote mongodb

from sshtunnel import SSHTunnelForwarder
import pymongo

with open('keys.json') as k:
    keys = json.loads(k.read())
    MONGO_HOST = keys["SSH"]["MONGO_HOST"] # i.e. 10.9.13.14
    MONGO_DB = keys["SSH"]["MONGO_DB"] # i.e. dm_project 
    MONGO_USER = keys["SSH"]["MONGO_USER"] # i.e. studente
    MONGO_PASS = keys["SSH"]["MONGO_PASS"] # i.e. la password della vm

server = SSHTunnelForwarder(
    MONGO_HOST,
    ssh_username=MONGO_USER,
    ssh_password=MONGO_PASS,
    remote_bind_address=('127.0.0.1', 27017)
)

server.start() # remember to stop

client = pymongo.MongoClient('127.0.0.1', server.local_bind_port) # server.local_bind_port is assigned local port
db = client[MONGO_DB]

In [7]:
top_games = db.vgchartz

In [8]:
db.list_collection_names()

['system.indexes', 'twitch', 'twitter']

# Load dataset

In [9]:
vgsales = pd.read_csv("../dataset/vgsales.csv")
vgsales.head()

Unnamed: 0,Rank,Name,Platform,Year,href_Genre,Publisher,Developer,Critic_Score,User_Score,NA_Sales,PAL_Sales,JP_Sales,Other_Sales,Global_Sales
0,1,Wii Sports,Wii,2006.0,http://www.vgchartz.com/game/2667/wii-sports/?...,Nintendo,Nintendo EAD,7.7,,41.36,29.02,3.77,8.51,82.65
1,2,Super Mario Bros.,NES,1985.0,http://www.vgchartz.com/game/6455/super-mario-...,Nintendo,Nintendo EAD,10.0,8.2,29.08,3.58,6.81,0.77,40.24
2,3,Mario Kart Wii,Wii,2008.0,http://www.vgchartz.com/game/6968/mario-kart-w...,Nintendo,Nintendo EAD,8.2,9.1,15.91,12.92,3.8,3.35,35.98
3,4,PLAYERUNKNOWN'S BATTLEGROUNDS,PC,2017.0,http://www.vgchartz.com/game/215988/playerunkn...,PUBG Corporation,PUBG Corporation,,,,,,,
4,5,Wii Sports Resort,Wii,2009.0,http://www.vgchartz.com/game/24656/wii-sports-...,Nintendo,Nintendo EAD,8.0,8.8,15.61,10.99,3.29,3.02,32.9


In [10]:
vgsales_without_na = vgsales[np.isfinite(vgsales['Global_Sales'])]
vgsales_without_na.head()

Unnamed: 0,Rank,Name,Platform,Year,href_Genre,Publisher,Developer,Critic_Score,User_Score,NA_Sales,PAL_Sales,JP_Sales,Other_Sales,Global_Sales
0,1,Wii Sports,Wii,2006.0,http://www.vgchartz.com/game/2667/wii-sports/?...,Nintendo,Nintendo EAD,7.7,,41.36,29.02,3.77,8.51,82.65
1,2,Super Mario Bros.,NES,1985.0,http://www.vgchartz.com/game/6455/super-mario-...,Nintendo,Nintendo EAD,10.0,8.2,29.08,3.58,6.81,0.77,40.24
2,3,Mario Kart Wii,Wii,2008.0,http://www.vgchartz.com/game/6968/mario-kart-w...,Nintendo,Nintendo EAD,8.2,9.1,15.91,12.92,3.8,3.35,35.98
4,5,Wii Sports Resort,Wii,2009.0,http://www.vgchartz.com/game/24656/wii-sports-...,Nintendo,Nintendo EAD,8.0,8.8,15.61,10.99,3.29,3.02,32.9
5,6,Pokémon Red / Green / Blue Version,GB,1998.0,http://www.vgchartz.com/game/4030/pokemon-red-...,Nintendo,Game Freak,9.4,,11.27,8.89,10.22,1.0,31.37


In [11]:
df_dict = vgsales_without_na.to_dict('records')

In [12]:
df_dict

[{'Rank': 1,
  'Name': 'Wii Sports',
  'Platform': 'Wii',
  'Year': 2006.0,
  'href_Genre': 'http://www.vgchartz.com/game/2667/wii-sports/?region=All',
  'Publisher': 'Nintendo  ',
  'Developer': 'Nintendo EAD  ',
  'Critic_Score': 7.7,
  'User_Score': nan,
  'NA_Sales': 41.36,
  'PAL_Sales': 29.02,
  'JP_Sales': 3.77,
  'Other_Sales': 8.51,
  'Global_Sales': 82.65},
 {'Rank': 2,
  'Name': 'Super Mario Bros.',
  'Platform': 'NES',
  'Year': 1985.0,
  'href_Genre': 'http://www.vgchartz.com/game/6455/super-mario-bros/?region=All',
  'Publisher': 'Nintendo  ',
  'Developer': 'Nintendo EAD  ',
  'Critic_Score': 10.0,
  'User_Score': 8.2,
  'NA_Sales': 29.08,
  'PAL_Sales': 3.58,
  'JP_Sales': 6.81,
  'Other_Sales': 0.77,
  'Global_Sales': 40.24},
 {'Rank': 3,
  'Name': 'Mario Kart Wii',
  'Platform': 'Wii',
  'Year': 2008.0,
  'href_Genre': 'http://www.vgchartz.com/game/6968/mario-kart-wii/?region=All',
  'Publisher': 'Nintendo  ',
  'Developer': 'Nintendo EAD  ',
  'Critic_Score': 8.2,
  

In [13]:
top_games.insert_many(df_dict)

<pymongo.results.InsertManyResult at 0x207df6e5348>

In [14]:
db.list_collection_names()

['system.indexes', 'twitch', 'twitter', 'vgchartz']

In [15]:
server.stop()