In [1]:
import pandas as pd
import numpy as np
import requests
import json

In [2]:
# Load the contents of metacritic_games.csv (path relative to the working directory) into a DataFrame.
df = pd.read_csv('metacritic_games.csv')

In [3]:
# Show every column when a frame is displayed. None means "no limit"; the
# previous value, len(df), was the number of rows and only worked by accident.
pd.set_option('display.max_columns', None)

In [4]:
# Dimensions of the loaded frame as (rows, columns).
df.shape

(5699, 19)

## Verificando quais colunas possuem valores nulos ou missing
Podemos verificar que as colunas developer, genre, number_players e rating possuem valores faltantes, assim como as colunas sem nome Unnamed: 15 a Unnamed: 18.

In [5]:
# For each column, report whether it holds at least one missing value.
df.isna().any()

game                False
platform            False
developer            True
genre                True
number_players       True
rating               True
release_date        False
positive_critics    False
neutral_critics     False
negative_critics    False
positive_users      False
neutral_users       False
negative_users      False
metascore           False
user_score          False
Unnamed: 15          True
Unnamed: 16          True
Unnamed: 17          True
Unnamed: 18          True
dtype: bool

### Devido ao fato de que alguns jogos estão com o nome da desenvolvedora em branco (NaN), foi efetuada uma consulta em uma API pública para obter o nome. Para isso, foi utilizado o site IGDB.com, que fornece uma API para consulta de dados de videogames.

In [6]:
def requestInvolvedCompanies(game):
    """Search IGDB for a game and return its first involved-company id.

    Sends a ``search`` query for ``game`` to the IGDB v3 ``/games`` endpoint
    and returns the first element of the ``involved_companies`` list of the
    first result.

    Parameters
    ----------
    game : str
        Title of the game to search for.

    Returns
    -------
    The first entry of ``involved_companies`` of the first hit, or ``None``
    (after printing an error message) when ``game`` is empty or not a string,
    the request fails, or the response does not have the expected structure.
    """
    import os  # local import: keeps this cell runnable on its own

    # An empty or non-string title cannot produce a meaningful search;
    # skip the network call and report it like any other failed lookup.
    if not isinstance(game, str) or not game.strip():
        print('Erro durante {}'.format(game))
        return None

    url = "https://api-v3.igdb.com/games"

    # NOTE(review): the standalone "release_date.human;" clause does not look
    # like a complete query statement; kept unchanged here - confirm against
    # the IGDB query documentation.
    payload = "search \"{}\";\nfields involved_companies;\nrelease_date.human;\nwhere version_parent = null;".format(game)

    headers = {
        'cookie': "__cfduid=d4bed4e84314697f5dcf282a31e3174bd1593302851",
        # NOTE(security): prefer supplying the key through IGDB_USER_KEY; the
        # literal fallback only keeps existing runs working and should be
        # rotated and removed.
        'user-key': os.environ.get("IGDB_USER_KEY", "2078e56b79acab9b669a3bd18661c2ba"),
    }

    try:
        # A timeout prevents one stalled connection from hanging the whole loop.
        response = requests.request("GET", url, data=payload, headers=headers, timeout=30)
        response.raise_for_status()

        involved_companies = response.json()
        return involved_companies[0]['involved_companies'][0]

    # Network/HTTP errors, invalid JSON, and empty or unexpectedly shaped results.
    except (requests.RequestException, ValueError, LookupError, TypeError):
        print('Erro durante {}'.format(game))
        return None

In [7]:
def requestMainCompany(id_company):
    """Resolve an IGDB involved-company id to the id of its company.

    Queries the IGDB v3 ``/involved_companies`` endpoint for the record with
    id ``id_company`` and returns its ``company`` field.

    Parameters
    ----------
    id_company
        Id of an ``involved_companies`` record, or ``None`` when the previous
        lookup step failed.

    Returns
    -------
    The ``company`` value of the first returned record, or ``None`` (after
    printing an error message) when ``id_company`` is ``None``, the request
    fails, or the response does not have the expected structure.
    """
    import os  # local import: keeps this cell runnable on its own

    # A failed upstream lookup yields None; do not query the API with
    # "where id = None", just report the failure and propagate None.
    if id_company is None:
        print('Erro durante o id_company {}'.format(id_company))
        return None

    url = "https://api-v3.igdb.com/involved_companies"

    payload = "fields company; where id = {};\n".format(id_company)

    headers = {
        'cookie': "__cfduid=d4bed4e84314697f5dcf282a31e3174bd1593302851",
        # NOTE(security): prefer supplying the key through IGDB_USER_KEY; the
        # literal fallback only keeps existing runs working and should be
        # rotated and removed.
        'user-key': os.environ.get("IGDB_USER_KEY", "2078e56b79acab9b669a3bd18661c2ba"),
    }

    try:
        # A timeout prevents one stalled connection from hanging the whole loop.
        response = requests.request("GET", url, data=payload, headers=headers, timeout=30)
        response.raise_for_status()

        company = response.json()
        return company[0]['company']

    # Network/HTTP errors, invalid JSON, and empty or unexpectedly shaped results.
    except (requests.RequestException, ValueError, LookupError, TypeError):
        print('Erro durante o id_company {}'.format(id_company))
        return None

In [9]:
def requestDeveloperName(id_developer):
    """Return the name of the IGDB company with the given id.

    Queries the IGDB v3 ``/companies`` endpoint for the record with id
    ``id_developer`` and returns its ``name`` field.

    Parameters
    ----------
    id_developer
        Id of a ``companies`` record, or ``None`` when the previous lookup
        step failed.

    Returns
    -------
    The ``name`` of the first returned record, or ``None`` (after printing an
    error message) when ``id_developer`` is ``None``, the request fails, or
    the response does not have the expected structure.
    """
    import os  # local import: keeps this cell runnable on its own

    # A failed upstream lookup yields None; do not query the API with
    # "where id = None", just report the failure and propagate None.
    if id_developer is None:
        print('Erro durante o id_developer {}'.format(id_developer))
        return None

    url = "https://api-v3.igdb.com/companies"

    # Only the name is read below, so request just that field instead of "*".
    payload = "fields name;\nwhere id = {};".format(id_developer)

    headers = {
        'cookie': "__cfduid=d4bed4e84314697f5dcf282a31e3174bd1593302851",
        # NOTE(security): prefer supplying the key through IGDB_USER_KEY; the
        # literal fallback only keeps existing runs working and should be
        # rotated and removed.
        'user-key': os.environ.get("IGDB_USER_KEY", "2078e56b79acab9b669a3bd18661c2ba"),
    }

    try:
        # A timeout prevents one stalled connection from hanging the whole loop.
        response = requests.request("GET", url, data=payload, headers=headers, timeout=30)
        response.raise_for_status()

        developer = response.json()[0]
        return developer['name']

    # Network/HTTP errors, invalid JSON, and empty or unexpectedly shaped results.
    except (requests.RequestException, ValueError, LookupError, TypeError):
        print('Erro durante o id_developer {}'.format(id_developer))
        return None

In [11]:
# Index labels of the rows excluded from the frame.
# NOTE(review): the reason these four rows are dropped is not recorded here - document it.
remove = [1727, 1901, 1129, 4192]

# errors="ignore" makes the cell safe to re-run: once the labels are gone a
# second execution is a no-op instead of raising KeyError.
df = df.drop(index=remove, errors="ignore")

In [15]:
# Titles of the rows whose 'developer' value is missing.
games = df.loc[df['developer'].isna(), 'game']

### Com nossas funções de acesso aos dados da API, podemos então percorrer os nomes dos jogos do nosso dataframe e preencher os valores faltantes na coluna de nome da desenvolvedora

In [16]:
# Resolve every title in `games` to a developer name through the three chained
# lookups (game -> involved company -> company -> name), preserving the order
# of `games`. A lookup that fails contributes whatever the helper returned.
devs = []

for game in games:
    id_company = requestInvolvedCompanies(game)
    id_developer = requestMainCompany(id_company)
    developer_name = requestDeveloperName(id_developer)
    devs.append(developer_name)

In [17]:
# Display the developer names collected by the lookup loop.
devs

['Electronic Arts',
 'Burut CT',
 'No Reply Games',
 'Red Barrels',
 'Housemarque',
 'Centauri Production',
 'Bungie',
 'Nude Maker',
 '612 Games']

In [None]:
# Row positions (used with .iloc) whose 'developer' value is overwritten with
# the entries of devs, in the same order.
# NOTE(review): hard-coded; presumably the positions of the rows selected into
# `games` - confirm, or derive them from games.index instead.
index = [258, 620, 1103, 1434, 1562, 1609, 1818, 3500, 5520]

In [None]:
# Write each looked-up developer name into the row at the matching position.
# The assignment goes through a single .iloc call on the frame itself: the
# chained form df['developer'].iloc[...] = value assigns into an intermediate
# Series, which pandas warns about and which never reaches df under
# Copy-on-Write.
if len(index) != len(devs):
    # A length mismatch would pair names with the wrong rows.
    raise ValueError(
        'index and devs must have the same length, got {} and {}'.format(len(index), len(devs))
    )

developer_col = df.columns.get_loc('developer')
for row_position, dev_name in zip(index, devs):
    df.iloc[row_position, developer_col] = dev_name

In [21]:
# Spot-check the 'developer' value stored at row position 1558.
df['developer'].iloc[1558]

'Tango Gameworks'