In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sea
import numpy as np

In [3]:
# Load the sales table from a CSV file (relative path), decoding it as UTF-8.
games = pd.read_csv('video-games.csv', encoding='utf-8')

In [4]:
# Questions to answer
# - Number of (global) sales per genre
# - Sales per continent
# - Sales per publisher

In [5]:
# Preview the first rows to check the column names and value formats.
games.head()

Unnamed: 0,Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
0,1,Wii Sports,Wii,2006.0,Sports,Nintendo,41.49,29.02,3.77,8.46,82.74
1,2,Super Mario Bros.,NES,1985.0,Platform,Nintendo,29.08,3.58,6.81,0.77,40.24
2,3,Mario Kart Wii,Wii,2008.0,Racing,Nintendo,15.85,12.88,3.79,3.31,35.82
3,4,Wii Sports Resort,Wii,2009.0,Sports,Nintendo,15.75,11.01,3.28,2.96,33.0
4,5,Pokemon Red/Pokemon Blue,GB,1996.0,Role-Playing,Nintendo,11.27,8.89,10.22,1.0,31.37


In [6]:
# Data preparation: index labels of the rows whose Year is missing.
# Series.isna() is vectorized and, unlike apply(np.isnan), does not raise
# when the column holds non-float values (e.g. None in an object column).
indexMissingElements = games.index[games['Year'].isna()]

In [7]:
# Show every row whose Genre is exactly 'Strategy' (boolean-mask filter).
games[games['Genre'] == 'Strategy']

Unnamed: 0,Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
165,166,Pokemon Stadium,N64,1999.0,Strategy,Nintendo,3.18,1.24,0.94,0.09,5.45
204,205,Warzone 2100,PS,1999.0,Strategy,Eidos Interactive,2.79,1.89,0.00,0.33,5.01
217,218,StarCraft II: Wings of Liberty,PC,2010.0,Strategy,Activision,2.56,1.68,0.00,0.59,4.83
267,268,Warcraft II: Tides of Darkness,PC,1995.0,Strategy,Activision,1.70,2.27,0.00,0.23,4.21
335,336,Pokémon Trading Card Game,GB,1998.0,Strategy,Nintendo,1.49,0.73,1.38,0.10,3.70
...,...,...,...,...,...,...,...,...,...,...,...
16510,16513,Palais de Reine,PS2,2007.0,Strategy,Interchannel-Holon,0.00,0.00,0.01,0.00,0.01
16532,16535,STORM: Frontline Nation,PC,2011.0,Strategy,Unknown,0.00,0.01,0.00,0.00,0.01
16539,16542,Spore Galactic Adventures,PC,2009.0,Strategy,Electronic Arts,0.00,0.01,0.00,0.00,0.01
16555,16558,Codename: Panzers Complete Collection,PC,2016.0,Strategy,Nordic Games,0.00,0.01,0.00,0.00,0.01


In [8]:
# Distinct genre labels in the data, plus a per-genre sales accumulator.

genres = games["Genre"].unique()

# Seed every genre with a zero total. The keys must come from `genres`;
# iterating over the (still empty) dict itself would never execute.
dictGenresAndSale = dict.fromkeys(genres, 0)

genres

array(['Sports', 'Platform', 'Racing', 'Role-Playing', 'Puzzle', 'Misc',
       'Shooter', 'Simulation', 'Action', 'Fighting', 'Adventure',
       'Strategy'], dtype=object)

In [9]:
def getIndexes(dfObj, value):
    """Locate every cell of ``dfObj`` that equals ``value``.

    Args:
        dfObj: DataFrame to search; all of its columns are scanned.
        value: The single value to look for.

    Returns:
        A list of ``(row_label, column_name)`` tuples, grouped by column in
        column order and, within each column, in row order. The list is
        empty when the value does not occur.
    """
    matches = dfObj.isin([value])
    positions = []
    # Visit only the columns that contain at least one match.
    for column, hasMatch in matches.any().items():
        if not hasMatch:
            continue
        columnMatches = matches[column]
        positions.extend(
            (rowLabel, column)
            for rowLabel, isMatch in columnMatches.items()
            if isMatch
        )
    return positions

In [10]:
# Total Global_Sales per genre, aggregated on the Genre column only.
# Scanning the whole frame for each genre label would also count rows where
# some other column (e.g. Name or Publisher) merely equals that label, and
# it re-scans every cell once per genre; a single groupby avoids both.
salesPerGenre = games.groupby('Genre')['Global_Sales'].sum()
for genre in genres:
    # Fall back to 0 for a label that has no group.
    dictGenresAndSale[genre] = salesPerGenre.get(genre, 0)

In [11]:
# Sanity check: print the running Global_Sales total after each "Strategy" match.
indexes = getIndexes(games, "Strategy")
amount = 0
for rowLabel, _matchedColumn in indexes:
    amount = amount + games['Global_Sales'][rowLabel]
    print(amount)

5.45
10.46
15.290000000000001
19.5
23.2
26.05
28.78
31.41
33.81
35.89
37.97
40.04
41.73
43.4
45.03
46.64
48.19
49.739999999999995
51.279999999999994
52.73
54.099999999999994
55.44
56.75
58.0
59.2
60.38
61.550000000000004
62.660000000000004
63.760000000000005
64.85000000000001
65.91000000000001
66.91000000000001
67.88000000000001
68.83000000000001
69.74000000000001
70.63000000000001
71.52000000000001
72.4
73.23
74.06
74.86
75.65
76.44000000000001
77.22000000000001
78.00000000000001
78.76000000000002
79.49000000000002
80.21000000000002
80.93000000000002
81.64000000000001
82.34000000000002
83.04000000000002
83.71000000000002
84.37000000000002
85.03000000000002
85.69000000000001
86.34000000000002
86.99000000000002
87.64000000000003
88.28000000000003
88.91000000000003
89.54000000000002
90.17000000000002
90.79000000000002
91.41000000000003
92.01000000000002
92.61000000000001
93.21000000000001
93.81
94.4
94.99000000000001
95.57000000000001
96.14
96.71
97.27
97.82
98.36999999999999
98.91999999

In [12]:
# One row per genre: turn the {genre: total} mapping into a two-column frame.
salesByGenre = pd.DataFrame(
    list(dictGenresAndSale.items()),
    columns=['Genre', 'GlobalSales'],
)

In [13]:
# Order the genres from lowest to highest total sales.
# Rebinding the sorted result (instead of inplace=True) avoids mutating the
# frame in place across cells.
salesByGenre = salesByGenre.sort_values(by='GlobalSales')

In [4]:
# Bar chart of total global sales per genre.
# Use the Axes returned by plt.subplots directly: calling plt.axes() afterwards
# would add a second, overlapping Axes to the same figure.
fig, ax = plt.subplots(figsize=(15, 10))
plot = sea.barplot(x=salesByGenre["Genre"], y=salesByGenre["GlobalSales"], ax=ax)
ax.set_title('Global sales by Genre')
ax.set_xlabel('Genre')
ax.set_ylabel('Global Sales (in US$ Millions)')
ax.grid(True)
ax.tick_params(axis='x', rotation=30)

# Save before showing: some backends release the figure once show() returns.
fig.savefig('images/global_sales_by_genre.png')
plt.show()

NameError: name 'plt' is not defined

# 