# Proyecto
Para este módulo es necesario extraer la información de los archivos .json dentro del dataset, por lo cual se importan los módulos *pandas, numpy, ast, matplotlib*. 

In [1]:
import pandas as pd
import numpy as np
from ast import literal_eval
import matplotlib
import nltk

## Games
Se limpia la información de los juegos que tenemos a mano, teniendo en cuenta que solo es necesario los datos sobre su ID (que nos permitirá relacionarlo con los otros archivos para conseguir la información que se requiere), nombre, año de lanzamiento, desarrolladora, distribuidora y genero.

In [2]:
#with open('Dataset/steam_games.json') as g:
#    rows = [literal_eval(row) for row in g.readlines()]
games = pd.read_json('Dataset/steam_games.json', lines=True)
games = games.drop(columns=['url','reviews_url','specs', 'price', 'tags','early_access']) # Una vez analizado, se determinó que estas columnas no contienen información relevante
games['año'] = games['release_date'].str.extract(r'(\d{4})')                              # Se toma el dato sobre la fecha en que salió el juego, la cual será relevante más adelante
games = games.drop(columns=['release_date'])                                              # Este campo ya no tiene ninguna relevancia
games = games.dropna(axis=0,how="any",subset=['genres','developer','año'])                # Se eliminan los campos que estén vacios, no puede existir juego si no tiene desarrollador, genero o año de salida
games = games.sort_values(by='id')                                                        # Se ordenan los datos según si id
games = games.drop_duplicates(subset=['title','app_name','año'])                          # Se busca eliminar datos repetidos, pueden existir juegos con el mismo nombre pero que hayan salido en diferentes años o teniendo generos diferentes
games = games.drop(columns=['app_name'])                                                  # Columna con datos duplicados

def floatToStr(n):
    '''Funcion que me permite usar un tipo de dato diferente para los id, el cual mejora su uso para comparaciones.'''
    return str(int(n))

games['id'] = games['id'].apply(floatToStr)                                               # Se usa la función y se retorna a su respectiva serie

games # Mostrar los datos con los que se va a trabajar

Unnamed: 0,publisher,genres,title,id,developer,año
120416,Valve,[Action],Counter-Strike,10,Valve,2000
120413,Valve,[Action],Team Fortress Classic,20,Valve,1999
120424,Valve,[Action],Day of Defeat,30,Valve,2003
120418,Valve,[Action],Deathmatch Classic,40,Valve,2001
120414,Valve,[Action],Half-Life: Opposing Force,50,Gearbox Software,1999
...,...,...,...,...,...,...
118903,Ubisoft,[Action],Tom Clancy's Ghost Recon Future Soldier - Seas...,2028055,"Ubisoft Paris,Red Storm Entertainment",2012
118999,Team17 Digital Ltd,[Strategy],Worms Revolution Season Pass,2028056,Team17 Digital Ltd.,2012
118632,Activision,[Action],Call of Duty®: Black Ops II Season Pass,2028062,Treyarch,2013
118834,Ubisoft,"[Action, Adventure]",Assassin’s Creed® III Season Pass,2028103,Ubisoft Montreal,2012


## Reviews
Se toman los archivos dentro de user_reviews.json, del cual podemos identificar la identificación del usuario, el url hacia su perfil y finalmente el comentario o comentarios que realizo el mismo.

Es considerado importante la información que existe dentro de las reviews, más no quien emitió esta review. por lo que en consecuencia se separa la lista de reviews para convertirla en un dataframe individual.

In [3]:
with open('Dataset/user_reviews.json', 'r', encoding='utf-8') as file:
    rows = [literal_eval(row) for row in file.readlines()]
reviews = pd.DataFrame(rows)
reviews = reviews.drop(columns=["user_url"])                    # El link de la review no tiene relevancia para este caso
#reviews = reviews.dropna(axis=0,how="any",subset=['reviews','user_id']) usado en caso de que existieran valores nulos, se comprobó que no es así
reviews

Unnamed: 0,user_id,reviews
0,76561197970982479,"[{'funny': '', 'posted': 'Posted November 5, 2..."
1,js41637,"[{'funny': '', 'posted': 'Posted June 24, 2014..."
2,evcentric,"[{'funny': '', 'posted': 'Posted February 3.',..."
3,doctr,"[{'funny': '', 'posted': 'Posted October 14, 2..."
4,maplemage,"[{'funny': '3 people found this review funny',..."
...,...,...
25794,76561198306599751,"[{'funny': '', 'posted': 'Posted May 31.', 'la..."
25795,Ghoustik,"[{'funny': '', 'posted': 'Posted June 17.', 'l..."
25796,76561198310819422,"[{'funny': '1 person found this review funny',..."
25797,76561198312638244,"[{'funny': '', 'posted': 'Posted July 21.', 'l..."


In [5]:
# Separan los reviews y se usa un dataframe aparte para poder obtener los datos necesarios para verificar los juegos recomendados
lista_reviews = []
for i in reviews['reviews']:
    lista_reviews.append(i)
    
reviews_individual = pd.DataFrame(lista_reviews[0])
reviews_individual

lista2_reviews = lista_reviews[1:]
for e in lista2_reviews:
    aux = pd.DataFrame(e)
    reviews_individual = pd.concat((reviews_individual,aux),ignore_index=True)

reviews_individual

Unnamed: 0,funny,posted,last_edited,item_id,helpful,recommend,review
0,,"Posted November 5, 2011.",,1250,No ratings yet,True,Simple yet with great replayability. In my opi...
1,,"Posted July 15, 2011.",,22200,No ratings yet,True,It's unique and worth a playthrough.
2,,"Posted April 21, 2011.",,43110,No ratings yet,True,Great atmosphere. The gunplay can be a bit chu...
3,,"Posted June 24, 2014.",,251610,15 of 20 people (75%) found this review helpful,True,I know what you think when you see this title ...
4,,"Posted September 8, 2013.",,227300,0 of 1 people (0%) found this review helpful,True,For a simple (it's actually not all that simpl...
...,...,...,...,...,...,...,...
59300,,Posted July 10.,,70,No ratings yet,True,a must have classic from steam definitely wort...
59301,,Posted July 8.,,362890,No ratings yet,True,this game is a perfect remake of the original ...
59302,1 person found this review funny,Posted July 3.,,273110,1 of 2 people (50%) found this review helpful,True,had so much fun plaing this and collecting res...
59303,,Posted July 20.,,730,No ratings yet,True,:D


## Sentiment analysis
Se toma en consideración el analisis de los comentarios dentro del dataframe de reviews individuales, donde se toma en cuenta el significado de las palabras bajo el esquema NLP, que se mide según comentario positivo (2), neutro (1) o negativo (0).

Se hace uso de nltk como librería que nos ayudará a realizar todas las operaciones dentro de los comentarios.

In [6]:
# Con reviews individual se cambia reviews por comentario positivo (2), neutro (1), negativo (0)
nltk.download('all')
from nltk.sentiment.vader import SentimentIntensityAnalyzer as sia
from nltk.corpus import stopwords as sw
from nltk.tokenize import word_tokenize as wt
from nltk.stem import WordNetLemmatizer as wnl

[nltk_data] Downloading collection 'all'
[nltk_data]    | 
[nltk_data]    | Downloading package abc to
[nltk_data]    |     /home/jonasa12/nltk_data...
[nltk_data]    |   Package abc is already up-to-date!
[nltk_data]    | Downloading package alpino to
[nltk_data]    |     /home/jonasa12/nltk_data...
[nltk_data]    |   Package alpino is already up-to-date!
[nltk_data]    | Downloading package averaged_perceptron_tagger to
[nltk_data]    |     /home/jonasa12/nltk_data...
[nltk_data]    |   Package averaged_perceptron_tagger is already up-
[nltk_data]    |       to-date!
[nltk_data]    | Downloading package averaged_perceptron_tagger_ru to
[nltk_data]    |     /home/jonasa12/nltk_data...
[nltk_data]    |   Package averaged_perceptron_tagger_ru is already
[nltk_data]    |       up-to-date!
[nltk_data]    | Downloading package basque_grammars to
[nltk_data]    |     /home/jonasa12/nltk_data...
[nltk_data]    |   Package basque_grammars is already up-to-date!
[nltk_data]    | Downloading pa

In [7]:
def tokenizeText(text:str):
    '''Toma una cadena de texto para filtrar las palabras, si existen en ingles, además si son de un lenguaje las cuales sean útiles para lo que se necesita'''
    tokens = wt(text.lower())
    f_tokens = [token for token in tokens if token not in sw.words('english')]
    lemmatizer = wnl()
    l_tokens = [lemmatizer.lemmatize(token) for token in f_tokens]
    p_text = ' '.join(l_tokens)
    return p_text

reviews_individual['review'] = reviews_individual['review'].apply(tokenizeText)
reviews_individual

Unnamed: 0,funny,posted,last_edited,item_id,helpful,recommend,review
0,,"Posted November 5, 2011.",,1250,No ratings yet,True,simple yet great replayability . opinion `` zo...
1,,"Posted July 15, 2011.",,22200,No ratings yet,True,'s unique worth playthrough .
2,,"Posted April 21, 2011.",,43110,No ratings yet,True,great atmosphere . gunplay bit chunky time end...
3,,"Posted June 24, 2014.",,251610,15 of 20 people (75%) found this review helpful,True,know think see title `` barbie dreamhouse part...
4,,"Posted September 8, 2013.",,227300,0 of 1 people (0%) found this review helpful,True,simple ( 's actually simple ! ) truck driving ...
...,...,...,...,...,...,...,...
59300,,Posted July 10.,,70,No ratings yet,True,must classic steam definitely worth buying .
59301,,Posted July 8.,,362890,No ratings yet,True,game perfect remake original half life . perso...
59302,1 person found this review funny,Posted July 3.,,273110,1 of 2 people (50%) found this review helpful,True,much fun plaing collecting resource xd first t...
59303,,Posted July 20.,,730,No ratings yet,True,:


In [8]:
# Se analizan los datos de review que se tienen
analyzer = sia()
def get_sentiment(text:str):
    score = analyzer.polarity_scores(text)
    
    sentiment = 0
    if score['pos'] > score['neg']:
        if score['pos'] > score['neu']:
            sentiment = 2
        else:
            sentiment = 1
    else:
        if score['neg'] < score['neu']:
            sentiment = 1
    return sentiment
reviews_individual['sentiment'] = reviews_individual['review'].apply(get_sentiment)
reviews_individual

Unnamed: 0,funny,posted,last_edited,item_id,helpful,recommend,review,sentiment
0,,"Posted November 5, 2011.",,1250,No ratings yet,True,simple yet great replayability . opinion `` zo...,1
1,,"Posted July 15, 2011.",,22200,No ratings yet,True,'s unique worth playthrough .,1
2,,"Posted April 21, 2011.",,43110,No ratings yet,True,great atmosphere . gunplay bit chunky time end...,1
3,,"Posted June 24, 2014.",,251610,15 of 20 people (75%) found this review helpful,True,know think see title `` barbie dreamhouse part...,1
4,,"Posted September 8, 2013.",,227300,0 of 1 people (0%) found this review helpful,True,simple ( 's actually simple ! ) truck driving ...,1
...,...,...,...,...,...,...,...,...
59300,,Posted July 10.,,70,No ratings yet,True,must classic steam definitely worth buying .,2
59301,,Posted July 8.,,362890,No ratings yet,True,game perfect remake original half life . perso...,2
59302,1 person found this review funny,Posted July 3.,,273110,1 of 2 people (50%) found this review helpful,True,much fun plaing collecting resource xd first t...,1
59303,,Posted July 20.,,730,No ratings yet,True,:,0


In [9]:
reviews_individual = reviews_individual.drop(columns=['funny', 'posted', 'last_edited', 'helpful', 'review'])
reviews_individual


Unnamed: 0,item_id,recommend,sentiment
0,1250,True,1
1,22200,True,1
2,43110,True,1
3,251610,True,1
4,227300,True,1
...,...,...,...
59300,70,True,2
59301,362890,True,2
59302,273110,True,1
59303,730,True,0


In [10]:
# Prueba basada en los juegos del año 2013
variable = games[games['año'] == '2013']
variable

Unnamed: 0,publisher,genres,title,id,developer,año
118109,Egosoft,"[Action, Simulation]",X Rebirth,2870,Egosoft,2013
118587,"2K Games, Aspyr (Mac)",[Action],BioShock Infinite,8870,"Irrational Games,Aspyr (Mac),Virtual Programmi...",2013
89577,"Hidden Path Entertainment, Inc.","[Strategy, Indie]",Defense Grid: Containment DLC,18523,"Hidden Path Entertainment, Inc.",2013
89566,Three Donkeys LLC,"[Indie, Strategy]",Spectromancer: Gathering of Power,22521,"Apus Software,Three Donkeys LLC",2013
119669,Dovetail Games - Trains,[Simulation],Train Simulator: Doncaster Works Route Add-On,24095,Dovetail Games,2013
...,...,...,...,...,...,...
102163,FeelThere,"[Casual, Indie, Strategy]",Bunny Mania 2,746770,Crispy Software Development,2013
102235,baKno Games,"[Casual, Indie, Simulation]",Yatzy,749010,baKno Games,2013
101185,Alawar Entertainment,"[Adventure, Casual, Indie]",Forest Legends: The Call of Love Collector's E...,765770,Alawar Stargaze,2013
118632,Activision,[Action],Call of Duty®: Black Ops II Season Pass,2028062,Treyarch,2013


### User Recommend
Para las recomendaciones de los usuarios se puede poner un archivo con los datos de tal forma que: id_juego, cantidad de *recomendaciones*, cantidad de *no recomendaciones*, año de lanzamiento. Para mostrar la información se tiene que hace sort en la columna necesaria

In [11]:
recommend = []
try:
    for i in variable['id']:
        scores = reviews_individual[reviews_individual['item_id'] == i]
        value = scores['recommend'].value_counts()
        #print(value)
        if value.__len__() == 0:
            continue
        else:
            if value.__len__() == 1:
                j = value.keys()
                for k in j:
                    if k:
                        recommend.append({"id":i,"value":value[True]})
except:
    print(recommend)


In [12]:
print(recommend)
salida = sorted(recommend, key=lambda i: i['value'], reverse=True)
print(salida[0:3])
output = salida[0:3]
nombres = []
for o in salida[0:3]:
    df = games[games['id']==o['id']]
    nombres.append(df['title'].values)
print(f"Los juegos mas recomendados en 2013 son: 1: '{nombres[0][0]}', 2: '{nombres[1][0]}', 3: '{nombres[2][0]}'")

[{'id': '35720', 'value': 29}, {'id': '39140', 'value': 21}, {'id': '39150', 'value': 11}, {'id': '105420', 'value': 1}, {'id': '108600', 'value': 46}, {'id': '201420', 'value': 2}, {'id': '204240', 'value': 7}, {'id': '204450', 'value': 39}, {'id': '207930', 'value': 1}, {'id': '208352', 'value': 1}, {'id': '208520', 'value': 4}, {'id': '209520', 'value': 1}, {'id': '210870', 'value': 1}, {'id': '210898', 'value': 3}, {'id': '211523', 'value': 1}, {'id': '212480', 'value': 37}, {'id': '212893', 'value': 2}, {'id': '213850', 'value': 10}, {'id': '214930', 'value': 1}, {'id': '214931', 'value': 1}, {'id': '214933', 'value': 1}, {'id': '215530', 'value': 7}, {'id': '215630', 'value': 1}, {'id': '215690', 'value': 1}, {'id': '215710', 'value': 1}, {'id': '215870', 'value': 1}, {'id': '217100', 'value': 2}, {'id': '217690', 'value': 2}, {'id': '218550', 'value': 2}, {'id': '219092', 'value': 2}, {'id': '219103', 'value': 3}, {'id': '219190', 'value': 3}, {'id': '219340', 'value': 1}, {'id'

In [16]:
games_without_genre = games.drop(columns=['genres'])

## Creando archivo parquet
Se ha determinado que el mejor formato para este caso de uso será el parquet, por lo cual es necesario tener los datasets en dicho formato.

In [17]:
import pyarrow as pa
from pyarrow import Table
import pyarrow.parquet as pq
# Lista de los datos que necesito para los schemas de los parquet
# Games: string, array, string, string, string, string
games_schema = pa.schema([('publisher', pa.string()), ('title', pa.string()), ('id', pa.string()), ('developer', pa.string()), ('año', pa.string())])
# Reviews: string, boolean, int
reviews_schema = pa.schema([('item_id',pa.string()), ('recommend', pa.bool_()), ('sentiment', pa.int8())])

table_games = Table.from_pandas(games_without_genre, schema=games_schema)
table_reviews = Table.from_pandas(reviews_individual, schema=reviews_schema)

pq.write_table(table_games, 'DatasetFinal/games.parquet')
pq.write_table(table_reviews, 'DatasetFinal/reviews.parquet')

## Playtime
Se toman los juegos de los usuarios y se buscan por genero el tiempo jugado

In [50]:
with open('Dataset/users_items.json', 'r', encoding='utf-8') as file:
    rows = [literal_eval(row) for row in file.readlines()]
items = pd.DataFrame(rows)
items = items.drop(columns=['user_url'])
items = items[items['items_count'] != 0]
items = items.sort_values(by='items_count',ascending=False)
items = items.drop_duplicates(subset=['steam_id','user_id'])
items
# Eliminar todos los usuarios que no tengan juegos

Unnamed: 0,user_id,items_count,steam_id,items
3708,phrostb,7762,76561198102767019,"[{'item_id': '300', 'item_name': 'Day of Defea..."
19055,thugnificent,6700,76561198000537256,"[{'item_id': '10', 'item_name': 'Counter-Strik..."
1104,chidvd,6410,76561198001518866,"[{'item_id': '10', 'item_name': 'Counter-Strik..."
6022,piepai,6132,76561198118965684,"[{'item_id': '10', 'item_name': 'Counter-Strik..."
5995,mayshowganmore,5027,76561198048151962,"[{'item_id': '10', 'item_name': 'Counter-Strik..."
...,...,...,...,...
77578,76561198087018128,1,76561198087018128,"[{'item_id': '205790', 'item_name': 'Dota 2 Te..."
81966,Ausninja_VH,1,76561198094343094,"[{'item_id': '45400', 'item_name': 'Fortix', '..."
55907,76561198055223832,1,76561198055223832,"[{'item_id': '205790', 'item_name': 'Dota 2 Te..."
63531,IBPro,1,76561198067988673,"[{'item_id': '4920', 'item_name': 'Natural Sel..."


In [59]:
items['items'].head(1).values

      dtype=object)

In [74]:
items_for_all_users = pd.DataFrame(columns=['item_id', 'item_name','playtime_forever','playtime_2weeks'])
for u in items['user_id']:
    usuario = items[items['user_id'] == u];
    for ui in usuario['items']:
        aux = pd.DataFrame(ui, columns=items_for_all_users.columns)
        items_for_all_users = pd.concat([aux, items_for_all_users],ignore_index=True)
items_for_all_users = items_for_all_users.sort_values(by='playtime_forever',ascending=False)
items_for_all_users

Unnamed: 0,item_id,item_name,playtime_forever,playtime_2weeks
5163,True,False,False,False
5282,True,False,False,False
838,False,False,False,False
2935,False,False,False,False
3351,False,False,False,False
...,...,...,...,...
3013,False,False,False,False
3010,False,False,False,False
3007,False,False,False,False
3006,False,False,False,False


In [8]:
games['genres'] = games['genres'].apply(lambda x : ', '.join(x) if isinstance(x,list) else x)
games['genres'] = games['genres'].str.split(', ')
games = games.explode('genres')
games['genres'].nunique()

22

In [9]:
games['genres'].value_counts() # Se indica que existen mayor cantidad de juegos Indies

genres
Indie                        15704
Action                       11170
Casual                        8238
Adventure                     8149
Strategy                      6891
Simulation                    6655
RPG                           5423
Free to Play                  2005
Early Access                  1441
Sports                        1251
Massively Multiplayer         1093
Racing                        1077
Design &amp; Illustration      460
Utilities                      334
Web Publishing                 265
Animation &amp; Modeling       183
Education                      125
Video Production               112
Software Training              101
Audio Production                88
Photo Editing                   77
Accounting                       7
Name: count, dtype: int64

In [42]:
a = games['genres'].unique()
for x in a:
    print(x)

Action
Free to Play
Adventure
Indie
Simulation
Strategy
RPG
Animation &amp; Modeling
Video Production
Casual
Racing
Massively Multiplayer
Sports
Early Access
Utilities
Audio Production
Design &amp; Illustration
Web Publishing
Photo Editing
Software Training
Education
Accounting


In [79]:
ugenres = games['genres'].unique()
hour_year_genre = []
for g in ugenres:
    games_with_genre = games[games['genres'] == g]
    years = games_with_genre['año'].unique()
    print(years)
    for gwg in years:
        tiempo = []
        playtime_genre = 0
        games_with_genre_year = games_with_genre[games_with_genre['año'] == gwg]
        for gwgy in games_with_genre_year['id']:
            for i in items['user_id']:
                items_owned = pd.DataFrame(columns=['item_id', 'item_name','playtime_forever','playtime_2weeks'])
                usuario = items[items['user_id'] == i];
                for ui in usuario['items']:
                    aux = pd.DataFrame(ui, columns=items_owned.columns)
                    items_owned = pd.concat([aux, items_owned],ignore_index=True)
                items_owned = items_owned.sort_values(by='playtime_forever',ascending=False)
                items_owned = items_owned.drop_duplicates(subset=['item_id','item_name'])
                items_owned = items_owned[items_owned['item_id'] == gwgy]
                if not items_owned.empty:
                    tiempo.append(items_owned['playtime_forever'].values[0])
                    print(tiempo)
        for x in tiempo:
            playtime_genre += x
        playtime_genre = round(playtime_genre/60,0)
        print('llegar')
        hour_year_genre.append({"genero":g,"año":gwg,"horas":int(playtime_genre)})

playtime = pd.DataFrame(hour_year_genre,columns=['genero','año','horas'])
playtime

['2000' '1999' '2003' '2001' '1998' '2004' '2010' '2005' '2006' '2007'
 '2008' '2009' '2011' '2012' '1994' '1995' '1996' '1997' '2013' '2002'
 '1993' '1990' '2017' '2014' '2016' '2015' '1983' '1984' '1991' '1989'
 '2018' '1988' '1992' '2019']
[0]
[0, 49]
[0, 49, 263]
[0, 49, 263, 0]
[0, 49, 263, 0, 10113]
[0, 49, 263, 0, 10113, 53]
[0, 49, 263, 0, 10113, 53, 0]
[0, 49, 263, 0, 10113, 53, 0, 0]
[0, 49, 263, 0, 10113, 53, 0, 0, 0]
[0, 49, 263, 0, 10113, 53, 0, 0, 0, 6]
[0, 49, 263, 0, 10113, 53, 0, 0, 0, 6, 0]
[0, 49, 263, 0, 10113, 53, 0, 0, 0, 6, 0, 0]
[0, 49, 263, 0, 10113, 53, 0, 0, 0, 6, 0, 0, 783]
[0, 49, 263, 0, 10113, 53, 0, 0, 0, 6, 0, 0, 783, 0]
[0, 49, 263, 0, 10113, 53, 0, 0, 0, 6, 0, 0, 783, 0, 13]
[0, 49, 263, 0, 10113, 53, 0, 0, 0, 6, 0, 0, 783, 0, 13, 1]
[0, 49, 263, 0, 10113, 53, 0, 0, 0, 6, 0, 0, 783, 0, 13, 1, 0]
[0, 49, 263, 0, 10113, 53, 0, 0, 0, 6, 0, 0, 783, 0, 13, 1, 0, 6]
[0, 49, 263, 0, 10113, 53, 0, 0, 0, 6, 0, 0, 783, 0, 13, 1, 0, 6, 12294]
[0, 49, 263, 0, 101

KeyboardInterrupt: 

In [49]:
hour_year_genre

[{'genero': '703310', 'año': '2000', 'horas': 305873},
 {'genero': '659510', 'año': '1999', 'horas': 356203},
 {'genero': '703320', 'año': '2003', 'horas': 555265},
 {'genero': '667291', 'año': '2001', 'horas': 580688},
 {'genero': '615250', 'año': '1998', 'horas': 629400},
 {'genero': '666220', 'año': '2004', 'horas': 2791778},
 {'genero': '901583', 'año': '2010', 'horas': 5829273},
 {'genero': '435420', 'año': '2005', 'horas': 6084159},
 {'genero': '550900', 'año': '2006', 'horas': 6267954},
 {'genero': '730840', 'año': '2007', 'horas': 6563188},
 {'genero': '708710', 'año': '2008', 'horas': 7127103},
 {'genero': '730870', 'año': '2009', 'horas': 9866295},
 {'genero': '901805', 'año': '2011', 'horas': 15436784},
 {'genero': '2028103', 'año': '2012', 'horas': 33621048},
 {'genero': '738470', 'año': '1994', 'horas': 33926122},
 {'genero': '901147', 'año': '1995', 'horas': 33934809},
 {'genero': '733760', 'año': '1996', 'horas': 33936084},
 {'genero': '580940', 'año': '1997', 'horas': 3

In [36]:
playtime_genre = 0
for x in tiempo:
    playtime_genre += x

print(playtime_genre)

116343


In [37]:
playtime_genre = round(playtime_genre/60,0)
print(playtime_genre)

1939.0


In [None]:
items_list = []
for i in items['items']:
    items_list.append(i)
print(items_list)
items_list = pd.DataFrame(items_list[0])
#print(items_list.describe)

lista2_items = items_list[1:]
print(lista2_items)
for e in lista2_items:
    #print(e)
    aux = pd.DataFrame(e)
    items_list = pd.concat((items_list,aux),ignore_index=True)

items_list

In [None]:
horas_jugadas_genero = pd.DataFrame(columns=["genero","año","horas jugadas"])

for genero in generos:
    for year in years:
        hora = 0
        for player in lista:
            for l in player:
                aux = games[games['id'] == float(l['item_id'])]
                if aux["año"].values.__len__() == 0:
                   pass
                else:
                    if aux["año"].values[0] == year:
                        if aux["genres"].values[0] == genero:
                            hora += l["playtime_forever"]
        new_record = pd.DataFrame([{"genero":genero,"año":year,"horas jugadas":hora}], columns=horas_jugadas.columns)
        horas_jugadas_genero = pd.concat([new_record, horas_jugadas], ignore_index=True)
horas_jugadas_genero

In [None]:
horas_jugadas_por_usuario = pd.DataFrame(columns=['user_id','genero','year','horas'])
for genero in generos:
    for year in years:
        horas = 0
        for u in items['user_id']:
            for i in items['items']:
                if 
                for j in i:
                    aux = games[games['id'] == j['item_id']]
                    if aux['id'].count() == 0:
                        pass
                    elif aux['id'].count() == 1:
                        horas += i['playtime_forever']
            print(horas)
            break
                    
                