# Preparing Data for Network Analysis
The data from friend connections, the list of owned games, and the list of game genres are used to generate a dataframe of players with their playing time in each genre.

In [None]:
import numpy as np
import pandas as pd

# file paths for reading data
# Raw strings keep the backslash literal: in a normal string "\s" is an
# invalid escape sequence that newer Python versions warn about.
path_friends = r"...\sql_friends.csv"
path_games_owned = r"...\sql_games_owned.csv"
path_games_genre = r"...\sql_games_genre.csv"

In [None]:
# Creating dataframe of playing time based on genre
# Each CSV is loaded and its 'Unnamed: 0' column is dropped
# (presumably a row index written by an earlier export -- confirm).

# friend connections
df = pd.read_csv(path_friends).drop(columns='Unnamed: 0')

# owned games, indexed by the owning player
df_games_owned = (
    pd.read_csv(path_games_owned)
    .drop(columns='Unnamed: 0')
    .set_index(['PlayerID'])
)

# game genres, indexed by game
df_games_genre = (
    pd.read_csv(path_games_genre)
    .drop(columns='Unnamed: 0')
    .set_index(['GameID'])
)

# creating a dataframe of players:
# every distinct player id found on either side of a friend connection
all_players = np.union1d(df['steamid_a'].unique(), df['steamid_b'].unique())

# list of genres considered in the analysis
list_genre = [
    'Action', 'Free to Play', 'Strategy', 'Adventure', 'Indie', 'RPG',
    'Casual', 'Simulation', 'Racing', 'Massively Multiplayer', 'Sports',
    'Early Access',
]

# creating a full dataframe including genres:
# one row per player (indexed by PlayerID) and one zero-initialised
# column per genre, added in the order given by list_genre
df_pltime = pd.DataFrame(all_players, columns=['PlayerID'])
df_pltime = df_pltime.assign(**dict.fromkeys(list_genre, 0))
df_pltime = df_pltime.set_index(['PlayerID'])

# calculating playing time in each genre
# For every player, walk over the games they own and add each game's
# 'Alltime_Played' value to every genre column the game is listed under.
# Players with no owned-games rows, games with no genre rows, and genres
# that are not columns of df_pltime are skipped.
n_players = len(all_players)
for i, player in enumerate(all_players, start=1):
    # printing the progress (percentage of players processed so far)
    if i % 10000 == 0:
        print('%0.2f' % (i / n_players * 100))
    if player not in df_games_owned.index:
        continue
    df_games_time = df_games_owned.loc[player]
    # .loc yields a single row for one match and a DataFrame for several;
    # wrapping the selected field in pd.Series makes both cases iterable.
    game_ids = pd.Series(df_games_time['GameID'])
    play_times = pd.Series(df_games_time['Alltime_Played'])
    for game, played in zip(game_ids, play_times):
        if game not in df_games_genre.index:
            continue
        # A separate name is used so the module-level list_genre is not
        # overwritten while iterating.
        game_genres = pd.Series(df_games_genre.loc[game]['Genre'])
        for genre in game_genres:
            if genre in df_pltime.columns:
                # Single-step .loc[row, col] assignment always writes into
                # df_pltime itself; chained indexing (.loc[row][col]) may
                # update a temporary copy and silently lose the value.
                df_pltime.loc[player, genre] += int(played)
# calculating the total playing time of each player
# (row-wise sum over all columns currently in df_pltime)
total_per_player = df_pltime.sum(axis='columns')
df_pltime['Total'] = total_per_player

# saving generated dataframe
df_pltime.to_csv('...\\sql_play_time_all.csv')