In [1]:
import pandas as pd

In [2]:
steam_data = pd.read_csv("../data/steam_data_final.csv")
steam_review = pd.read_csv("../output/sentiment_data.csv")

#### Getting a list of steam ids for the data that we have:

In [3]:
steam_app_ids = steam_data['steam_appid'].tolist()
steam_app_ids = list(set(steam_app_ids))

#### Removing rows depending on the app id:

In [4]:
flt_review = steam_review[steam_review.app_id.isin(steam_app_ids)]

#### Getting sentiment count for each game:

In [5]:
sentiment_count = pd.DataFrame({'count' : flt_review.groupby( [ "app_id", "app_name","sentiment"] ).size()}).reset_index()

#### Saving data

In [6]:
#flt_review.to_csv("../output/filtered_review_data.txt",sep="\t", index=False)
sentiment_count.to_csv("../output/sentiment_count.txt", sep="\t", index=False)

#### Separating reviews based on different genres

In [7]:
genre_data = steam_data[['steam_appid','genre']].drop_duplicates()
genre_data.rename(columns={"steam_appid": "app_id"}, inplace=True)

In [8]:
genre_review = pd.merge(flt_review, genre_data, how="left", on="app_id")

In [9]:
genre_review = genre_review.assign(genre_type=genre_review['genre'].str.split(',')).explode('genre_type')

In [10]:
#genre_review.to_csv("../output/filtered_review_data.txt",sep="\t", index=False)

In [11]:
genre_count = pd.DataFrame({'count' : genre_review.groupby( [ "genre_type",] ).size()}).reset_index()

In [12]:
genre_count.to_csv("../output/genre_count.txt", sep="\t", index=False)

In [13]:
genre_gamereview_counts = pd.DataFrame({'count' : genre_review.groupby( [ "app_id","app_name","genre_type","sentiment"] ).size()}).reset_index()

In [14]:
genre_review_counts = pd.DataFrame({'count' : genre_review.groupby( [ "genre_type","sentiment"] ).size()}).reset_index()

In [15]:
#genre_review_counts.to_csv("../output/genre_review_counts.txt", sep="\t", header=False)
#genre_gamereview_counts.to_csv("../output/game_genre_sent_count.txt", sep="\t", header=False)

#### Creating different dataframes for all genres

In [16]:
genre_review.columns

Index(['app_id', 'app_name', 'review_id', 'language', 'review', 'recommended',
       'votes_helpful', 'votes_funny', 'weighted_vote_score', 'comment_count',
       'steam_purchase', 'received_for_free', 'written_during_early_access',
       'sentiment', 'genre', 'genre_type'],
      dtype='object')

In [17]:
def clean_genre_name(row):
    genre_name = row.lstrip().rstrip()   
    return genre_name

In [18]:
genre_review = genre_review[['app_id', 'app_name', 'review_id', 'language', 'review', 'recommended',
       'votes_helpful', 'votes_funny', 'weighted_vote_score','sentiment', 'genre', 'genre_type']]

In [19]:
genre_review['genre_type'] = genre_review['genre_type'].apply(clean_genre_name)

In [20]:
def create_genre_txt(main_df, file_name, genre):
    
    df = main_df[main_df['genre_type'] == str(genre)]
    df.to_csv("../output/genre_data/"+file_name, sep="\t", index=False)

In [21]:
genres = genre_count['genre_type'].tolist()
genres = list(set(genres))

In [23]:
for genre in genres:
    genre = genre.lstrip().rstrip()
    genre = genre.replace(" ", "_")
    print(genre)
    filename = str(genre)+"_review.txt"
    create_genre_txt(genre_review,filename,genre)

Massively_Multiplayer
Adventure
Indie
Early_Access
Sports
Simulation
Casual
Free_to_Play
Design_&_Illustration
Photo_Editing
Racing
Utilities
Massively_Multiplayer
Action
Strategy
Casual
Simulation
RPG
Adventure
Strategy
Racing
Animation_&_Modeling
RPG
Indie
