# Steam App Details Dataset

In [None]:
from datetime import date
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sb
import re
import json
import ast
import time

In [None]:
# Main app table: first CSV column is the index and `release_date` is parsed
# as a day-first date.
df_steam = pd.read_csv(
    './clean_datasets/steam.csv',
    index_col=0,
    parse_dates=['release_date'],
    dayfirst=True,
)

# Side tables whose first CSV column is used as the index.
df_steam_tags = pd.read_csv(
    './clean_datasets/steamspy_tag_data.csv', index_col=0
)
df_steam_requirements = pd.read_csv(
    './clean_datasets/steam_requirements_data.csv', index_col=0
)
df_steam_descriptions = pd.read_csv(
    './clean_datasets/steam_description_data.csv', index_col=0
)
df_steam_support = pd.read_csv(
    './clean_datasets/steam_support_info.csv', index_col=0
)

# Tables loaded with the default integer index; later cells read `appid` from
# the achievements and categories tables as a regular column.
df_steam_achievements = pd.read_csv('./clean_datasets/steam_achievements.csv')
df_steam_categories = pd.read_csv('./clean_datasets/steam_categories.csv')
df_steam_developers = pd.read_csv('./clean_datasets/steam_developers.csv')

In [None]:
# Preview the first five rows of the main app table.
df_steam.head(n=5)

In [None]:
plt.figure(figsize=(8, 8))

# Every column that is NOT dropped here is treated as a platform flag column.
non_platform_columns = [
    'name',
    'release_date',
    'english',
    'publisher',
    'required_age',
    'positive_ratings',
    'negative_ratings',
    'average_playtime',
    'median_playtime',
    'owners_range',
    'price',
]
df_plats = df_steam.drop(columns=non_platform_columns)

# Long format (one row per app/platform flag), keeping only the rows whose
# flag is not False so that countplot counts supported apps per platform.
df2 = pd.melt(df_plats, value_vars=df_plats.columns)
df2 = df2.loc[df2["value"].ne(False)]
plats_plot = sb.countplot(data=df2, x="variable")
plats_plot.set_xlabel('Platform supported')
plats_plot.set_ylabel('App count')

# Denominator for the percentage labels: total number of apps.
total = len(df_plats['windows'])

# First bar (presumably Windows - this relies on the column order of the
# CSV; verify): the label sits just right of the bar, at half its height.
patchWindows = plats_plot.patches[0]
percentage = f'{100 * patchWindows.get_height() / total:.1f}%'
x = patchWindows.get_x() + patchWindows.get_width() + 0.02
y = patchWindows.get_y() + patchWindows.get_height() / 2
plats_plot.annotate(percentage, (x, y))

# Second and third bars: the label is roughly centred slightly above the bar.
for bar_index in (1, 2):
    bar = plats_plot.patches[bar_index]
    percentage = f'{100 * bar.get_height() / total:.1f}%'
    x = bar.get_x() + bar.get_width() / 2 - 0.08
    y = bar.get_y() + bar.get_height() * 1.05
    plats_plot.annotate(percentage, (x, y))

plt.show()

In [None]:
# Very wide figure so that every category label fits on the x axis.
plt.figure(figsize=(50, 8))

total = df_steam_categories['appid'].size
df_categories = df_steam_categories.drop(columns='appid')

# Long format, keeping only rows whose category flag is not False, so the
# count plot shows the number of apps per category column.
df2 = pd.melt(df_categories, value_vars=df_categories.columns)
df2 = df2.loc[df2["value"].ne(False)]
plats_plot = sb.countplot(data=df2, x="variable")
plats_plot.set_xlabel('Categories')
plats_plot.set_ylabel('App count')

plt.show()

In [None]:
# Price distribution, zoomed in on the 0-30 price range.
prices1 = sb.displot(
    data=df_steam,
    x="price",
    multiple="stack",
    aspect=2,
)
prices1.ax.set_xlim(0, 30)
prices1.set(xlabel='App Price', ylabel='App Count')

In [None]:
# Price distribution for the 30-80 price range; the y axis is capped at 100
# so the comparatively rare expensive apps remain visible.
prices2 = sb.displot(
    data=df_steam,
    x="price",
    multiple="stack",
    aspect=2,
)
prices2.ax.set_xlim(30, 80)
prices2.ax.set_ylim(0, 100)
prices2.set(xlabel='App Price', ylabel='App Count')

In [None]:
# Distribution of the `percent` column of the achievements table.
achiev_percent = sb.displot(
    data=df_steam_achievements,
    x="percent",
    multiple="stack",
    aspect=2,
)
achiev_percent.set(xlabel='Percentage of Users', ylabel='Achievement count')

In [None]:
# Number of achievement rows per app, for every app listed in
# `df_steam_categories` (same index as that table; 0 when the app has none).
#
# Counting all appids once with value_counts() and looking each app up with
# map() avoids rescanning the whole achievements table for every single app.
achievement_counts = df_steam_achievements['appid'].value_counts()
number_achievements = pd.DataFrame({
    'n_achiev': (
        df_steam_categories['appid']
        .map(achievement_counts)
        .fillna(0)  # apps that never appear in the achievements table
        .astype('int64')
    )
})

In [None]:
# Distribution of the number of achievements per app. The x axis starts at 1
# and stops at 100, and the y axis is capped at 3000.
achiev_percent = sb.displot(
    data=number_achievements,
    x="n_achiev",
    multiple="stack",
    aspect=3,
)
achiev_percent.set(xlabel='Number of Achievements', ylabel='App count')
achiev_percent.ax.set_xlim(1, 100)
achiev_percent.ax.set_ylim(0, 3000)
plt.show()

In [None]:
# Render release dates as 'dd/mm/YYYY' text.
# NOTE(review): after this cell `release_date` holds strings, not datetimes,
# so the `.dt` accessor no longer works on it without re-parsing.
release_dates = pd.to_datetime(df_steam['release_date'])
df_steam['release_date'] = release_dates.dt.strftime('%d/%m/%Y')

In [None]:
# Preview the first five rows again to check the `release_date` column.
df_steam.head(n=5)

In [None]:
# Average price of paid apps (price != 0) per release year.
#
# An earlier cell rewrites `release_date` as 'dd/mm/YYYY' strings, and the
# `.dt` accessor only works on datetime-like data, so parse the column here.
# `dayfirst=True` matches that string format; the call leaves the values
# unchanged when the column is already datetime.
release_years = pd.to_datetime(df_steam['release_date'], dayfirst=True).dt.year
unique_years = np.sort(release_years.dropna().unique())

# One grouped mean instead of re-filtering the whole frame for every year.
# Reindexing on `unique_years` keeps exactly one entry per plotted year; a
# year with no paid apps becomes NaN (a gap in the line) rather than a
# division by zero.
is_paid = df_steam['price'] != 0
averages = (
    df_steam.loc[is_paid, 'price']
    .groupby(release_years[is_paid].to_numpy())
    .mean()
    .reindex(unique_years)
    .tolist()
)

plt.figure(figsize=(10, 5))
plt.plot(unique_years, averages)
plt.title('Average App Price per Year')
plt.xlabel('Year')
plt.ylabel('Average price')
plt.show()

In [None]:
# Net rating per app: positive ratings minus negative ratings.
review_diffs = df_steam['positive_ratings'].sub(df_steam['negative_ratings'])

# Scatter of net rating against average playtime; both axes are clipped, so
# points outside these limits are not shown.
fig, ax = plt.subplots(figsize=(10, 7))
ax.scatter(df_steam['average_playtime'], review_diffs)
ax.set_xlim(0, 20000)
ax.set_ylim(-15000, 100000)
ax.set_title('Positive-Negative Ratings vs Average Playtime')
ax.set_xlabel('Average Playtime')
ax.set_ylabel('Positive-Negative Ratings')
plt.show()