In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
# Silence every warning category for the rest of the session. Note that this
# also hides deprecation notices emitted by pandas / seaborn.
warnings.filterwarnings('ignore') 

# Check Data

In [None]:
# Load the games CSV into the notebook-wide frame used by every later cell.
csv_path = '../input/games-of-all-time-from-metacritic/games_of_all_time.csv'
Data = pd.read_csv(csv_path)


In [None]:
# Preview the first rows of the frame (head() defaults to five rows).
Data.head()

In [None]:
# List the column labels available in the frame.
Data.columns

In [None]:
# Summary statistics; with default arguments describe() reports on the numeric columns.
Data.describe()

In [None]:
# Print each column's dtype and non-null count, plus memory usage.
Data.info()

In [None]:
# Distinct values found in the rating column, as a plain Python list.
Data["rating"].unique().tolist()

# Clean Data

In [None]:
# Count the missing values in each column.
Data.isna().sum()

In [None]:
# Replace missing values with the placeholder string 'null', but only in the
# non-numeric columns. Filling the whole frame would write a string into any
# numeric column that contains NaN, turning it into object dtype and breaking
# the mean / sum / corr computations further down. Assigning the result back
# (instead of inplace=True) keeps the cell safe to re-run.
non_numeric_cols = Data.select_dtypes(exclude="number").columns
Data[non_numeric_cols] = Data[non_numeric_cols].fillna('null')

In [None]:
# Re-count missing values per column after the fill step.
Data.isna().sum()

# Basic Data Analysis

In [None]:
# Mean user score for each rating, lowest average first.
(
    Data.groupby("rating", as_index=False)["user_score"]
    .mean()
    .sort_values("user_score")
)

In [None]:
# Mean meta score for each rating, lowest average first.
(
    Data.groupby("rating", as_index=False)["meta_score"]
    .mean()
    .sort_values("meta_score")
)

In [None]:
# Mean user score for each type, lowest average first.
(
    Data.groupby("type", as_index=False)["user_score"]
    .mean()
    .sort_values("user_score")
)

In [None]:
# Mean meta score for each type, lowest average first.
(
    Data.groupby("type", as_index=False)["meta_score"]
    .mean()
    .sort_values("meta_score")
)

## Numerical Description  

In [None]:
def hist_plot(a, b):
    """Show a histogram of column ``a`` from the global ``Data`` frame, coloured by column ``b``.

    Parameters
    ----------
    a : str
        Name of the column placed on the x-axis.
    b : str
        Name of the column passed to seaborn as ``hue``.
    """
    fig, ax = plt.subplots(figsize=(9, 5))
    sns.histplot(data=Data, x=a, hue=b, ax=ax)
    ax.set_xlabel(a)
    ax.set_ylabel("Frequency")
    plt.show()


In [None]:
# Draw one histogram for every (score column, grouping column) pair.
a = ["meta_score", "user_score"]
b = ["type", "rating"]
for score_col in a:
    for group_col in b:
        hist_plot(score_col, group_col)

## Categorical Description

In [None]:
def bar_plot(b):
    """Show a bar chart of value counts for column ``b`` of the global ``Data`` frame.

    After the figure is shown, the same counts are printed as text.

    Parameters
    ----------
    b : str
        Name of the column whose values are counted.
    """
    counts = Data[b].value_counts()

    plt.figure(figsize=(9, 5))
    plt.bar(counts.index, counts)
    # Label each bar with its own category value.
    plt.xticks(counts.index, counts.index.values)
    plt.ylabel("Frequency")
    plt.title(b)
    plt.show()
    print("{}:\n {}".format(b, counts))
    
    
 

In [None]:
# Bar chart plus printed counts for each categorical column.
b = ["type", "rating"]
for category_col in b:
    bar_plot(category_col)

# Visualization

In [None]:
# Correlation heatmap of the numeric columns.
# Data.corr() on the whole frame raises on pandas >= 2.0 because the text
# columns cannot be converted to float, so restrict to numeric dtypes first
# (select_dtypes works on every pandas version, unlike numeric_only=).
f, ax = plt.subplots(figsize=(7, 7))
numeric_corr = Data.select_dtypes(include="number").corr()
sns.heatmap(numeric_corr, annot=True, linewidths=0.5, linecolor="red", fmt='.1f', ax=ax)
plt.show()

In [None]:
# Violin plot of the whole frame passed as wide-form data
# (seaborn is expected to draw one violin per numeric column).
fig, ax = plt.subplots()
sns.violinplot(data=Data, ax=ax)
plt.show()

In [None]:
# Swarm plot: user score on x, rating on y, points coloured by type.
fig, ax = plt.subplots(figsize=(12, 12))
sns.swarmplot(data=Data, x="user_score", y="rating", hue="type", ax=ax)
plt.show()

In [None]:
# Swarm plot: meta score on x, rating on y, points coloured by type.
fig, ax = plt.subplots(figsize=(12, 12))
sns.swarmplot(data=Data, x="meta_score", y="rating", hue="type", ax=ax)
plt.show()

In [None]:
# Box plot of user score per rating, split by type.
fig, ax = plt.subplots(figsize=(12, 10))
sns.boxplot(data=Data, x="rating", y="user_score", hue="type", ax=ax)
plt.show()

In [None]:
# Box plot of meta score per rating, split by type.
fig, ax = plt.subplots(figsize=(12, 10))
sns.boxplot(data=Data, x="rating", y="meta_score", hue="type", ax=ax)
plt.show()

In [None]:
# type & user score ratio & meta score ratio
# Mean user score and mean meta score for every game type, one bar chart each.
# A single groupby replaces the per-category filter + builtin sum() loops:
# sort=False keeps the categories in first-appearance order (the order
# Data.type.unique() returns), and missing categories are skipped instead of
# triggering a division by zero on an empty selection.
type_means = Data.groupby("type", sort=False)[["user_score", "meta_score"]].mean()
typelist = type_means.index.to_numpy()
userscore_ratio = type_means["user_score"].tolist()
metascore_ratio = type_means["meta_score"].tolist()

# type & user score ratio
d_type = pd.DataFrame({"type": typelist, "userscore_ratio": userscore_ratio})

# Visualization
sns.barplot(x=d_type["type"], y=d_type["userscore_ratio"])
plt.xlabel('type')
plt.ylabel('user score rate')
plt.show()

# type & meta score ratio
d2_type = pd.DataFrame({"type": typelist, "metascore_ratio": metascore_ratio})

# Visualization
sns.barplot(x=d2_type["type"], y=d2_type["metascore_ratio"])
plt.xlabel('type')
plt.ylabel('meta score rate')
plt.show()




In [None]:
# user score vs meta score
# Rescale each series by its own maximum so both peak at 1.0 and can share one
# y-axis. Series.max() is used instead of the builtin max(), which iterates the
# values in Python and gives order-dependent results if a NaN is present.
d_type["userscore_ratio"] = d_type["userscore_ratio"] / d_type["userscore_ratio"].max()
d2_type["metascore_ratio"] = d2_type["metascore_ratio"] / d2_type["metascore_ratio"].max()

# Side-by-side frame: type, userscore_ratio, metascore_ratio.
d3_type = pd.concat([d_type, d2_type["metascore_ratio"]], axis=1)

# Visualization
plt.figure(figsize=(10, 6))
sns.pointplot(x='type', y='userscore_ratio', data=d3_type, color='lime', alpha=0.8)
sns.pointplot(x='type', y='metascore_ratio', data=d3_type, color='red', alpha=0.8)
# Hand-placed legend labels; the positions are in data coordinates, so they may
# need adjusting if the number of categories or the value range changes.
plt.text(2.59, 0.994, 'user score ratio', color='lime', fontsize=18, style='italic')
plt.text(2.59, 1, 'meta score ratio', color='red', fontsize=17, style='italic')
plt.xlabel('type', fontsize=15, color='blue')
plt.ylabel('Values', fontsize=15, color='blue')
plt.grid()
plt.show()

In [None]:
# Violin plot of the normalised per-type ratios (wide-form input, so seaborn
# is expected to draw one violin per numeric column).
fig, ax = plt.subplots()
sns.violinplot(data=d3_type, ax=ax)
plt.show()

In [None]:
# rating & user score ratio & meta score ratio
# Mean user score and mean meta score for every rating, one bar chart each.
# A single groupby replaces the per-category filter + builtin sum() loops:
# sort=False keeps the categories in first-appearance order (the order
# Data.rating.unique() returns), and missing categories are skipped instead of
# triggering a division by zero on an empty selection.
rating_means = Data.groupby("rating", sort=False)[["user_score", "meta_score"]].mean()
ratinglist = rating_means.index.to_numpy()
userscore_ratio = rating_means["user_score"].tolist()
metascore_ratio = rating_means["meta_score"].tolist()

# rating & user score ratio
d_rating = pd.DataFrame({"rating": ratinglist, "userscore_ratio": userscore_ratio})

# Visualization
sns.barplot(x=d_rating["rating"], y=d_rating["userscore_ratio"])
plt.xlabel('rating')
plt.ylabel('user score rate')
plt.show()

# rating & meta score ratio
d2_rating = pd.DataFrame({"rating": ratinglist, "metascore_ratio": metascore_ratio})

# Visualization
sns.barplot(x=d2_rating["rating"], y=d2_rating["metascore_ratio"])
plt.xlabel('rating')
plt.ylabel('meta score rate')
plt.show()



In [None]:
# user score vs meta score
# Rescale each series by its own maximum so both peak at 1.0 and can share one
# y-axis. Series.max() is used instead of the builtin max(), which iterates the
# values in Python and gives order-dependent results if a NaN is present.
d_rating["userscore_ratio"] = d_rating["userscore_ratio"] / d_rating["userscore_ratio"].max()
d2_rating["metascore_ratio"] = d2_rating["metascore_ratio"] / d2_rating["metascore_ratio"].max()

# Side-by-side frame: rating, userscore_ratio, metascore_ratio.
d3_rating = pd.concat([d_rating, d2_rating["metascore_ratio"]], axis=1)

# Visualization
plt.figure(figsize=(10, 6))
sns.pointplot(x='rating', y='userscore_ratio', data=d3_rating, color='lime', alpha=0.8)
sns.pointplot(x='rating', y='metascore_ratio', data=d3_rating, color='red', alpha=0.8)
# Hand-placed legend labels; the positions are in data coordinates, so they may
# need adjusting if the number of categories or the value range changes.
plt.text(8, 0.994, 'user score ratio', color='lime', fontsize=18, style='italic')
plt.text(8, 1.04, 'meta score ratio', color='red', fontsize=17, style='italic')
plt.xlabel('rating', fontsize=15, color='blue')
plt.ylabel('Values', fontsize=15, color='blue')
plt.grid()
plt.show()

In [None]:
# Violin plot of the normalised per-rating ratios (wide-form input, so seaborn
# is expected to draw one violin per numeric column).
fig, ax = plt.subplots()
sns.violinplot(data=d3_rating, ax=ax)
plt.show()