# Python Project

##### This is my project. I obtained the dataset from Kaggle. It contains video game sales from 1980 to 2020, although some of the data has not yet been updated. The original source of the dataset is [VGChartz.com](https://www.vgchartz.com).



### Importing all the libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# set the plots to display in the Jupyter notebook
%matplotlib inline


### Read the Dataset

In [None]:
# Load the video game sales dataset from the local CSV file.
# According to the notebook's description it covers 1980 to 2020 and the
# sales figures are expressed in millions.
df_sales = pd.read_csv("vgsales.csv")
df_sales.head()

In [None]:
# Display the 'Year' column sorted in ascending order to see which years
# the data starts from.
df_sales['Year'].sort_values()

### Set the Rank Column as the Index

In [None]:
# Use the 'Rank' column as the row index; inplace=True modifies df_sales
# directly instead of returning a new DataFrame.
df_sales.set_index('Rank', inplace=True)

In [None]:
# Preview the first rows to confirm that 'Rank' is now the index.
df_sales.head()

### Checking the Number of Rows and Columns

In [None]:
# Display the size of the dataset as (number of rows, number of columns).
df_sales.shape

In [None]:
# Display the column names of the dataset.
df_sales.columns

### Counting Null Values in the Data

In [None]:
# Count how many null (NaN) values exist in each column of the dataset.
df_sales.isna().sum()

### Dropping the null value

In [None]:
# Remove every row that has no 'Year' or no 'Publisher' (dropna works on rows
# by default), then renumber the remaining rows 0..n-1.
# NOTE(review): reset_index(drop=True) throws the previous index away instead
# of keeping it as a column. An earlier cell sets 'Rank' as the index, so the
# Rank values are discarded here - confirm that this is intended.
df_sales = (
    df_sales
    .dropna(subset=['Year', 'Publisher'])
    .reset_index(drop=True)
)
# Re-count the nulls per column to verify the result.
df_sales.isna().sum()

### Checking for any duplicate 

In [None]:
# Count the rows whose 'Name' already appeared in an earlier row.
# Only the name is compared, so this is not a count of fully identical rows;
# presumably a game released on several platforms is counted here - verify.
df_sales['Name'].duplicated().sum()

### Changing the type of data

###### Since the Year column is stored as a float, it needs to be converted to an integer.

In [None]:
# Convert 'Year' to int, then display the resulting dtype to confirm it.
# This depends on the rows with a missing 'Year' having been dropped in an
# earlier cell: astype(int) cannot convert NaN.
df_sales['Year'] = df_sales['Year'].astype(int)
df_sales['Year'].dtype

In [None]:
# Preview the first rows to check that 'Year' is now shown as an integer.
df_sales.head()

### Sort the value by sales

In [None]:
# Show the first 20 rows after sorting in descending order.
# The sort is keyed on 'NA_Sales' first; 'EU_Sales', 'JP_Sales', 'Other_Sales'
# and 'Global_Sales' only break ties between rows whose earlier keys are equal.
# NOTE(review): if the goal is the best-selling games overall, sorting by
# 'Global_Sales' alone may be what was intended - confirm.
df_sales.sort_values(by=['NA_Sales','EU_Sales', 'JP_Sales', 'Other_Sales', 'Global_Sales'], ascending = False).head(20)

### Total Sale by Region

In [None]:
# Pie chart of each region's share of the sales summed over the whole dataset
# (described in this notebook as covering 1980 to 2020).
list_sales = ['NA_Sales', 'EU_Sales', 'JP_Sales', 'Other_Sales']
label = list_sales  # the slice labels are the column names themselves
value = df_sales[list_sales].sum()  # one total per regional column

plt.figure(figsize=(10, 10))
plt.pie(
    value,
    labels=label,
    autopct='%1.1f%%',  # print each share with one decimal place
    shadow=False,
    startangle=90,
)
plt.title("Total Sale by Region", fontsize=20)
plt.show()

In [None]:
# Display the sum of 'Global_Sales' for each value of 'Year'.
df_sales.groupby(['Year'])['Global_Sales'].sum()

### Total Sale by Year

In [None]:
# Line chart with one line per regional sales column, showing the summed
# sales for each year. list_sales (the regional column names) comes from an
# earlier cell.
sales_per_year = df_sales.groupby('Year')[list_sales].sum()
sales_per_year.plot(figsize=(10, 10))

# Axis labels and title for the figure created by the plot call above.
plt.xlabel('Year', fontsize=20)
plt.ylabel('Value (million)', fontsize=20)
plt.title('Total Sale by Year', fontsize=22)
plt.show()

### Average Sale Per Year Globally

In [None]:
# For each year, take the total 'Global_Sales' and divide it by 12, which
# gives the mean monthly sales within that year (the yearly total spread
# evenly over twelve months). It is not an average over games or over years.
avg_sales_per_year = df_sales.groupby(['Year'])['Global_Sales'].sum() / 12 # 12 months in one year
avg_sales_per_year.plot(figsize=(10,10))
plt.title('Average Sale per Year Globally')
plt.ylabel('Value (millions)')
plt.show()

### Best-Selling Games Globally

In [None]:
# Best-selling games worldwide: 'Global_Sales' summed per game name, so a
# name that appears on several rows contributes one combined total.
top_game_global = df_sales.groupby('Name')['Global_Sales'].sum()
# nlargest() already returns its result in descending order, so no separate
# sort_values() is needed. n=5 is spelled out because the colour list below
# has exactly five entries, one per bar.
top_game_global = top_game_global.nlargest(5)
top_game_global.plot(
    kind='bar',
    color=['mediumpurple', 'rebeccapurple', 'blueviolet', 'indigo', 'violet'],
    figsize=(10, 10),
)
plt.title('Most Game Sale in Global')
plt.ylabel('Number of sale (millions)')
plt.xlabel('Name of the Game')
# Game names are long, so tilt the tick labels to keep them readable.
plt.xticks(fontsize=10, rotation=45)
plt.show()

### Top Publisher in North America

In [None]:
# Ten publishers with the highest total 'NA_Sales', shown as a pie chart.
top_publisher_na = df_sales.groupby('Publisher')['NA_Sales'].sum()
# nlargest() returns the values already sorted in descending order, so a
# preceding sort_values() is unnecessary.
top_publisher_na = top_publisher_na.nlargest(10)
# Pull the second slice out of the pie by 0.1. The tuple is built from the
# actual number of slices because matplotlib requires explode to have exactly
# one entry per slice; a fixed 10-tuple fails if fewer publishers exist.
explode_na = tuple(
    0.1 if position == 1 else 0
    for position in range(len(top_publisher_na))
)
top_publisher_na.plot(
    kind='pie',
    explode=explode_na,
    shadow=True,
    figsize=(10, 10),
    autopct='%1.1f%%',
    startangle=90,
)
plt.title('Most Famous Publisher in North America')
# Clear the y-axis label that would otherwise be drawn beside the pie.
plt.ylabel('')
plt.show()

### Top Publisher in Europe

In [None]:
# Ten publishers with the highest total 'EU_Sales', shown as a pie chart.
top_publisher_eu = df_sales.groupby('Publisher')['EU_Sales'].sum()
# nlargest() returns the values already sorted in descending order, so a
# preceding sort_values() is unnecessary.
top_publisher_eu = top_publisher_eu.nlargest(10)
# Pull the second slice out of the pie by 0.2. The tuple is built from the
# actual number of slices because matplotlib requires explode to have exactly
# one entry per slice; a fixed 10-tuple fails if fewer publishers exist.
explode_eu = tuple(
    0.2 if position == 1 else 0
    for position in range(len(top_publisher_eu))
)
top_publisher_eu.plot(
    kind='pie',
    explode=explode_eu,
    shadow=True,
    figsize=(10, 10),
    startangle=90,
    autopct='%1.1f%%',
)
plt.title('Top Publisher in Europe')
# Clear the y-axis label that would otherwise be drawn beside the pie.
plt.ylabel('')
plt.show()

### Famous Platform Globally

In [None]:
# Five platforms with the highest total 'Global_Sales' across their games.
# sum() is used rather than nunique(): nunique() only counts how many
# distinct sales figures occur for a platform, which measures neither the
# sales volume nor the number of titles.
top_platform_global = df_sales.groupby('Platform')['Global_Sales'].sum()
# nlargest() returns the values already sorted in descending order; n=5 is
# explicit because the colour list below has exactly five entries.
top_platform_global = top_platform_global.nlargest(5)
color = ['red', 'darkred', 'maroon', 'firebrick', 'brown']
top_platform_global.plot(kind='bar', figsize=(10, 10), color=color)
plt.title('Famous Platform Globally', fontsize=20)
# The bars show summed game sales, in the same unit (millions) used by the
# other sales charts in this notebook.
plt.ylabel('Global sales (millions)')
plt.show()

### Create a dictionary from Dataset

In [None]:
# Copy selected columns into plain Python lists. Every list follows the row
# order of df_sales, so the same position in two lists refers to the same row.
# NOTE(review): publisher_list and game_year_test are not referenced by any
# later cell shown in this notebook - confirm whether they are still needed.
publisher_list = df_sales['Publisher'].tolist()
sales_global = df_sales['Global_Sales'].tolist()
game_year_test = df_sales['Year'].tolist()
game_platform = df_sales['Platform'].tolist()
name_list = df_sales['Name'].tolist()

In [None]:
# Build name -> value lookup dictionaries from pairs of columns.
# A dictionary keeps one value per key, so when a game name appears on
# several rows the value from the last of those rows is the one stored.
publisher_dict = dict(zip(df_sales['Name'], df_sales['Publisher']))
game_year = dict(zip(df_sales['Name'], df_sales['Year']))
game_genre = dict(zip(df_sales['Name'], df_sales['Genre']))


In [None]:
# Build a name -> total global sales mapping from the two row-aligned lists.
from collections import Counter # missing keys start at 0, so += accumulates
sales_dict = Counter()
# Rows that share a game name are added together into a single total.
for key, value in zip(name_list, sales_global):
    sales_dict[key] += value

# Display the resulting mapping.
sales_dict

In [None]:
# Build a name -> list of platforms mapping from the two row-aligned lists.
from collections import defaultdict 
# defaultdict(list) creates an empty list the first time a name is seen.
platform_dict = defaultdict(list)
# Each row appends its platform, so a name found on several rows collects
# all of its platforms in row order.
for k, v in zip(name_list, game_platform):
    platform_dict[k].append(v)
    
# Display the resulting mapping.
platform_dict

### User Input by Name of Game

In [None]:
class Video_Games():
    """Interactive lookup of one game's details by its exact name.

    Creating an instance immediately prompts for a game name (see
    ``user_input``). The details are read from the notebook-level lookup
    tables ``publisher_dict``, ``sales_dict``, ``game_year``,
    ``platform_dict`` and ``game_genre``, all keyed by game name.
    """

    def __init__(self):
        self.user_input()

    def publisher(self):
        """Return the publisher stored for ``self.name``, or None if absent."""
        return publisher_dict.get(self.name)

    def sales_global(self):
        """Return the total global sales for ``self.name``, or None if absent."""
        return sales_dict.get(self.name)

    def year(self):
        """Return the year stored for ``self.name``, or None if absent."""
        return game_year.get(self.name)

    def platform(self):
        """Return the list of platforms for ``self.name``, or None if absent."""
        # .get() is used instead of indexing so that an unknown name does not
        # add an empty entry when platform_dict is a defaultdict.
        return platform_dict.get(self.name)

    def genre(self):
        """Return the genre stored for ``self.name``, or None if absent."""
        return game_genre.get(self.name)

    def user_input(self):
        """Prompt for a game name until a known one or "q" is entered.

        A known name prints a summary of the game. Entering "q" at any
        prompt (including the first one) prints a goodbye message and stops
        without printing a summary. The entered text is kept in
        ``self.name``. Returns None.
        """
        self.name = input('Please enter your game name: ')
        # sales_dict is keyed by the game names taken from name_list, so this
        # is the same membership check as scanning that list, done as a
        # single dictionary lookup.
        while self.name not in sales_dict:
            if self.name == 'q':
                print('Thank you!')
                return
            print(f'{self.name} is not in game list. Please try again.')
            print('Enter "q" to quit the program.')
            self.name = input('Please enter your game name: ')

        print(f"\nThe publisher of {self.name} is {self.publisher()}.\n"
              f"It has sold for {self.sales_global()} millions of copies worldwide!\n"
              f"It was produced in {self.year()}.\n"
              f"It is available on {self.platform()} and has {self.genre()} genre.")
            
# Creating the object starts the interactive prompt right away, because
# Video_Games.__init__ calls user_input().
vg = Video_Games()

In [None]:
# Print every key of helpDictionary on one line, separated by ", ".
# NOTE(review): helpDictionary is not defined in any cell shown above, so
# this raises NameError unless it is created elsewhere - confirm its origin.
print(*helpDictionary.keys(), sep=', ')