**Data Preparation and Export**

**1. Import Pandas**

In [1]:
import numpy as np
import pandas as pd

**2. Read the CSV File & Save as a Data Frame**

In [2]:
df = pd.read_csv('vgsales.csv')

df.head()

Unnamed: 0,Rank,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
0,1,Wii Sports,Wii,2006.0,Sports,Nintendo,41.49,29.02,3.77,8.46,82.74
1,2,Super Mario Bros.,NES,1985.0,Platform,Nintendo,29.08,3.58,6.81,0.77,40.24
2,3,Mario Kart Wii,Wii,2008.0,Racing,Nintendo,15.85,12.88,3.79,3.31,35.82
3,4,Wii Sports Resort,Wii,2009.0,Sports,Nintendo,15.75,11.01,3.28,2.96,33.0
4,5,Pokemon Red/Pokemon Blue,GB,1996.0,Role-Playing,Nintendo,11.27,8.89,10.22,1.0,31.37


**3. Data Cleaning**

**I. Drop unnecessary columns**

In [3]:
cols = ['Rank']

df.drop(cols, inplace=True, axis=1)

df.head()

Unnamed: 0,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
0,Wii Sports,Wii,2006.0,Sports,Nintendo,41.49,29.02,3.77,8.46,82.74
1,Super Mario Bros.,NES,1985.0,Platform,Nintendo,29.08,3.58,6.81,0.77,40.24
2,Mario Kart Wii,Wii,2008.0,Racing,Nintendo,15.85,12.88,3.79,3.31,35.82
3,Wii Sports Resort,Wii,2009.0,Sports,Nintendo,15.75,11.01,3.28,2.96,33.0
4,Pokemon Red/Pokemon Blue,GB,1996.0,Role-Playing,Nintendo,11.27,8.89,10.22,1.0,31.37


**II. Check for missing values**

In [4]:
df.isnull().sum()

Name              0
Platform          0
Year            271
Genre             0
Publisher        58
NA_Sales          0
EU_Sales          0
JP_Sales          0
Other_Sales       0
Global_Sales      0
dtype: int64

In [5]:
df.dropna(inplace=True)

df.isnull().sum()

Name            0
Platform        0
Year            0
Genre           0
Publisher       0
NA_Sales        0
EU_Sales        0
JP_Sales        0
Other_Sales     0
Global_Sales    0
dtype: int64

**III. Handle data types**

In [6]:
df.dtypes

Name             object
Platform         object
Year            float64
Genre            object
Publisher        object
NA_Sales        float64
EU_Sales        float64
JP_Sales        float64
Other_Sales     float64
Global_Sales    float64
dtype: object

In [7]:
df = df.astype({'Year':'int'})

df.dtypes

Name             object
Platform         object
Year              int32
Genre            object
Publisher        object
NA_Sales        float64
EU_Sales        float64
JP_Sales        float64
Other_Sales     float64
Global_Sales    float64
dtype: object

In [8]:
df.head()

Unnamed: 0,Name,Platform,Year,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales
0,Wii Sports,Wii,2006,Sports,Nintendo,41.49,29.02,3.77,8.46,82.74
1,Super Mario Bros.,NES,1985,Platform,Nintendo,29.08,3.58,6.81,0.77,40.24
2,Mario Kart Wii,Wii,2008,Racing,Nintendo,15.85,12.88,3.79,3.31,35.82
3,Wii Sports Resort,Wii,2009,Sports,Nintendo,15.75,11.01,3.28,2.96,33.0
4,Pokemon Red/Pokemon Blue,GB,1996,Role-Playing,Nintendo,11.27,8.89,10.22,1.0,31.37


**4. DataFrame Export**

**I. Convert all rows into a list of tuples**

In [9]:
y = []

for i in range(len(df)):
    x = tuple(df.iloc[i])
    y.append(x)
    
y

[('Wii Sports',
  'Wii',
  2006,
  'Sports',
  'Nintendo',
  41.49,
  29.02,
  3.77,
  8.46,
  82.74),
 ('Super Mario Bros.',
  'NES',
  1985,
  'Platform',
  'Nintendo',
  29.08,
  3.58,
  6.81,
  0.77,
  40.24),
 ('Mario Kart Wii',
  'Wii',
  2008,
  'Racing',
  'Nintendo',
  15.85,
  12.88,
  3.79,
  3.31,
  35.82),
 ('Wii Sports Resort',
  'Wii',
  2009,
  'Sports',
  'Nintendo',
  15.75,
  11.01,
  3.28,
  2.96,
  33.0),
 ('Pokemon Red/Pokemon Blue',
  'GB',
  1996,
  'Role-Playing',
  'Nintendo',
  11.27,
  8.89,
  10.22,
  1.0,
  31.37),
 ('Tetris', 'GB', 1989, 'Puzzle', 'Nintendo', 23.2, 2.26, 4.22, 0.58, 30.26),
 ('New Super Mario Bros.',
  'DS',
  2006,
  'Platform',
  'Nintendo',
  11.38,
  9.23,
  6.5,
  2.9,
  30.01),
 ('Wii Play', 'Wii', 2006, 'Misc', 'Nintendo', 14.03, 9.2, 2.93, 2.85, 29.02),
 ('New Super Mario Bros. Wii',
  'Wii',
  2009,
  'Platform',
  'Nintendo',
  14.59,
  7.06,
  4.7,
  2.26,
  28.62),
 ('Duck Hunt',
  'NES',
  1984,
  'Shooter',
  'Nintendo',
  2

**II. Save the list as a txt.file**

In [10]:
file = open('vgsales.txt', 'w', encoding='utf-8')

for tuple in y:
    file.write(str(tuple) + ',' + '\n')
    
file.close()