In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

In [2]:
vg_df = pd.read_csv('Video_Games_Sales_as_at_22_Dec_2016.csv')
vg_cleaned = vg_df.dropna(axis=0, how='any')

In [3]:
vg_cleaned.loc[:,'User_Score'] = vg_cleaned.loc[:,'User_Score'].convert_objects(convert_numeric=True)
vg_cleaned.loc[:,'Year_of_Release'] = vg_cleaned.loc[:,'Year_of_Release'].astype(int)

For all other conversions use the data-type specific converters pd.to_datetime, pd.to_timedelta and pd.to_numeric.
  """Entry point for launching an IPython kernel.
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  self.obj[item] = s


In [4]:
vg_cleaned.head(2)

Unnamed: 0,Name,Platform,Year_of_Release,Genre,Publisher,NA_Sales,EU_Sales,JP_Sales,Other_Sales,Global_Sales,Critic_Score,Critic_Count,User_Score,User_Count,Developer,Rating
0,Wii Sports,Wii,2006,Sports,Nintendo,41.36,28.96,3.77,8.45,82.53,76.0,51.0,8.0,322.0,Nintendo,E
2,Mario Kart Wii,Wii,2008,Racing,Nintendo,15.68,12.76,3.79,3.29,35.52,82.0,73.0,8.3,709.0,Nintendo,E


In [5]:
df = vg_cleaned[['Year_of_Release', 'Platform', 'Global_Sales']].groupby(['Year_of_Release', 'Platform']).sum().reset_index().sort_values('Year_of_Release', ascending=False)

In [6]:
year_platform = vg_cleaned[['Year_of_Release', 'Platform']].groupby(['Year_of_Release']).size().reset_index(name='No. of platforms').sort_values('Year_of_Release', ascending=False)



m_w_u = vg_cleaned[['Platform', 'Year_of_Release']].groupby(['Year_of_Release']).size().reset_index(name='No. of platforms per year').sort_values('No. of platforms per year', ascending=False)
u_w_m = vg_cleaned[['Platform', 'Year_of_Release']].groupby(['Platform']).size().reset_index(name='No. of games on platform').sort_values('No. of games on platform', ascending=False)



In [7]:
m_w_u

Unnamed: 0,Year_of_Release,No. of platforms per year
16,2008,592
15,2007,590
13,2005,562
17,2009,550
14,2006,528
11,2003,498
12,2004,476
10,2002,455
19,2011,453
18,2010,429


In [8]:
u_w_m

Unnamed: 0,Platform,No. of games on platform
7,PS2,1140
14,X360,858
8,PS3,769
5,PC,651
15,XB,565
12,Wii,479
2,DS,464
10,PSP,390
4,GC,348
9,PS4,239


In [9]:
df

Unnamed: 0,Year_of_Release,Platform,Global_Sales
136,2016,XOne,23.36
135,2016,WiiU,3.25
134,2016,PSV,0.95
133,2016,PS4,57.13
132,2016,PC,4.69
131,2016,3DS,2.18
127,2015,PS4,80.11
124,2015,3DS,10.58
125,2015,PC,7.16
126,2015,PS3,0.02


In [10]:
years = m_w_u['Year_of_Release'].tolist()
platforms = u_w_m['Platform'].tolist()

In [11]:
x = []

for i in years:
    row = []
    for j in platforms:
        result = df[(df['Platform']==j) & (df['Year_of_Release']==i)]['Global_Sales']
        if result.any():
            row.append(result.tolist()[0])
        else:
            row.append(0)
    x.append(row)
x = np.array(x)


In [12]:
platforms = np.array(platforms)
num_platforms = platforms.size

In [13]:
evaluation_per_user = []

for num_platform in range(num_platforms):
    my_filter = [True] * num_platforms 
    my_filter[num_platform] = False
    y = x[:, num_platform]
    X = x[:, my_filter]
    evaluation_sum = 0
    for i in range(3):
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25)
        model = LinearRegression()
        model.fit(X_train, y_train)
        evaluation_sum += model.score(X_test, y_test)
    evaluation_per_user.append(evaluation_sum/3)
print(evaluation_per_user)

[-57751.133625198156, -361.63391319103835, -18.7345803584645, -0.36208173556835127, -34.18553821319973, -2.409288698769785, -2105.5378543050797, 0.2341809013033795, -17.24241965323639, -0.5331726460630136, -3538.772207339142, 0.4491671050648341, -17.990453232386578, -7.701563256752746, -6.941439486256261, -26.572731046313237, -57.23247413646348]


In [14]:
pd.DataFrame({'Platfroms': platforms, 'score': evaluation_per_user}).sort_values('score', ascending=False)

Unnamed: 0,Platfroms,score
11,XOne,0.449167
7,PSP,0.234181
3,PC,-0.362082
9,PS4,-0.533173
5,Wii,-2.409289
14,PSV,-6.941439
13,PS,-7.701563
8,GC,-17.24242
12,3DS,-17.990453
2,PS3,-18.73458
