In [264]:
# import modules
import numpy as np
import matplotlib.pyplot as plt
from collections import Counter
from datetime import datetime, timedelta
from sklearn import metrics
import pandas as pd
import seaborn as sns
import math
%matplotlib inline

In [265]:
trainingSet = pd.read_csv("train.csv", parse_dates=['purchase_date', 'release_date'])
testingSet = pd.read_csv("test.csv", parse_dates=['purchase_date', 'release_date'])

In [266]:
x_train = trainingSet.drop(['playtime_forever'], axis=1)
y_train = trainingSet['playtime_forever']
x_test = testingSet

In [267]:
x_train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 357 entries, 0 to 356
Data columns (total 10 columns):
id                        357 non-null int64
is_free                   357 non-null bool
price                     357 non-null float64
genres                    357 non-null object
categories                357 non-null object
tags                      357 non-null object
purchase_date             355 non-null datetime64[ns]
release_date              357 non-null datetime64[ns]
total_positive_reviews    355 non-null float64
total_negative_reviews    355 non-null float64
dtypes: bool(1), datetime64[ns](2), float64(3), int64(1), object(3)
memory usage: 25.6+ KB


In [268]:
y_train.describe()

count    357.000000
mean       3.119234
std       11.213114
min        0.000000
25%        0.000000
50%        0.083333
75%        1.616667
max      113.800000
Name: playtime_forever, dtype: float64

In [269]:
# y_train = y_train.map(lambda y: math.log(y + 1))
# sns.kdeplot(y_train, shade=True)

#### 1. Drop the duplicates.
- drop the games which have not played,
- drop row with null values,
- drop column 'id',
- drop column 'tags', because it is random and subjective.


In [270]:
x_train.drop_duplicates(inplace=True)
x_test.drop_duplicates(inplace=True)
x_train.drop(['id'], axis=1, inplace=True)
x_test.drop(['id'], axis=1, inplace=True)

In [271]:
x_train

Unnamed: 0,is_free,price,genres,categories,tags,purchase_date,release_date,total_positive_reviews,total_negative_reviews
0,False,3700.0,"Adventure,Casual,Indie","Single-player,Steam Trading Cards,Steam Cloud","Indie,Adventure,Story Rich,Casual,Atmospheric,...",2018-07-02,2013-12-10,372.0,96.0
1,True,0.0,RPG,"Single-player,Partial Controller Support","Mod,Utilities,RPG,Game Development,Singleplaye...",2016-11-26,2015-08-12,23.0,0.0
2,False,5000.0,"Adventure,Casual,Indie","Single-player,Full controller support,Steam Tr...","Point & Click,Adventure,Story Rich,Comedy,Indi...",2018-07-02,2014-01-28,3018.0,663.0
3,False,9900.0,"Action,RPG","Single-player,Multi-player,Steam Achievements,...","Medieval,RPG,Open World,Strategy,Sandbox,Actio...",2016-11-28,2010-03-31,63078.0,1746.0
4,False,4800.0,"Action,Indie,Strategy","Single-player,Co-op,Steam Achievements,Full co...","Tower Defense,Co-op,Action,Strategy,Online Co-...",2018-03-04,2012-07-30,8841.0,523.0
...,...,...,...,...,...,...,...,...,...
352,False,8800.0,"Action,Simulation","Single-player,Multi-player,Online Multi-Player...","Simulation,Action,VR,Flight",2017-11-24,2016-12-20,150.0,91.0
353,False,6800.0,"Action,Adventure","Single-player,Steam Achievements,Full controll...","Batman,Action,Open World,Superhero,Stealth,Adv...",2018-08-15,2015-06-23,19008.0,4849.0
354,False,8300.0,"Action,Adventure,RPG","Single-player,Steam Achievements,Full controll...","Action,Hack and Slash,Adventure,RPG,Open World...",2018-01-30,2015-11-05,5099.0,1719.0
355,False,6800.0,"Action,Adventure","Single-player,Steam Achievements,Full controll...","Action,FPS,Adventure,Shooter,Cyberpunk,Robots,...",2017-09-23,2016-06-03,718.0,159.0


#### 2. Handle categorial variables categories and genres. 
- Do one-hot encoding;
- Make the testset and trainset have same features.

In [272]:
# x_train["categories"]

In [273]:
# categories_list = []
# for cates in x_train["categories"].tolist():
#     for cate in cates.split(','):
#             categories_list.append(cate)
# Counter([j for j in categories_list]).most_common(15)

In [274]:
# categories_list = []
# for cates in x_test["categories"].tolist():
#     for cate in cates.split(','):
#             categories_list.append(cate)
# Counter([j for j in categories_list]).most_common(15)

In [275]:
# # create separate columns for 10 genres
# top_cates = [m[0] for m in Counter([j for j in categories_list]).most_common(10)]
        
# for g in top_cates:
#     x_train['cate_' + g] = x_train['categories'].apply(lambda cates: 1 if g in cates else 0)
# for g in top_cates:
#     x_test['cate_' + g] = x_test['categories'].apply(lambda cates: 1 if g in cates else 0)

In [276]:
x_test.head()

Unnamed: 0,is_free,price,genres,categories,tags,purchase_date,release_date,total_positive_reviews,total_negative_reviews
0,False,3500,"Action,Adventure","Single-player,Full controller support","Action,Adventure,Horror,Third Person,Singlepla...",2018-10-28,2012-05-22,2607.0,1122.0
1,False,11600,"Action,Adventure,Strategy","Single-player,Multi-player,Online Multi-Player...","Mechs,Strategy,Turn-Based,Turn-Based Tactics,S...",2019-07-20,2018-04-24,5762.0,2235.0
2,False,2100,"Indie,Simulation,Strategy","Single-player,Steam Achievements,Steam Trading...","Strategy,Simulation,Indie,Political,Cold War,P...",2019-07-16,2017-03-20,687.0,133.0
3,False,3600,"Action,Strategy","Single-player,Multi-player,Co-op","Strategy,Action,Military,Tactical",2018-01-30,2007-07-17,67.0,39.0
4,False,3400,"Action,Adventure","Single-player,Co-op,Steam Achievements,Full co...","Open World,Action,Comedy,Co-op,Third-Person Sh...",2017-02-24,2013-08-22,40344.0,3708.0


In [277]:
# genres_list = []
# for genres in x_train["genres"].tolist():
#     for genere in genres.split(','):
#             genres_list.append(genere)
# Counter([j for j in genres_list]).most_common()[:10]

In [278]:
# genres_list = []
# for genres in x_test["genres"].tolist():
#     for genere in genres.split(','):
#             genres_list.append(genere)
# Counter([j for j in genres_list]).most_common()[:10]

In [279]:
# create separate columns for top20% genres
# top_genres = [m[0] for m in Counter([j for j in genres_list]).most_common(3)]
        
# for g in top_genres:
#     x_train['gene_' + g] = x_train['genres'].apply(lambda genres: 1 if g in genres else 0)
# for g in top_genres:
#     x_test['gene_' + g] = x_test['genres'].apply(lambda genres: 1 if g in genres else 0)

In [280]:
# x_train = x_train.drop(['categories', 'genres'],axis=1)
# x_test = x_test.drop(['categories', 'genres'],axis=1)

In [281]:
# x_train.head(5)

In [282]:
categories = x_train["categories"].str.get_dummies(",")
categories.columns = categories.columns.map(lambda x: "cate_" + x)
genres = x_train["genres"].str.get_dummies(",")
genres.columns = genres.columns.map(lambda x: "genre_" + x)
x_train = x_train.drop(['categories', 'genres'],axis=1)
x_train = pd.concat([x_train,categories,genres], axis = 1)
test_categories = x_test["categories"].str.get_dummies(",")
test_categories.columns = test_categories.columns.map(lambda x: "cate_" + x)
test_genres = x_test["genres"].str.get_dummies(",")
test_genres.columns = test_genres.columns.map(lambda x: "genre_" + x)
x_test = x_test.drop(['categories', 'genres'],axis=1)
x_test = pd.concat([x_test,test_categories,test_genres], axis = 1)

In [283]:
tr_tags = x_train['tags'].str.get_dummies(",").add_prefix("tags_")
for col in tr_tags.columns:
    if tr_tags[col].value_counts()[1]<180:
        tr_tags.drop(col, axis=1, inplace=True)
x_train = x_train.drop(columns='tags')
x_train = pd.concat([x_train, tr_tags], axis=1)

te_tags = x_test['tags'].str.get_dummies(",").add_prefix("tags_")
x_test = x_test.drop(columns='tags')
x_test = pd.concat([x_test, te_tags], axis=1)

In [284]:
train_dummies = [col for col in x_train if "_" in col]
test_dummies = [col for col in x_test if "_" in col]
for col in train_dummies:
    if col not in x_test.columns:
        x_test[col] = 0
for col in x_test.columns:
    if ("_" in col)  and col not in train_dummies:
        x_test.drop(col, axis=1, inplace=True)

In [285]:
print("shape of x_train", x_train.shape)
print("shape of x_test", x_test.shape)
print("shape of y_train", y_train.shape)

shape of x_train (357, 60)
shape of x_test (90, 60)
shape of y_train (357,)


#### 3. Handle the date_time varibles.
- Calculate the gap between p_d and r_d
- Calculate the gap between p_d and Kaggle start date(2019-10-01)
- Visualize these dates with playtime

In [286]:
# 1. exchange the loc of p_d and r_d if they p_d < r_d
# 2. define a new date if p_d is null.

for i, row in x_train.iterrows():
    gap_r_p = row['purchase_date'] - row['release_date']
    if gap_r_p.days < 0:
        temp = row['purchase_date']
        x_train.loc[i,'purchase_date'] = row['release_date']
        x_train.loc[i,'release_date'] = temp
    gap_p_r_mean = timedelta(days=(x_train['purchase_date'] - x_train['release_date']).mean().days)
    if str(row['purchase_date']) == 'NaT':
        self_define_date = row['release_date'] + gap_p_r_mean
        if self_define_date > datetime(2019,10,1):
            self_define_date = datetime(2019,10,1)
        x_train.loc[i,'purchase_date'] = self_define_date
        
for i, row in x_test.iterrows():
    gap_r_p = row['purchase_date'] - row['release_date']
    if gap_r_p.days < 0:
        temp = row['purchase_date']
        x_test.loc[i,'purchase_date'] = row['release_date']
        x_test.loc[i,'release_date'] = temp
    gap_p_r_mean = timedelta(days=(x_test['purchase_date'] - x_test['release_date']).mean().days)
    if str(row['purchase_date']) == 'NaT':
        self_define_date = row['release_date'] + gap_p_r_mean
        if self_define_date > datetime(2019,10,1):
            self_define_date = datetime(2019,10,1)
        x_test.loc[i,'purchase_date'] = self_define_date

In [287]:
# Calculate the gap between p_d and r_d

x_train['gap_r_p'] = x_train['purchase_date'] - x_train['release_date']
x_test['gap_r_p'] = x_test['purchase_date'] - x_test['release_date']
x_train['gap_r_p'] = x_train['gap_r_p'].astype(str).map(lambda x: int(x.split(' ')[0]))
x_test['gap_r_p'] = x_test['gap_r_p'].astype(str).map(lambda x: int(x.split(' ')[0]))

In [288]:
# Calculate the gap between p_d and kaggel start date

kaggle_date = datetime(2019,10,1)
x_train['gap_p_s'] = kaggle_date - x_train['purchase_date']
x_test['gap_p_s'] = kaggle_date - x_test['purchase_date']
x_train['gap_p_s'] = x_train['gap_p_s'].astype(str).map(lambda x: int(x.split(' ')[0]))
x_test['gap_p_s'] = x_test['gap_p_s'].astype(str).map(lambda x: int(x.split(' ')[0]))

In [289]:
x_train['purchase_date']

0     2018-07-02
1     2016-11-26
2     2018-07-02
3     2016-11-28
4     2018-03-04
         ...    
352   2017-11-24
353   2018-08-15
354   2018-01-30
355   2017-09-23
356   2018-03-05
Name: purchase_date, Length: 357, dtype: datetime64[ns]

In [290]:
x_train.drop(['purchase_date', 'release_date'], axis=1, inplace=True)
x_test.drop(['purchase_date', 'release_date'], axis=1, inplace=True)

#### 4. Handel review and is_free
1. For review, fill 0 to null;
2. For is_free, drop the column.

In [291]:
x_train['total_positive_reviews'].describe()

count       355.000000
mean      15356.115493
std       44032.358308
min           0.000000
25%         799.000000
50%        2809.000000
75%        9666.500000
max      440902.000000
Name: total_positive_reviews, dtype: float64

In [292]:
x_train['total_positive_reviews'].fillna(value=x_train['total_positive_reviews'].mean(), inplace=True)
x_test['total_positive_reviews'].fillna(value=x_test['total_positive_reviews'].mean(), inplace=True)
x_train['total_negative_reviews'].fillna(value=x_train['total_negative_reviews'].mean(), inplace=True)
x_test['total_negative_reviews'].fillna(value=x_test['total_negative_reviews'].mean(), inplace=True)
x_train.drop(['is_free'], axis=1, inplace=True)
x_test.drop(['is_free'], axis=1, inplace=True)

In [293]:
print("shape of x_train", x_train.shape)
print("shape of x_test", x_test.shape)
print("shape of y_train", y_train.shape)
x_train

shape of x_train (357, 59)
shape of x_test (90, 59)
shape of y_train (357,)


Unnamed: 0,price,total_positive_reviews,total_negative_reviews,cate_Captions available,cate_Co-op,cate_Commentary available,cate_Cross-Platform Multiplayer,cate_Full controller support,cate_In-App Purchases,cate_Includes Source SDK,...,genre_Strategy,genre_Utilities,genre_Violent,tags_Action,tags_Adventure,tags_Atmospheric,tags_Indie,tags_Singleplayer,gap_r_p,gap_p_s
0,3700.0,372.0,96.0,0,0,0,0,0,0,0,...,0,0,0,0,1,1,1,1,1665,456
1,0.0,23.0,0.0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,472,1039
2,5000.0,3018.0,663.0,0,0,0,0,1,0,0,...,0,0,0,0,1,0,1,1,1616,456
3,9900.0,63078.0,1746.0,0,0,0,0,0,0,0,...,0,0,0,1,1,0,1,1,2434,1037
4,4800.0,8841.0,523.0,0,1,0,0,1,0,0,...,1,0,0,1,1,0,1,1,2043,576
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
352,8800.0,150.0,91.0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,339,676
353,6800.0,19008.0,4849.0,0,0,0,0,1,0,0,...,0,0,0,1,1,1,0,1,1149,412
354,8300.0,5099.0,1719.0,0,0,0,0,1,0,0,...,0,0,0,1,1,0,0,1,817,609
355,6800.0,718.0,159.0,0,0,0,0,1,0,0,...,0,0,0,1,1,0,0,1,477,738


### Normalize data

In [294]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
norm_price = scaler.fit(x_train['price'].values.reshape(-1,1))
x_train['price'] = scaler.fit_transform(x_train['price'].values.reshape(-1, 1), norm_price)
norm_pos = scaler.fit(x_train['total_positive_reviews'].values.reshape(-1,1))
x_train['total_positive_reviews'] = scaler.fit_transform(x_train['total_positive_reviews'].values.reshape(-1, 1), norm_pos)
norm_neg = scaler.fit(x_train['total_negative_reviews'].values.reshape(-1,1))
x_train['total_negative_reviews'] = scaler.fit_transform(x_train['total_negative_reviews'].values.reshape(-1, 1), norm_neg)
norm_gap_r_p = scaler.fit(x_train['gap_r_p'].values.reshape(-1,1))
x_train['gap_r_p'] = scaler.fit_transform(x_train['gap_r_p'].values.reshape(-1, 1), norm_gap_r_p)
norm_gap_p_s = scaler.fit(x_train['gap_p_s'].values.reshape(-1,1))
x_train['gap_p_s'] = scaler.fit_transform(x_train['gap_p_s'].values.reshape(-1, 1), norm_gap_p_s)

In [295]:
scaler = StandardScaler()
norm_price = scaler.fit(x_test['price'].values.reshape(-1,1))
x_test['price'] = scaler.fit_transform(x_test['price'].values.reshape(-1, 1), norm_price)
norm_pos = scaler.fit(x_test['total_positive_reviews'].values.reshape(-1,1))
x_test['total_positive_reviews'] = scaler.fit_transform(x_test['total_positive_reviews'].values.reshape(-1, 1), norm_pos)
norm_neg = scaler.fit(x_test['total_negative_reviews'].values.reshape(-1,1))
x_test['total_negative_reviews'] = scaler.fit_transform(x_test['total_negative_reviews'].values.reshape(-1, 1), norm_neg)
norm_gap_r_p = scaler.fit(x_test['gap_r_p'].values.reshape(-1,1))
x_test['gap_r_p'] = scaler.fit_transform(x_test['gap_r_p'].values.reshape(-1, 1), norm_gap_r_p)
norm_gap_p_s = scaler.fit(x_test['gap_p_s'].values.reshape(-1,1))
x_test['gap_p_s'] = scaler.fit_transform(x_test['gap_p_s'].values.reshape(-1, 1), norm_gap_p_s)

In [296]:
x_train

Unnamed: 0,price,total_positive_reviews,total_negative_reviews,cate_Captions available,cate_Co-op,cate_Commentary available,cate_Cross-Platform Multiplayer,cate_Full controller support,cate_In-App Purchases,cate_Includes Source SDK,...,genre_Strategy,genre_Utilities,genre_Violent,tags_Action,tags_Adventure,tags_Atmospheric,tags_Indie,tags_Singleplayer,gap_r_p,gap_p_s
0,-0.073279,-0.341737,-0.132221,0,0,0,0,0,0,0,...,0,0,0,0,1,1,1,1,0.720856,-0.308018
1,-0.077293,-0.349696,-0.136123,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,-0.630649,1.574195
2,-0.071868,-0.281390,-0.109175,0,0,0,0,1,0,0,...,0,0,0,0,1,0,1,1,0.665346,-0.308018
3,-0.066551,1.088374,-0.065155,0,0,0,0,0,0,0,...,0,0,0,1,1,0,1,1,1.592028,1.567738
4,-0.072085,-0.148588,-0.114866,0,1,0,0,1,0,0,...,1,0,0,1,1,0,1,1,1.149079,0.079401
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
352,-0.067745,-0.346800,-0.132425,0,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,-0.781320,0.402251
353,-0.069915,0.083287,0.060970,0,0,0,0,1,0,0,...,0,0,0,1,1,1,0,1,0.136299,-0.450072
354,-0.068287,-0.233930,-0.066253,0,0,0,0,1,0,0,...,0,0,0,1,1,0,0,1,-0.239812,0.185941
355,-0.069915,-0.333846,-0.129661,0,0,0,0,1,0,0,...,0,0,0,1,1,0,0,1,-0.624985,0.602417


## 5. Build model

In [297]:
import warnings
warnings.filterwarnings('ignore')

In [298]:
from xgboost import XGBRegressor as xgb
from sklearn.model_selection import KFold, cross_val_score as CVS, train_test_split as TTS 
from sklearn.metrics import mean_squared_error as MSE

xTrain, xVal, yTrain, yVal = TTS(x_train, y_train, test_size=0.2, random_state=300)

In [370]:
xgb_model = xgb(max_depth=3, learning_rate=0.01, n_estimators=1000, verbosity=1,gamma=0.1,colsample_bylevel=0.88,
                             tree_method='auto', reg_alpha=0.05, reg_lambda=0.05)
xgb_model.fit(xTrain, yTrain, eval_metric='rmse', verbose=True,
            eval_set=[(xVal, yVal)], early_stopping_rounds=20)

[0]	validation_0-rmse:3.37901
Will train until validation_0-rmse hasn't improved in 20 rounds.
[1]	validation_0-rmse:3.36828
[2]	validation_0-rmse:3.35254
[3]	validation_0-rmse:3.31492
[4]	validation_0-rmse:3.29018
[5]	validation_0-rmse:3.27883
[6]	validation_0-rmse:3.28691
[7]	validation_0-rmse:3.2691
[8]	validation_0-rmse:3.26917
[9]	validation_0-rmse:3.27663
[10]	validation_0-rmse:3.29351
[11]	validation_0-rmse:3.308
[12]	validation_0-rmse:3.33378
[13]	validation_0-rmse:3.34774
[14]	validation_0-rmse:3.37964
[15]	validation_0-rmse:3.41506
[16]	validation_0-rmse:3.45379
[17]	validation_0-rmse:3.49558
[18]	validation_0-rmse:3.54025
[19]	validation_0-rmse:3.58756
[20]	validation_0-rmse:3.63733
[21]	validation_0-rmse:3.69412
[22]	validation_0-rmse:3.74824
[23]	validation_0-rmse:3.80511
[24]	validation_0-rmse:3.86284
[25]	validation_0-rmse:3.94058
[26]	validation_0-rmse:3.97377
[27]	validation_0-rmse:4.04763
Stopping. Best iteration:
[7]	validation_0-rmse:3.2691



XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=0.88,
             colsample_bynode=1, colsample_bytree=1, gamma=0.1,
             importance_type='gain', learning_rate=0.01, max_delta_step=0,
             max_depth=3, min_child_weight=1, missing=None, n_estimators=1000,
             n_jobs=1, nthread=None, objective='reg:linear', random_state=0,
             reg_alpha=0.05, reg_lambda=0.05, scale_pos_weight=1, seed=None,
             silent=None, subsample=1, tree_method='auto', verbosity=1)

In [371]:
order = x_train.columns.values.tolist()
x_test = x_test[order]
y_test_predict = xgb_model.predict(x_test)
y_test_predict = [y if y > 0 else 0  for y in y_test_predict]
submission = pd.read_csv('samplesubmission.csv')
submission["playtime_forever"] = y_test_predict
submission.to_csv("samplesubmission.csv", index=False)

In [372]:
# randomforest
from sklearn.ensemble import RandomForestRegressor as rf
rf_model = rf(n_estimators=1000, criterion='mae', oob_score=True, random_state=1, max_depth = 2).fit(x_train, y_train)

In [373]:
from sklearn.ensemble import GradientBoostingRegressor as gbdt
gbdt_model = gbdt(n_estimators=1000, max_depth=2).fit(x_train, y_train)

In [374]:
from sklearn.linear_model import Lasso
lasso = Lasso(alpha=0.001).fit(x_train, y_train)

In [375]:
from mlxtend.regressor import StackingRegressor
models = [xgb_model, rf_model, gbdt_model]
mix_model = StackingRegressor(regressors=models, meta_regressor=lasso)
mix_model.fit(x_train, y_train)



StackingRegressor(meta_regressor=Lasso(alpha=0.001, copy_X=True,
                                       fit_intercept=True, max_iter=1000,
                                       normalize=False, positive=False,
                                       precompute=False, random_state=None,
                                       selection='cyclic', tol=0.0001,
                                       warm_start=False),
                  refit=True,
                  regressors=[XGBRegressor(base_score=0.5, booster='gbtree',
                                           colsample_bylevel=0.88,
                                           colsample_bynode=1,
                                           colsample_bytree=1, gamma=0.1,
                                           i...
                                                        max_leaf_nodes=None,
                                                        min_impurity_decrease=0.0,
                                                        min_impur

In [376]:
from sklearn.model_selection import cross_val_score
accuracacy = cross_val_score(estimator = sclf, X=x_train, y=y_train, cv=10, scoring='neg_mean_squared_error')
rmse = np.sqrt(-accuracacy).mean()



In [377]:
rmse

9.763699850836321

In [378]:
order = x_train.columns.values.tolist()
x_test = x_test[order]
order

['price',
 'total_positive_reviews',
 'total_negative_reviews',
 'cate_Captions available',
 'cate_Co-op',
 'cate_Commentary available',
 'cate_Cross-Platform Multiplayer',
 'cate_Full controller support',
 'cate_In-App Purchases',
 'cate_Includes Source SDK',
 'cate_Includes level editor',
 'cate_Local Co-op',
 'cate_Local Multi-Player',
 'cate_MMO',
 'cate_Multi-player',
 'cate_Online Co-op',
 'cate_Online Multi-Player',
 'cate_Partial Controller Support',
 'cate_Remote Play on Phone',
 'cate_Remote Play on TV',
 'cate_Remote Play on Tablet',
 'cate_Shared/Split Screen',
 'cate_Single-player',
 'cate_Stats',
 'cate_Steam Achievements',
 'cate_Steam Cloud',
 'cate_Steam Leaderboards',
 'cate_Steam Trading Cards',
 'cate_Steam Workshop',
 'cate_SteamVR Collectibles',
 'cate_VR Support',
 'cate_Valve Anti-Cheat enabled',
 'genre_Action',
 'genre_Adventure',
 'genre_Animation & Modeling',
 'genre_Audio Production',
 'genre_Casual',
 'genre_Design & Illustration',
 'genre_Early Access',
 

In [379]:
y_test_predict = mix_model.predict(x_test)
y_test_predict = [y if y > 0 else 0  for y in y_test_predict]

In [380]:
submission = pd.read_csv('samplesubmission.csv')

In [381]:
submission["playtime_forever"] = y_test_predict

In [382]:
submission.head()

Unnamed: 0,id,playtime_forever
0,0,0.244281
1,1,0.0
2,2,0.0
3,3,0.15381
4,4,17.394113


In [383]:
submission.to_csv("mix_model3.csv", index=False)