#### Importownie bibliotek

In [1]:
import pandas as pd
import numpy as np
import pickle

import xgboost as xgb

from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.model_selection import GridSearchCV, train_test_split, cross_val_score

#### Wczytanie dev_df z pliku

In [103]:
dev_df=pd.read_csv("../dfs/full_df.csv")
dev_df_ok = dev_df[dev_df['state'] == 0]

x_dev=dev_df.drop(['Unnamed: 0','pledged_in_usd','percentage_of_money_collected','backers', 'duration', 'state'], axis=1)
y_dev=dev_df['pledged_in_usd']

#### Reprezentacja x i y 

In [104]:
x_dev.head()

Unnamed: 0,main_cat_cat,country,currency,goal_in_usd
0,80,21,13,500.0
1,54,7,5,3315.48
2,0,21,13,18000.0
3,154,21,13,60000.0
4,159,21,13,75000.0


In [105]:
y_dev.head()

0      575.00
1       33.15
2    20891.00
3        0.00
4    11734.01
Name: pledged_in_usd, dtype: float64

In [106]:
print(x_dev.shape)
print(y_dev.shape)

(368238, 4)
(368238,)


#### Podział próbek na treningowe i testowe

In [107]:
x_train, x_test, y_train, y_test = train_test_split(x_dev, y_dev, test_size=0.2)

In [108]:
x_train['goal_in_usd'].mean()

45759.76972079839

In [109]:
y_train.describe()

count    2.945900e+05
mean     9.161739e+03
std      9.278492e+04
min      0.000000e+00
25%      3.216250e+01
50%      6.320550e+02
75%      4.078883e+03
max      2.033899e+07
Name: pledged_in_usd, dtype: float64

#### Podanie parametrów do xgb.XGBRegressor

In [110]:
params= {'objective': 'reg:squarederror'}

#### Wyliczenie modelu xgb.XGBRegressor

In [111]:
%%time
def test(params):
    model = xgb.XGBRegressor(**params)
    model.fit(x_train,y_train)
    y_pred = model.predict(x_test)
    
    mse = mean_squared_error(y_test, y_pred)
    mae = mean_absolute_error(y_test, y_pred)
    r2score = r2_score(y_test, y_pred)
    score = model.score(x_test,y_test)
    
    return mse, mae, r2score, score, model

CPU times: user 4 µs, sys: 1e+03 ns, total: 5 µs
Wall time: 6.2 µs


#### Wyniki

In [112]:
mse, mae, r2, score, model = test(params)
print('\nType: XGBRegressor', '\nParams: ', params, '\nMse: ',mse,'\nMae: ', mae, '\nR2: ',r2,'\nScore:',score)

  if getattr(data, 'base', None) is not None and \



Type: XGBRegressor 
Params:  {'objective': 'reg:squarederror'} 
Mse:  7330900182.568497 
Mae:  12367.076864053213 
R2:  0.04817157671658545 
Score: 0.04817157671658545


In [None]:
#{'main_cat_cat': LabelEncoder(), 'country': LabelEncoder(), 'currency': LabelEncoder(), 'state': LabelEncoder()}

#### Zapisanie modelu wyliczonego z dev_df

In [113]:
pkl_filename = "model.pickle"
with open(pkl_filename, 'wb') as file:
    pickle.dump(model, file)

#### W celu pobrania modelu z pliku wystarczy tyle:

In [114]:
with open(pkl_filename, 'rb') as file:
    pickle_model = pickle.load(file)

#### Sprawdzenie czy się poprawnie otworzył

In [115]:
pickle_model
y_pred = pickle_model.predict(x_test)
    
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2score = r2_score(y_test, y_pred)
score = pickle_model.score(x_train,y_train) 
mse, mae, r2score, score

(7330900182.568497,
 12367.076864053213,
 0.04817157671658545,
 0.05785454497013487)