# Learn v75 med walkthrough-metoden

In [1]:
import pandas as pd 
import numpy as np 
from catboost import CatBoostClassifier,Pool,cv,utils

import sys
sys.path.append('C:\\Users\peter\\Documents\\MyProjects\\PyProj\\Trav\\spel')
import V75_scraping as vs

In [2]:

def proba_order_score(df_, y, proba):  # df skall innehålla datum,avd,vodds
    kassa=1000
    df = df_.copy()
    df['proba'] = proba[:,1]
    df['f'] = (df.proba*df.vodds - 1) / (df.vodds-1)  # kelly formel
    df['spela'] = df.f >0
    df['insats'] = df.spela * df.f * kassa

    df.sort_values(['datum','avd','proba'],ascending=[True,True,False],inplace=True)
    proba_order=df.groupby(['datum','avd']).proba.cumcount()

    df['prob_order']=proba_order+1
    df['y'] = y

    print('log(proba)',np.log(df.loc[df.y==1].proba).mean())
    return df, df.loc[df.y==1].prob_order.mean()   # mean prob_order för vinnarhäst


In [3]:
def create_ekipage(df_):
    df=df_.copy()
    prefs = ['','h1_','h2_','h3_','h4_','h5_',]
    for pr in prefs:
        df[pr+'ekipage'] = df[pr+'kusk'].str.cat(df['häst'], sep =", ")
        df.drop([pr+'kusk'],axis=1, inplace=True)
        
    return df.drop(['häst'], axis=1)

In [4]:
### returnera en modell med parametrar satta
def get_model(d=6,l2=2,iterations=3000,use_best=True,verbose=False):
    model = CatBoostClassifier(iterations=iterations,use_best_model=use_best, 
        custom_metric=['Logloss', 'AUC','Recall', 'Precision', 'F1', 'Accuracy'],

        eval_metric='Accuracy', 
        depth=d,l2_leaf_reg=l2,
        auto_class_weights='Balanced',verbose=verbose, random_state=2021) 
    return model                

In [5]:
### Features som inte används vid träning
def remove_features(df_,remove_mer=[]):
    # df = df_.copy()
    #remove_mer=['h5_perf','h5_auto','h4_perf','h4_auto', 'h3_perf', 'h2_perf']
    df = df_.drop(['avd','startnr','vodds','podds','bins','h1_dat','h2_dat','h3_dat','h4_dat','h5_dat'],axis=1) #
    if remove_mer:
        df = df.drop(remove_mer,axis=1)
    
    # df=check_unique(df.copy())
    # df=check_corr(df.copy())
    return df

In [6]:
 ## byt ut alla NaN till text för cat_features
def replace_NaN(X_train,X_test=None, cat_features=[]):
    # print('cat_features',cat_features)
    for c in cat_features:
        # print(c)
        X_train.loc[X_train[c].isna(),c] = 'None'       ### byt ut None-värden till texten 'None
        if X_test is not None:  ## om X_test är med
            X_test.loc [X_test[c].isna(),c] = 'None'    ### byt ut None-värden till texten 'None

    return X_train,X_test

In [7]:
def scrape_nya_lopp():
    """scrape nya lopp och lägg in i all_data.csv"""
    nya_lopp,strukna = vs.v75_scraping(resultat=True,history=True)

    df=pd.concat([pd.read_csv('all_data.csv'), nya_lopp])
    print('shape med nya lopp',df.shape)
    #ta bort dubletter
    df.drop_duplicates(['datum','avd','häst'],inplace=True)
    df.sort_values(by=['datum','avd'],inplace=True)
    df.reset_index(drop=True,inplace=True)
    print('shape med dubletter bort',df.shape)

    df.to_csv('all_data.csv', index=False)

    print("första datum i df =",df.datum.head(1).to_list()[0])
    print("sista  datum i df =",df.datum.tail(1).to_list()[0])

    return df,nya_lopp

In [8]:
### beräkna vilka datum att använda ###
def get_alla_datum(test_from_proc=0.75, train_from_proc=0, total_omlärning = False):
    if total_omlärning:
        nya_lopp=None
        df = pd.read_csv('all_data.csv')     
        datum_att_lära = df.datum.unique()
        split_ix = int(len(datum_att_lära)*test_from_proc)
    else:
        # normalt adderar vi bara 1 eller flera veckor från "omg_att_spela_link.csv"
        df, nya_lopp = scrape_nya_lopp()  # scrape från 'omg_att_spela_link.csv' och addera till df
        omg_df = pd.read_csv('omg_att_spela_link.csv')     
        startix=omg_df.Link.str.find('spel')[0]    # index till 'spel' i url
        datum_att_lära = omg_df.Link.str.slice(start=startix+5,stop=startix+15).to_list() # en datum 
        split_ix=0
        print(f'datum att lära: {datum_att_lära}')

    return df,nya_lopp,datum_att_lära,split_ix


## Walkthrough-funktionen  här

In [15]:

### Kör en walkthrough learn här, en datum i taget framåt

# Jag har ändrat till att alla steg kör utan test-datam ed fast iterations=100
def walkthrough(classic_test=False, verbose=False):
    
    df, nya_lopp, alla_datum, split_ix = get_alla_datum(0.8)

    l2_leaf_regs=2
    model=get_model(use_best=False,iterations=100)
    df=remove_features(df.copy())
    cat_features = list(df.loc[:,df.dtypes=='O'].columns)
    df,_ = replace_NaN(df.copy(), cat_features=cat_features)    
    print(f'cat_features {cat_features}\n')

    df['plac']=(df.plac==1)*1
        
    for nr,datum in enumerate(alla_datum[split_ix:]):
        print(f'walk-iter {nr+1} av {len(alla_datum[split_ix:])} ',end=': ')

        X_train = df.loc[df.datum<datum,:].copy()
        y_train = X_train.plac; X_train.drop(['plac'],axis=1,inplace=True)

        if classic_test:    ### klassisk train/test utan walkthrough
            X_test  = df.loc[df.datum>=datum,:].copy()
            y_test  = X_test.plac;  X_test.drop(['plac'],axis=1,inplace=True)
            train_pool = Pool(X_train,y_train,cat_features=cat_features)
            test_pool = Pool(X_test,y_test,cat_features=cat_features)
            model.fit(train_pool,use_best_model=True, verbose=verbose,eval_set=test_pool)
        else:
            X_test  = df.loc[df.datum==datum,:].copy()
            y_test  = X_test.plac;  X_test.drop(['plac'],axis=1,inplace=True)
            train_pool = Pool(X_train,y_train,cat_features=cat_features)
            test_pool = Pool(X_test,y_test,cat_features=cat_features)
            model.fit(train_pool,use_best_model=False, verbose=verbose)

        print('best iteration',model.get_best_iteration(), '\tbest score', round(model.get_best_score()['learn']['Accuracy'],3) )
        ##['validation']['Logloss'],3),'\t', round(model.get_best_score()['validation']['Accuracy:use_weights=true'],3))
        
        if classic_test:    ### klassisk train/test utan walkthrough
            return model,cat_features
    
        model.save_model('modeller/model_'+datum)

    X_train =df.copy().drop('plac',axis=1)
    y_train = df.plac 
    model.fit(X_train,y=y_train,cat_features=cat_features)
    print(f'spara model_senaste',datum)
    model.save_model('modeller/model_senaste')

    return df,nya_lopp, model,cat_features

### Här körs hela walkthrough

In [16]:
df, nya_lopp, model, cat_features = walkthrough(classic_test=False, verbose=False)

omgång 1: https://www.atg.se/spel/2022-02-12/V75/




klickade på ANPASSA
anpassa klar - break
ant resultat 7
ant lopp 7
EUR: False NOK: False
priser ['Pris: 150.000-75.000-40.000-25.000-15.000-11.500-7.500-5.000 kr (8 priser). Lägst 2.500 kr till alla tävlande.', 'Pris: 150.000-75.000-40.000-25.000-15.000-11.500-7.500-5.000 kr (8 priser). Lägst 2.500 kr till alla tävlande.', 'Pris: 150.000-75.000-40.000-25.000-15.000-11.500-7.500-5.000 kr (8 priser). Lägst 2.500 kr till alla tävlande.', 'Pris: 150.000-75.000-40.000-25.000-15.000-11.500-7.500-5.000 kr (8 priser). Lägst 2.500 kr till alla tävlande.', 'Pris: 150.000-75.000-40.000-25.000-15.000-11.500-7.500-5.000 kr (8 priser). Lägst 2.500 kr till alla tävlande.', 'Pris: 150.000-75.000-40.000-25.000-15.000-11.500-7.500-5.000 kr (8 priser). Lägst 2.500 kr till alla tävlande.', 'Pris: 150.000-75.000-40.000-25.000-15.000-11.500-7.500-5.000 kr (8 priser). Lägst 2.500 kr till alla tävlande.']
Ant priser 7
pris: 150.000
ant names,vodds,podds,rader,streck 15 15 15 15
AVD 1 ÅBY 2140 VOLTSTART ......

## Kör allt ovanför walkthrough
### Se till att "omg_att_spela_link.csv" är ifylld

## init  - kör först allt t.o.m 'replace_NaN()' ovan

In [17]:
model = get_model().load_model('modeller/model_senaste')
dforg = pd.read_csv('all_data.csv')     
# print(df.columns)
df=remove_features(dforg.copy())
# df['avd']=dforg.avd
cat_features = list(df.loc[:,df.dtypes=='O'].columns)
df,_ = replace_NaN(df.copy(), cat_features=cat_features)    
y=df.plac
y=(y==1)*1
df.drop('plac',axis=1,inplace=True)

## cv

In [18]:

cv_pool = Pool(df,y,cat_features=cat_features)

params = {
         'use_best_model': True,
         'eval_metric' : 'AUC',
         "loss_function": "Logloss",
         'early_stopping_rounds': 100,
         'verbose': 50,
}

cv_score =cv(pool=cv_pool, 
   params=params, 
   dtrain=None, 
   iterations=2000, 
   num_boost_round=None,
   fold_count=5, 
   nfold=None,
   inverted=False,
   partition_random_seed=0,
   seed=2021, 
   shuffle=False, 
   logging_level=None, 
   stratified=True,
   as_pandas=True,
   type='TimeSeries')

Training on fold [0/5]
0:	test: 0.7577436	best: 0.7577436 (0)	total: 60.9ms	remaining: 2m 1s
50:	test: 0.8104227	best: 0.8104940 (48)	total: 3.42s	remaining: 2m 10s
100:	test: 0.8169849	best: 0.8174740 (92)	total: 6.1s	remaining: 1m 54s
150:	test: 0.8151948	best: 0.8174740 (92)	total: 8.93s	remaining: 1m 49s

bestTest = 0.8174739923
bestIteration = 92

Training on fold [1/5]
0:	test: 0.7232253	best: 0.7232253 (0)	total: 43.2ms	remaining: 1m 26s
50:	test: 0.8096841	best: 0.8096841 (50)	total: 3.87s	remaining: 2m 27s
100:	test: 0.8138909	best: 0.8153313 (84)	total: 7.52s	remaining: 2m 21s
150:	test: 0.8111057	best: 0.8153313 (84)	total: 11s	remaining: 2m 15s

bestTest = 0.8153312554
bestIteration = 84

Training on fold [2/5]
0:	test: 0.7242193	best: 0.7242193 (0)	total: 57.8ms	remaining: 1m 55s
50:	test: 0.8016739	best: 0.8016739 (50)	total: 4.04s	remaining: 2m 34s
100:	test: 0.8022281	best: 0.8044334 (72)	total: 8.01s	remaining: 2m 30s
150:	test: 0.8006545	best: 0.8044334 (72)	total: 12

In [19]:
cv_score

Unnamed: 0,iterations,test-AUC-mean,test-AUC-std,test-Logloss-mean,test-Logloss-std,train-Logloss-mean,train-Logloss-std
0,0,0.727558,0.022098,0.657984,0.001240,0.657805,0.001459
1,1,0.738448,0.029622,0.627612,0.001277,0.627290,0.001548
2,2,0.767376,0.022454,0.598089,0.001852,0.597492,0.001689
3,3,0.778122,0.022743,0.570242,0.002050,0.569492,0.001882
4,4,0.781344,0.020052,0.547427,0.003342,0.546633,0.002818
...,...,...,...,...,...,...,...
225,225,0.811179,0.006108,0.239623,0.003490,0.206466,0.006852
226,226,0.811178,0.006107,0.239624,0.003490,0.206461,0.006851
227,227,0.811182,0.006109,0.239621,0.003491,0.206451,0.006848
228,228,0.811180,0.006108,0.239622,0.003490,0.206443,0.006846


In [20]:
from IPython.display import display
print(df.datum.min(),df.datum.max())
display(cv_score[cv_score['test-Logloss-mean'].min() == cv_score['test-Logloss-mean']])
display(cv_score[cv_score['test-AUC-mean'].max() == cv_score['test-AUC-mean']])

2014-12-28 2022-02-12


Unnamed: 0,iterations,test-AUC-mean,test-AUC-std,test-Logloss-mean,test-Logloss-std,train-Logloss-mean,train-Logloss-std
129,129,0.812133,0.006302,0.238761,0.003573,0.213453,0.003744


Unnamed: 0,iterations,test-AUC-mean,test-AUC-std,test-Logloss-mean,test-Logloss-std,train-Logloss-mean,train-Logloss-std
91,91,0.813145,0.006311,0.239911,0.003354,0.221208,0.001907


In [21]:
from sklearn.model_selection import train_test_split

df[['datum','avd','streck','häst','kusk']] = dforg[['datum','avd','streck','häst','kusk']]

# df.drop('datum',axis=1,inplace=True)
df.drop('avd',axis=1,inplace=True)
df.drop(['streck'],axis=1,inplace=True)
# df.drop(['häst','kusk'],axis=1,inplace=True)
cat_features = list(df.loc[:,df.dtypes=='O'].columns)
X_train,X_test,y_train,y_test = train_test_split(df,y,shuffle=False,)
print("test:",X_test.datum.min(),X_test.datum.max())
print("train:",X_train.datum.min(),X_train.datum.max())
cb=get_model(use_best=True)
cb.fit(X_train,y_train,eval_set= (X_test,y_test),early_stopping_rounds=200, cat_features=cat_features,verbose=100)

test: 2020-01-04 2022-02-12
train: 2014-12-28 2020-01-04
0:	learn: 0.6328976	test: 0.6073440	best: 0.6073440 (0)	total: 108ms	remaining: 5m 24s
100:	learn: 0.7078480	test: 0.6397288	best: 0.6422302 (79)	total: 11s	remaining: 5m 15s
200:	learn: 0.7316776	test: 0.6422295	best: 0.6448526 (118)	total: 21s	remaining: 4m 51s
300:	learn: 0.7509815	test: 0.6432699	best: 0.6455066 (269)	total: 30.9s	remaining: 4m 37s
400:	learn: 0.7722690	test: 0.6368436	best: 0.6459278 (305)	total: 41.1s	remaining: 4m 26s
500:	learn: 0.7941278	test: 0.6357897	best: 0.6459278 (305)	total: 51.2s	remaining: 4m 15s
Stopped by overfitting detector  (200 iterations wait)

bestTest = 0.6459277776
bestIteration = 305

Shrink model to first 306 iterations.


<catboost.core.CatBoostClassifier at 0x20da48e6310>

In [22]:
X_test[['datum','avd','vodds']] = dforg[['datum','avd','vodds']]
predict_prob = cb.predict_proba(X_test)

_,prob_score = proba_order_score(X_test ,y_test, predict_prob)

print('cb med ekipage',prob_score, cb.best_score_['validation']['AUC'])

log(proba) -0.7567074968513359
cb med ekipage 4.049145299145299 0.7077197490780207


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self[k1] = value[k2]


# FLAML (med och utan ekipage och streck)
För att köra enbart FLAML initiera först med allt innan plus walkthrough  

In [23]:
def ordinal_enc(df_, features):
    df = df_.copy()
    from sklearn.preprocessing import OrdinalEncoder
    enc = OrdinalEncoder(handle_unknown='use_encoded_value', unknown_value=100000)
    enc.fit(df[[features]])
    df[features] = enc.transform(df[[features]])
    return df,enc

# df,enc = ordinal_enc(dforg,'häst')
# import pickle
# pickle.dump(enc, open('modeller/encoder.sav', 'wb'))


In [24]:
def split_data(df_,train_from_proc=0,test_proc=0.25):
    # train_from_proc = where to start both train and test
    # test_proc = how much of the data is test
    df=df_.copy()
    alla_datum = df.datum.unique()
    train_from_datum = alla_datum[ int(len(alla_datum)*train_from_proc)]
    X_test=None
    y_test=None
    
    if test_proc:
        selected_data = alla_datum[ alla_datum >= train_from_datum ]
        test_from_datum = selected_data[ int(len(selected_data)*(1-test_proc)) ]
        X_test  = df[df.datum >= test_from_datum]
        y_test  = (X_test.plac==1)*1
        X_test  = X_test.drop('plac',axis=1)
        print(f'test from {X_test.datum.min()} to {X_test.datum.max()} (incl)')
    
        X_train = df[(df.datum >= train_from_datum) & (df.datum < test_from_datum) ]
    else:
        print('No test')
        X_train = df[(df.datum >= train_from_datum)]
    
    y_train = (X_train.plac==1)*1
    
    print(f'train from {X_train.datum.min()} to {X_train.datum.max()} (incl)')
    
    return X_train.drop('plac', axis=1), X_test, y_train, y_test


In [25]:
# prepare all data för flaml
dforg = pd.read_csv('all_data.csv')  

### enc is the encoder that we will save for use during v75_spel.py ###
### It will be used and finally saved later in this code ###
df,env = ordinal_enc(dforg,'häst')

X_train, X_test, y_train, y_test= split_data(df,train_from_proc=0,test_proc=0.25)
X_train = remove_features(X_train)
X_test  = remove_features( X_test)

# X_train = X_train.drop('streck', axis=1)
# X_test  = X_test.drop ('streck', axis=1)
# X_train.drop('datum', axis=1, inplace=True)
# X_test.drop( 'datum', axis=1, inplace=True)
cat_features = list(X_train.select_dtypes('object').columns)
# X_train, X_test = replace_NaN(X_train.copy(),X_test=X_test.copy(), cat_features=cat_features) 
# X_train.fillna(-1)
# X_test.fillna(-1)
print(cat_features)
X_train.shape,X_test.shape

test from 2019-12-31 to 2022-02-12 (incl)
train from 2014-12-28 to 2019-12-30 (incl)
['datum', 'bana', 'kusk', 'kön', 'h1_kusk', 'h1_bana', 'h2_kusk', 'h2_bana', 'h3_kusk', 'h3_bana', 'h4_kusk', 'h4_bana', 'h5_kusk', 'h5_bana']


((32692, 68), (11041, 68))

In [26]:
from flaml import AutoML 
cat_features = list(X_train.select_dtypes('object').columns)
starting_points={'lgbm': {'n_estimators': 38,
  'num_leaves': 4,
  'min_child_samples': 2,
  'learning_rate': 0.19098448074739216,
  'log_max_bin': 7,
  'colsample_bytree': 0.8827412174089042,
  'reg_alpha': 0.004577823970660193,
  'reg_lambda': 0.03815584533462228},
 'rf': {'n_estimators': 33,
  'max_features': 0.3251674877768946,
  'max_leaves': 89,
  'criterion': 'entropy'},
 'catboost': {'early_stopping_rounds': 50,
  'learning_rate': 0.007511731949060241},
 'xgboost': {'n_estimators': 575,
  'max_leaves': 46,
  'min_child_weight': 1.032235057697502,
  'learning_rate': 0.013318439439138472,
  'subsample': 0.7908401179782586,
  'colsample_bylevel': 0.6924750037579576,
  'colsample_bytree': 0.7174828796230647,
  'reg_alpha': 0.15461500385937774,
  'reg_lambda': 0.6619886587472544},
 'extra_tree': {'n_estimators': 47,
  'max_features': 0.7934349565988307,
  'max_leaves': 213,
  'criterion': 'entropy'}}
flml_raw_parms={'task': 'classification','split_type':'time', 'metric':'roc_auc', 'starting_points': starting_points,'verbose':False,
        'time_budget':1200, 'max_iter':50000000,'n_jobs':5, 'X_val': X_test, 'y_val':y_test,'early_stop':True, 'ensemble':True}

automl_raw = AutoML()
automl_raw.fit(X_train,y_train, **flml_raw_parms)

In [27]:
flm_raw_train_pred= automl_raw.predict_proba(X_train)
flm_raw_test_pred = automl_raw.predict_proba(X_test)

X_test_raw = X_test.copy()
X_test_raw[['datum','avd','vodds']] = dforg[['datum','avd','vodds']]
_,prob_score = proba_order_score(X_test_raw,y_test, flm_raw_test_pred)

print('timeserie, datum,häst, kusk', prob_score, 1-automl_raw.best_loss)
# X_test.columns

log(proba) -1.866482924373347
timeserie, datum,häst, kusk 3.0984126984126985 0.8223201708047192


timeserie  0.3 0.25, datum, häst, kusk 3.720565149136578  0.7213763318649257 ... 1.9827526807785034 .....   best    
timeserie  0.4 0.25, datum, häst, kusk 3.7362637362637363 0.7214144007762124  
timeserie, 0.2 0.25, datum, häst, kusk 3.760989010989011  0.72561915325073230    
timeserie, 0.1 0.25  datum, häst, kusk 3.8180708180708183 0.726597977829505    
timeserie, 0.5 0.25, datum, häst, kusk 3.936263736263736  0.7216626969090024  
timeserie, 0.3 0.25, datum, häst, kusk streck, NaN 3.0706436420722136  0.8230307821948237   
timeserie, 0.3 0.25, datum, häst, kusk,streck  3.0549450549450547 0.8232840226857013 ... -1.7710182666778564 .......... best   
timeserie, 0.3 0.25, datum, häst, kusk streck, NaN, fillna, 3.0549450549450547 0.8237003593459333   
timeserie, 0.3 0.25, datum, häst, kusk, streck 3.06436420722135   0.8232840226857013       
timeserie, 0.4 0.25, datum, häst, kusk, streck 3.1483516483516483 0.8169106155467452  
timeserie, 0.2 0.25, datum, häst, kusk, streck 3.0824175824175826 0.8220287891340522

## Final FLML model

In [28]:
def run_flaml(X_train, y_train, df_perf, save=True):
    import pickle
    from_date = X_train.datum.min()
    to_date = X_train.datum.max()
    
    for with_streck in [True, False]:
        if with_streck: 
            X_tr = X_train.copy()    
            filename = 'modeller\\FLAML_model.sav'
        else:
            X_tr = X_train.drop('streck', axis=1).copy()
            filename = 'modeller\\FLAML2_model.sav'
            
        print('with_streck = ',with_streck)   
    
        automl = AutoML()
        flml_parms={'task': 'classification','split_type':'time', 'metric':'roc_auc','starting_points': starting_points, 'verbose':False,
        'time_budget':1700, 'max_iter':400000000,'n_jobs':5, 'early_stop':True, 'ensemble':True}

        automl.fit(X_tr, y_train, **flml_parms)
        perf = 1-automl.best_loss
        print(perf, 'for streck in columns', with_streck)
        df_perf.loc[len(df_perf)] = [from_date, to_date, with_streck, perf]
        
        # save_model
        if save:
            print('save model in',filename)        
            pickle.dump(automl, open(filename, 'wb')) 
            print('save encoder enc in encoder.sav')
            pickle.dump(enc, open('modeller/encoder.sav', 'wb'))

    # remove duplicates
    df_perf.drop_duplicates(subset=['learn_from','learn_to','streck'], keep='last', inplace=True)

    print('\n','\n')
    display(df_perf.tail(30).sort_values(by=['perf'], ascending=False))
    print('Med streck max:',df_perf.loc[df_perf.streck == True].perf.max())
    print('Ej  streck max:',df_perf.loc[df_perf.streck == False].perf.max())

    df_perf.to_csv('perf_flaml.csv', index=False)

In [29]:
# prepare all data för flaml
dforg = pd.read_csv('all_data.csv')  
df,enc = ordinal_enc(dforg, 'häst')
X_train, _, y_train, _ = split_data(df,train_from_proc=0,test_proc=None)
X_train = remove_features(X_train)

df_perf = pd.read_csv('perf_flaml.csv')
run_flaml(X_train, y_train, df_perf)


No test
train from 2014-12-28 to 2022-02-12 (incl)
with_streck =  True
0.8178110106472746 for streck in columns True
save model in modeller\FLAML_model.sav
save encoder enc in encoder.sav
with_streck =  False
0.7261161075195017 for streck in columns False
save model in modeller\FLAML2_model.sav
save encoder enc in encoder.sav

 



Unnamed: 0,learn_from,learn_to,streck,perf
2,2017-02-26,2022-01-01,True,0.818728
0,2017-02-26,2021-12-31,True,0.818714
16,2014-12-28,2022-02-12,True,0.817811
14,2014-12-28,2022-02-05,True,0.817489
12,2014-12-28,2022-01-29,True,0.817124
10,2014-12-28,2022-01-22,True,0.816984
4,2014-12-28,2022-01-01,True,0.816678
6,2014-12-28,2022-01-08,True,0.816579
8,2014-12-28,2022-01-15,True,0.81633
5,2014-12-28,2022-01-01,False,0.728704


Med streck max: 0.8187283877462516
Ej  streck max: 0.7287040948165976


# remove garbage

In [30]:
import subprocess
subprocess.call([r'C:/Users/peter/Documents/MyProjects/PyProj/Trav/spel/remove_dirt.bat'])

0