In [1]:
!pip install flaml -i https://pypi.tuna.tsinghua.edu.cn/simple

Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple
Collecting flaml
  Downloading https://pypi.tuna.tsinghua.edu.cn/packages/f4/9f/d4c5e02f30fc8842005f9a448316c397eee98c4ee865e403dce9636aa61a/FLAML-2.1.2-py3-none-any.whl (296 kB)
     ---------------------------------------- 0.0/296.7 kB ? eta -:--:--
     ------------------------------------ - 286.7/296.7 kB 8.9 MB/s eta 0:00:01
     ------------------------------------ - 286.7/296.7 kB 8.9 MB/s eta 0:00:01
     ------------------------------------ - 286.7/296.7 kB 8.9 MB/s eta 0:00:01
     ------------------------------------ - 286.7/296.7 kB 8.9 MB/s eta 0:00:01
     ------------------------------------ - 286.7/296.7 kB 8.9 MB/s eta 0:00:01
     ------------------------------------ - 286.7/296.7 kB 8.9 MB/s eta 0:00:01
     ------------------------------------ - 286.7/296.7 kB 8.9 MB/s eta 0:00:01
     ------------------------------------ - 286.7/296.7 kB 8.9 MB/s eta 0:00:01
     ------------------------------------ - 286

In [2]:
import pandas as pd
# Training table, read from the working directory; later cells read its 'Target' column.
df = pd.read_csv('./df_train2.csv')
# Test-side table, read from the working directory.
df_t = pd.read_csv('./df_test2.csv')

In [3]:
def feature_engineering(df,df_test,target,n_jobs=4):
    """Generate OpenFE features on the training data and apply them to both tables.

    NOTE(review): a later cell redefines ``feature_engineering`` with an extra
    ``features`` parameter and a three-value return; that definition replaces
    this one once its cell has run.

    Parameters
    ----------
    df : pandas.DataFrame
        Training table that contains the ``target`` column.
    df_test : pandas.DataFrame
        Table the generated features are also applied to.
    target : str
        Name of the label column in ``df``.
    n_jobs : int
        Forwarded as ``n_jobs`` to both ``OpenFE.fit`` and ``openfe.transform``.

    Returns
    -------
    tuple
        ``(train_x, test_x)`` exactly as returned by ``openfe.transform``.
    """
    from openfe import OpenFE, transform

    predictors = df.drop(target,axis=1)
    labels = df[target]
    # Search for candidate features using the labelled training rows only.
    generated = OpenFE().fit(data=predictors, label=labels, n_jobs=n_jobs)
    # Apply the selected features to the train and test tables together.
    new_train, new_test = transform(predictors, df_test, generated, n_jobs=n_jobs)
    return new_train, new_test

def ag_tuning(df_train,df_test,id_name='0620',label='Survived',loss='log_loss',presets='best_quality',dynamic_stacking=True,folds=10,time=60*3):
    """Fit a bagged AutoGluon ``TabularPredictor`` and export its predictions.

    Parameters
    ----------
    df_train : pandas.DataFrame
        Training table containing the ``label`` column. An ``id_name`` column
        holding the out-of-fold predictions is added to it IN PLACE.
    df_test : pandas.DataFrame
        Table to predict on. An ``id_name`` column holding the test
        predictions is added to it IN PLACE.
    id_name : str
        Tag used as the new column name and inside the output file names.
    label : str
        Name of the label column, passed to ``TabularPredictor``.
    loss : str
        Passed to ``TabularPredictor`` as ``eval_metric``.
    presets : str
        Passed to ``fit`` as ``presets``. (Previously this argument was
        ignored because ``'best_quality'`` was hard-coded in the call; the
        default value is the same, so default callers see no change.)
    dynamic_stacking : bool
        Passed to ``fit`` as ``dynamic_stacking``.
    folds : int
        Passed to ``fit`` as ``num_bag_folds``.
    time : int
        Accepted for interface compatibility only. It is NOT applied: the
        ``time_limit`` option of the fit call is disabled below.

    Returns
    -------
    tuple
        ``(predictions, oofs, predictor)`` - test predictions, out-of-fold
        predictions and the fitted predictor.

    Side effects
    ------------
    Writes ``model_hc_systhis_{id_name}.csv`` (columns ``oofs`` and
    ``predict_y``), ``model_hc_tr_{id_name}.csv`` and
    ``model_hc_ts_{id_name}.csv`` into the current working directory.
    """
    from autogluon.tabular import TabularPredictor
    from autogluon.features.generators import AutoMLPipelineFeatureGenerator
    from autogluon.tabular.configs.hyperparameter_configs import get_hyperparameter_config

    auto_ml_pipeline_feature_generator = AutoMLPipelineFeatureGenerator()
    # Only consumed by the disabled `hyperparameters=` option of the fit call below.
    custom_hyperparameters = get_hyperparameter_config('default')

    def get_oof_ts(predictor,_train_x,_test_x,ids,label):
        """Export out-of-fold and test predictions to CSV and return both."""
        predictions_oof = predictor.predict_oof()
        prdict_y = predictor.predict(_test_x)
        # Both series go into one frame; this file uses the 'oofs' / 'predict_y'
        # column names that hill_climbers reads back.
        pd.DataFrame({'oofs': predictions_oof, 'predict_y': prdict_y}).to_csv(f'model_hc_systhis_{ids}.csv',index=False)
        # In place: the caller's frames gain an `ids` column before being dumped.
        _train_x[ids],_test_x[ids]  = predictions_oof,prdict_y
        _train_x.to_csv(f'model_hc_tr_{ids}.csv',index=False)
        _test_x.to_csv(f'model_hc_ts_{ids}.csv',index=False)
        return predictions_oof,prdict_y

    predictor = TabularPredictor(
        label=label,
        # problem_type=problem,#'binary',
        eval_metric=loss #'acc'
    ).fit(
        df_train,
        presets=presets,  # fix: honour the caller's choice instead of a hard-coded value
        # unlabeled_data = df_test2,
        # auto_stack=True,
        feature_generator=auto_ml_pipeline_feature_generator,
        ag_args_ensemble = {'fold_fitting_strategy':'sequential_local'},
        dynamic_stacking=dynamic_stacking,
        # num_stack_levels=1,
        num_bag_folds=folds,
        # time_limit=time,  # disabled, so `time` currently has no effect
        # included_model_types = ['XGB','NN_TORCH','CATBOOST','LIGHTGBM','FAST_AI'],
        # excluded_model_types = ['NN_TORCH'],
        # use_bag_holdout=True,
        # hyperparameters=custom_hyperparameters,
        hyperparameter_tune_kwargs='auto',
        # keep_only_best = True,
    )
    oofs,predictions = get_oof_ts(predictor,df_train,df_test,id_name,label)
    return predictions,oofs,predictor
# show_info(predictor,train_x)

def show_info(predictor,df_train):
    """Print three diagnostic reports of a fitted predictor, framed by divider lines.

    The reports are, in order: ``predictor.fit_summary()``,
    ``predictor.leaderboard(df_train, silent=True)`` and
    ``predictor.feature_importance(df_train, silent=True)``.

    Parameters
    ----------
    predictor
        Object exposing the three methods above.
    df_train
        Data handed to ``leaderboard`` and ``feature_importance``.
    """
    divider = '++++++++++++++++++++++++++++++++++++++++++++++'
    # Lambdas defer each report until its divider has already been printed.
    report_builders = (
        lambda: predictor.fit_summary(),
        lambda: predictor.leaderboard(df_train,silent=True),
        lambda: predictor.feature_importance(df_train,silent=True),
    )
    for build_report in report_builders:
        print(divider)
        print(build_report())
    print(divider)

from sklearn.metrics import mean_squared_error,roc_auc_score
def hill_climbers(files,train_path,test_path,label='Survived',func=roc_auc_score):
    """Blend saved model predictions with ``hillclimbers.climb_hill``.

    Parameters
    ----------
    files : sequence of str
        CSV paths, one per model, each holding an ``oofs`` and a
        ``predict_y`` column. Model ``i`` becomes column ``m{i}``.
    train_path : str
        CSV path of the training table handed to ``climb_hill`` as ``train``.
    test_path : str
        CSV path of the test table. It is read, but the resulting frame is
        not passed on to ``climb_hill``.
    label : str
        Passed to ``climb_hill`` as ``target``.
    func : callable
        Metric wrapped with ``partial`` and passed as ``eval_metric``.
        The objective is fixed to ``"maximize"``.

    Returns
    -------
    tuple
        ``(test_preds, oof_preds)`` as returned by ``climb_hill``.
    """
    from hillclimbers import climb_hill, partial

    train = pd.read_csv(train_path)
    test = pd.read_csv(test_path)

    oofs = pd.DataFrame()
    pred = pd.DataFrame()
    for position, path in enumerate(files):
        saved = pd.read_csv(path)
        column = f'm{position}'
        # Blank cells in either column are dropped before the values are stored.
        oof_values = saved['oofs'].dropna()
        test_values = saved['predict_y'].dropna()
        oofs[column] = oof_values
        pred[column] = test_values

    blend_options = dict(
        train=train,
        oof_pred_df=oofs,
        test_pred_df=pred,
        target=label,
        objective="maximize",
        eval_metric=partial(func),
        negative_weights=True,
        precision=0.001,
        plot_hill=True,
        plot_hist=True,
        return_oof_preds=True,
    )
    test_preds, oof_preds = climb_hill(**blend_options)
    return test_preds, oof_preds
# a,b = hill_climbers(files,train_path,test_path=test_path,label='Survived')

def titanic_acc(predictions):
    """Print the accuracy of ``predictions`` against the answers in ``./acc.csv``.

    The reference labels are taken from the ``Survived`` column of that file.
    Nothing is returned; the score is only printed.
    """
    from sklearn.metrics import accuracy_score

    expected = pd.read_csv('./acc.csv').Survived
    score = accuracy_score(expected,predictions)
    print(score)

def submission(predictions,test_x,cols,label,id_name):
    """Attach predictions to ``test_x`` and write the chosen columns to CSV.

    Parameters
    ----------
    predictions
        Values stored in ``test_x[label]``.
    test_x : pandas.DataFrame
        Frame receiving the predictions. It is modified in place.
    cols : list
        Columns written to the file.
    label : str
        Name of the column that receives ``predictions``.
    id_name : str
        Tag placed in the file name ``submission_{id_name}.csv``.
    """
    # In place: the caller's frame keeps the new label column afterwards.
    test_x[label] = predictions
    selected = test_x[cols]
    target_file = f'submission_{id_name}.csv'
    selected.to_csv(target_file,index=False)
    
    
def pipline(df1,df1_test,label='Survived',time=60*3,id_name='0620_v3',loss='log_loss',cols=['1','2']):
    """Run the train -> report -> submission sequence and return the predictions.

    Steps: ``ag_tuning`` fits the model on ``df1`` and predicts ``df1_test``,
    ``show_info`` prints the predictor's reports, and ``submission`` writes
    the ``cols`` columns of ``df1_test`` to a CSV tagged with ``id_name``.

    Parameters
    ----------
    df1, df1_test : pandas.DataFrame
        Training and test tables, passed on unchanged.
    label, time, id_name, loss
        Forwarded to ``ag_tuning`` as keyword arguments.
    cols : list
        Columns forwarded to ``submission``.

    Returns
    -------
    The predictions returned by ``ag_tuning``.
    """
    # Optional OpenFE step, currently disabled:
    #   train_x, test_x = feature_engineering(df1,df1_test,label)
    #   train_x[label] = df1[label]
    #   train_x.to_csv('tr.csv',index=False)
    #   test_x.to_csv('ts.csv',index=False)
    predictions, _oofs, predictor = ag_tuning(
        df1,
        df1_test,
        id_name=id_name,
        time=time,
        loss=loss,
        label=label,
    )
    show_info(predictor, df1)
    # titanic_acc(predictions)
    submission(predictions, df1_test, cols, label, id_name)
    return predictions


In [29]:
# Display the feature list currently bound to `fe`.
# NOTE(review): in the visible notebook `fe` is only assigned by the
# feature_engineering call in a cell further down, so this cell relies on
# out-of-order execution - confirm it still works after Restart & Run All.
fe

[<openfe.FeatureGenerator.Node at 0x1cd9206b100>]

In [44]:
def get_feather(train_x,train_y,n_jobs=4):
    """Run an OpenFE feature search and return whatever ``OpenFE.fit`` returns.

    Parameters
    ----------
    train_x
        Predictor table, passed to ``fit`` as ``data``.
    train_y
        Labels, passed to ``fit`` as ``label``.
    n_jobs : int
        Passed to ``fit`` as ``n_jobs``.
    """
    from openfe import OpenFE

    searcher = OpenFE()
    generated = searcher.fit(data=train_x, label=train_y, n_jobs=n_jobs)
    return generated


def feature_engineering(df,df_test,target,n_jobs=4,features=None):
    """Apply OpenFE features to the train and test tables, generating them if needed.

    Parameters
    ----------
    df : pandas.DataFrame
        Training table that contains the ``target`` column.
    df_test : pandas.DataFrame
        Table the features are also applied to.
    target : str
        Name of the label column in ``df``.
    n_jobs : int
        Forwarded to ``get_feather`` and to ``openfe.transform``.
    features : sequence, optional
        Previously generated features to reuse. When it is ``None`` or an
        empty sized container (``[]``, ``()``), a new feature search is run
        through ``get_feather``. Earlier, only a value equal to ``[]``
        triggered the search, so ``None`` was handed straight to
        ``transform``; the default was also a shared mutable list.

    Returns
    -------
    tuple
        ``(train_x, test_x, fe)`` - the two frames returned by
        ``openfe.transform`` plus the feature list that was applied, so it
        can be passed back in as ``features`` on a later call.
    """
    from openfe import transform

    train_x = df.drop(target,axis=1)
    train_y = df[target]

    # Objects without a length are passed through untouched, as before.
    nothing_supplied = features is None or (hasattr(features, '__len__') and len(features) == 0)
    if nothing_supplied:
        fe = get_feather(train_x,train_y,n_jobs)
    else:
        fe = features
    # Kept from the original: echoes the applied features for inspection.
    print(fe)
    # Transform the train and test data according to the chosen features.
    train_x, test_x = transform(train_x, df_test, fe, n_jobs=n_jobs)
    return train_x, test_x,fe
# Run the OpenFE step on a 2,000-row sample of each table. `random_state` pins the
# sampled rows so a re-run starts from the same data.
# NOTE(review): `df_t` is down-sampled here as well, while the submission cell attaches
# the predictions to the table read from 'df_test.csv' - confirm the row counts line up.
train_x, test_x,fe = feature_engineering(df.sample(n=2000, random_state=42),df_t.sample(n=2000, random_state=42),'Target',features=[])
train_x

The number of candidate features is 13143
Start stage I selection.


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 16/16 [01:10<00:00,  4.43s/it]


7327 same features have been deleted.


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 16/16 [00:43<00:00,  2.72s/it]


Meet early-stopping in successive feature-wise halving.


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 16/16 [00:38<00:00,  2.43s/it]


The number of remaining candidate features is 1999
Start stage II selection.


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 16/16 [00:09<00:00,  1.76it/s]


Finish data processing.
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.036001 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 134405
[LightGBM] [Info] Number of data points in the train set: 1600, number of used features: 2032
[<openfe.FeatureGenerator.Node object at 0x000001CD87CA9EB0>, <openfe.FeatureGenerator.Node object at 0x000001CD92310F40>, <openfe.FeatureGenerator.Node object at 0x000001CD884547F0>, <openfe.FeatureGenerator.Node object at 0x000001CD91FD7880>, <openfe.FeatureGenerator.Node object at 0x000001CD87C85E80>, <openfe.FeatureGenerator.Node object at 0x000001CD91F9B1C0>, <openfe.FeatureGenerator.Node object at 0x000001CD91F029A0>, <openfe.FeatureGenerator.Node object at 0x000001CD920FB5B0>, <openfe.FeatureGenerator.Node object at 0x000001CD923E56A0>, <openfe.FeatureGenerator.Node object at 0x000001CD8844D400>, <openfe.FeatureGenerator.Node object at 0x000001CD91C628E0>, <openfe.Feat

AttributeError: 'tuple' object has no attribute 'tb_frame'

In [33]:
test_x

Unnamed: 0,Marital status,Application mode,Application order,Course,Daytime/evening attendance,Previous qualification,Previous qualification (grade),Nacionality,Mother's qualification,Father's qualification,...,Curricular units 2nd sem (credited),Curricular units 2nd sem (enrolled),Curricular units 2nd sem (evaluations),Curricular units 2nd sem (approved),Curricular units 2nd sem (grade),Curricular units 2nd sem (without evaluations),Unemployment rate,Inflation rate,GDP,autoFE_f_0
0,1,1,1,9500,1,1,141.0,1,3,1,...,0,8,0,0,0.000000,0,13.9,-0.3,0.79,5.073755
1,1,1,1,9238,1,1,128.0,1,1,19,...,0,6,6,6,13.500000,0,11.1,0.6,2.02,2.804398
2,1,1,1,9238,1,1,118.0,1,1,19,...,0,6,11,5,11.000000,0,15.5,2.8,-4.06,5.287255
3,1,44,1,9147,1,39,130.0,1,1,19,...,3,8,14,5,11.000000,0,8.9,1.4,3.51,4.329636
4,1,39,1,9670,1,1,110.0,1,1,37,...,0,6,9,4,10.666667,2,7.6,2.6,0.32,2.253893
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
51007,1,1,2,171,1,1,128.0,1,38,37,...,0,0,0,0,0.000000,0,15.5,2.8,-4.06,5.287255
51008,2,39,1,9119,1,19,133.1,1,19,37,...,0,5,5,0,0.000000,0,9.4,-0.8,-3.12,2.374457
51009,1,1,1,171,1,1,127.0,1,1,1,...,0,0,0,0,0.000000,0,15.5,2.8,-4.06,5.287255
51010,1,1,3,9773,1,1,132.0,1,19,19,...,0,6,9,3,13.000000,0,7.6,2.6,0.32,2.253893


In [35]:
import numpy as np
def transform(df):
    """Return a new frame: ``df`` plus six row-wise summary statistics.

    Every statistic is computed across all columns that ``df`` has on entry.
    Added columns, in order:

    * ``mean_features``   - 0.1 * row mean
    * ``std_features``    - row standard deviation
    * ``max_features``    - row maximum
    * ``min_features``    - row minimum
    * ``median_features`` - 0.1 * row median
    * ``sum_features``    - 0.1 * row sum

    The 0.1 scale factors are kept exactly as in the original code.

    Unlike the earlier version, the input frame is NOT modified: the result
    is a new DataFrame, so re-running a cell that calls ``transform`` on the
    same input gives the same output. The unused full-table ``np.sort`` and
    the commented-out experimental statistics were removed.

    Parameters
    ----------
    df : pandas.DataFrame
        Table whose columns are summarised row by row.

    Returns
    -------
    pandas.DataFrame
        A new frame with the original columns followed by the six statistics.
    """
    # All keyword values are evaluated against the untouched input before
    # `assign` builds the result, so no statistic feeds into another one.
    return df.assign(
        mean_features=0.1 * df.mean(axis=1),
        std_features=df.std(axis=1),
        max_features=df.max(axis=1),
        min_features=df.min(axis=1),
        median_features=0.1 * df.median(axis=1),
        sum_features=0.1 * df.sum(axis=1),
    )

# Add the row-wise summary statistics to the engineered train and test frames.
df_train = transform(train_x)#df.drop(['Target'],axis=1))
df_test = transform(test_x) #df_t.drop([],axis=1))


Unnamed: 0,Marital status,Application mode,Application order,Course,Daytime/evening attendance,Previous qualification,Previous qualification (grade),Nacionality,Mother's qualification,Father's qualification,...,Inflation rate,GDP,autoFE_f_0,mean_features,std_features,max_features,min_features,median_features,sum_features,Target
0,1,1,1,9238,1,1,126.0,1,1,19,...,0.6,2.02,2.804398,26.008251,1517.218180,9238.0,0.00,0.100000,962.305297,Graduate
1,1,17,1,9238,1,1,125.0,1,19,19,...,0.6,2.02,2.804398,26.059255,1517.124605,9238.0,0.00,0.100000,964.192440,Dropout
2,1,17,2,9254,1,1,137.0,1,3,19,...,0.3,-0.92,6.019190,26.052160,1519.926488,9254.0,-0.92,0.100000,963.929919,Dropout
3,1,1,3,9500,1,1,131.0,1,19,3,...,0.6,2.02,2.804398,26.767664,1560.213470,9500.0,0.00,0.202000,990.403565,Enrolled
4,1,1,2,9500,1,1,132.0,1,19,37,...,2.6,0.32,2.253893,26.856056,1560.059255,9500.0,0.00,0.225389,993.674056,Graduate
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
76513,1,17,1,9254,1,1,121.0,1,19,1,...,-0.3,0.79,5.073755,26.073596,1519.787359,9254.0,-0.30,0.100000,964.723042,Graduate
76514,1,1,6,9254,1,1,125.0,1,1,38,...,-0.8,-3.12,2.374457,26.168458,1519.675076,9254.0,-3.12,0.100000,968.232946,Graduate
76515,5,17,1,9085,1,1,138.0,1,37,37,...,-0.8,-3.12,2.374457,25.840688,1491.691334,9085.0,-3.12,0.400000,956.105446,Enrolled
76516,1,1,3,9070,1,1,136.0,1,38,37,...,2.6,0.32,2.253893,25.601551,1489.567852,9070.0,0.00,0.100000,947.257389,Dropout


In [5]:
# df_cp = df_train.combine_first(df)
# df_cp
# Re-attach the label column from the raw training table; pandas aligns the
# assigned Series with df_train on the row index.
df_train['Target'] = df['Target'] 
df_train

Unnamed: 0,Admission grade,Age at enrollment,Application mode,Application order,Course,Curricular units 1st sem (approved),Curricular units 1st sem (credited),Curricular units 1st sem (enrolled),Curricular units 1st sem (evaluations),Curricular units 1st sem (grade),...,Scholarship holder,Target,Tuition fees up to date,Unemployment rate,max_features,mean_features,median_features,min_features,std_features,sum_features
0,122.6,18,1,1,9238,6,0,6,6,14.500000,...,1,Graduate,1,11.1,9238.0,26.722913,0.100,0.00,1538.108351,962.024857
1,119.8,18,17,1,9238,4,0,6,8,11.600000,...,0,Dropout,1,11.1,9238.0,26.775333,0.100,0.00,1538.010902,963.912000
2,144.7,18,17,2,9254,0,0,6,0,0.000000,...,0,Dropout,1,16.2,9254.0,26.759111,0.100,-0.92,1540.869739,963.328000
3,126.1,18,1,3,9500,7,0,7,9,12.591250,...,1,Enrolled,1,11.1,9500.0,27.503420,0.201,0.00,1581.694142,990.123125
4,120.1,18,1,2,9500,6,0,7,12,12.933333,...,0,Graduate,1,7.6,9500.0,27.595796,0.230,0.00,1581.530601,993.448667
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
76513,116.5,18,17,1,9254,6,0,6,9,10.666667,...,1,Graduate,1,13.9,9254.0,26.783769,0.100,-0.30,1540.722942,964.215667
76514,131.6,19,1,6,9254,4,0,6,22,13.000000,...,0,Graduate,1,9.4,9254.0,26.888764,0.100,-3.12,1540.591116,967.995500
76515,123.3,19,17,1,9085,4,0,5,13,12.500000,...,0,Enrolled,1,9.4,9085.0,26.551889,0.450,-3.12,1512.214797,955.868000
76516,124.8,18,1,3,9070,0,0,6,0,0.000000,...,0,Dropout,1,7.6,9070.0,26.306444,0.100,0.00,1510.071542,947.032000


In [6]:
# df_test_cp  = df_test.combine_first(df_t)
# df_test_cp

Unnamed: 0,Admission grade,Age at enrollment,Application mode,Application order,Course,Curricular units 1st sem (approved),Curricular units 1st sem (credited),Curricular units 1st sem (enrolled),Curricular units 1st sem (evaluations),Curricular units 1st sem (grade),...,Previous qualification (grade),Scholarship holder,Tuition fees up to date,Unemployment rate,max_features,mean_features,median_features,min_features,std_features,sum_features
0,152.1,18,1,1,9500,0,0,7,0,0.000000,...,141.0,0,0,13.9,9500.0,27.376361,0.0395,-0.30,1582.002355,985.549000
1,116.5,19,1,1,9238,6,0,6,7,14.857143,...,128.0,0,1,11.1,9238.0,26.712714,0.1000,0.00,1538.118351,961.657714
2,114.2,18,1,1,9238,6,0,6,11,12.000000,...,118.0,1,1,15.5,9238.0,26.698444,0.1000,-4.06,1538.117590,961.144000
3,130.0,23,44,1,9147,5,2,6,15,11.500000,...,130.0,1,1,8.9,9147.0,26.775861,0.3000,0.00,1522.457632,963.931000
4,106.0,26,39,1,9670,3,0,6,9,11.000000,...,110.0,0,1,7.6,9670.0,27.992185,0.2300,0.00,1609.929880,1007.718667
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
51007,124.7,19,1,2,171,0,0,0,0,0.000000,...,128.0,0,1,15.5,171.0,1.549833,0.0500,-4.06,40.045161,55.794000
51008,140.0,33,39,1,9119,0,0,5,6,0.000000,...,133.1,0,0,9.4,9119.0,26.640500,0.1000,-3.12,1517.926969,959.058000
51009,120.4,20,1,1,171,0,0,0,0,0.000000,...,127.0,0,0,15.5,171.0,1.321222,0.0000,-4.06,39.414092,47.564000
51010,126.3,18,1,3,9773,5,0,6,8,12.600000,...,132.0,0,0,7.6,9773.0,28.278944,0.2800,0.00,1627.160057,1018.042000


In [36]:
# X_train, y_train = df_cp.drop('Target',axis=1),df_cp['Target']
# Split the prepared training frame into the feature matrix and the label vector.
X_train, y_train = df_train.drop('Target',axis=1),df_train['Target']

In [37]:
from flaml import AutoML

# Fit FLAML's AutoML on the prepared features as a classification task.
# NOTE(review): no time budget, iteration cap, metric or seed is passed here, so the
# run relies entirely on FLAML's defaults - confirm that is intended.
automl = AutoML()
automl.fit(X_train, y_train, task="classification")

[flaml.automl.logger: 06-24 04:11:50] {1680} INFO - task = classification
[flaml.automl.logger: 06-24 04:11:50] {1691} INFO - Evaluation method: cv
[flaml.automl.logger: 06-24 04:11:50] {1789} INFO - Minimizing error metric: log_loss
[flaml.automl.logger: 06-24 04:11:51] {1901} INFO - List of ML learners in AutoML Run: ['lgbm', 'rf', 'catboost', 'xgboost', 'extra_tree', 'xgb_limitdepth', 'lrl1']
[flaml.automl.logger: 06-24 04:11:51] {2219} INFO - iteration 0, current learner lgbm
[flaml.automl.logger: 06-24 04:18:51] {2345} INFO - Estimated sufficient time budget=10000s. Estimated necessary time budget=10s.
[flaml.automl.logger: 06-24 04:18:51] {2392} INFO -  at 421.1s,	estimator lgbm's best error=0.4340,	best estimator lgbm's best error=0.4340
[flaml.automl.logger: 06-24 04:18:51] {2219} INFO - iteration 1, current learner rf
[flaml.automl.logger: 06-24 04:32:53] {2392} INFO -  at 1263.2s,	estimator rf's best error=0.4595,	best estimator lgbm's best error=0.4340
[flaml.automl.logger: 

In [40]:
# pd = automl.predict(df_test_cp)
# Predict labels for the prepared test frame.
# NOTE(review): this rebinds `pd`, which an earlier cell bound to the pandas module.
# From here on `pd` is the prediction result; later cells reach pandas through
# `import pandas` and pass `pd` to submission() as the predictions. Renaming it
# needs a matching change in that submission call.
pd = automl.predict(df_test)

In [41]:
import pandas

In [14]:
# Display `df_test_cp`.
# NOTE(review): the only assignment to `df_test_cp` visible in this notebook is
# commented out, so this cell presumably depends on leftover kernel state - confirm.
df_test_cp

Unnamed: 0,Admission grade,Age at enrollment,Application mode,Application order,Course,Curricular units 1st sem (approved),Curricular units 1st sem (credited),Curricular units 1st sem (enrolled),Curricular units 1st sem (evaluations),Curricular units 1st sem (grade),...,Previous qualification (grade),Scholarship holder,Tuition fees up to date,Unemployment rate,max_features,mean_features,median_features,min_features,std_features,sum_features
0,152.1,18,1,1,9500,0,0,7,0,0.000000,...,141.0,0,0,13.9,9500.0,27.376361,0.0395,-0.30,1582.002355,985.549000
1,116.5,19,1,1,9238,6,0,6,7,14.857143,...,128.0,0,1,11.1,9238.0,26.712714,0.1000,0.00,1538.118351,961.657714
2,114.2,18,1,1,9238,6,0,6,11,12.000000,...,118.0,1,1,15.5,9238.0,26.698444,0.1000,-4.06,1538.117590,961.144000
3,130.0,23,44,1,9147,5,2,6,15,11.500000,...,130.0,1,1,8.9,9147.0,26.775861,0.3000,0.00,1522.457632,963.931000
4,106.0,26,39,1,9670,3,0,6,9,11.000000,...,110.0,0,1,7.6,9670.0,27.992185,0.2300,0.00,1609.929880,1007.718667
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
51007,124.7,19,1,2,171,0,0,0,0,0.000000,...,128.0,0,1,15.5,171.0,1.549833,0.0500,-4.06,40.045161,55.794000
51008,140.0,33,39,1,9119,0,0,5,6,0.000000,...,133.1,0,0,9.4,9119.0,26.640500,0.1000,-3.12,1517.926969,959.058000
51009,120.4,20,1,1,171,0,0,0,0,0.000000,...,127.0,0,0,15.5,171.0,1.321222,0.0000,-4.06,39.414092,47.564000
51010,126.3,18,1,3,9773,5,0,6,8,12.600000,...,132.0,0,0,7.6,9773.0,28.278944,0.2800,0.00,1627.160057,1018.042000


In [42]:
# NOTE(review): a helper with this same name and signature is already defined in an
# earlier cell; this definition replaces it when the cell runs.
def submission(predictions,test_x,cols,label,id_name):
    """Store ``predictions`` in ``test_x[label]`` and export ``cols`` to CSV.

    ``test_x`` is modified in place (it gains or overwrites the ``label``
    column). The file is written to ``submission_{id_name}.csv`` in the
    current working directory, without the index.
    """
    test_x[label] = predictions
    output_name = f'submission_{id_name}.csv'
    export = test_x[cols]
    export.to_csv(output_name,index=False)
# Read the test table through the full module name `pandas`, because `pd` was
# rebound to the prediction result in an earlier cell.
test = pandas.read_csv('df_test.csv')
# Here `pd` is that prediction result, not the pandas module. Writes the 'id' and
# 'Target' columns to submission_academic_0624_v1_flaml.csv.
submission(pd,test,['id','Target'],'Target','academic_0624_v1_flaml')

In [43]:
# Upload the generated CSV to the playground-series-s4e6 competition with the Kaggle CLI.
# NOTE(review): "Message" looks like a placeholder submission description - consider
# replacing it with something that identifies this run.
!kaggle competitions submit -c playground-series-s4e6 -f submission_academic_0624_v1_flaml.csv -m "Message"

Successfully submitted to Classification with an Academic Success Dataset



  0%|          | 0.00/809k [00:00<?, ?B/s]
  1%|          | 8.00k/809k [00:00<00:38, 21.1kB/s]
100%|██████████| 809k/809k [00:02<00:00, 364kB/s]  
