In [1]:
import pandas as pd
import numpy as np

In [2]:
from autogluon.tabular import TabularPredictor

  from .autonotebook import tqdm as notebook_tqdm


# AutoGluon 기본 사용법
* 1. 데이터 로딩
* 2. 타겟변수 지정
* 3. TabularPredictor 설정, (타겟변수, 모델 성능 지표) - 모델설정
* 4. 훈련(데이터, 제한시간설정, 분석사전설정 지정) - .fit()
* 5. 데이터에서 일부 데이터를 테스트 데이터로 분리 - train_test_split()
* 6. 분석이 끝난 모델로 테스트 데이터에서 추론 .predict()
* 7. 평가

# 1. 데이터 로딩

In [4]:
# Load salary2.csv from its raw GitHub URL and preview the first rows.
data = pd.read_csv("https://raw.githubusercontent.com/haram4th/ADsP/main/salary2.csv")
data.head()

Unnamed: 0,age,workclass,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,class
0,25,Private,11th,7,Never-married,Machine-op-inspct,Own-child,Black,Male,0,0,40,United-States,<=50K
1,38,Private,HS-grad,9,Married-civ-spouse,Farming-fishing,Husband,White,Male,0,0,50,United-States,<=50K
2,28,Local-gov,Assoc-acdm,12,Married-civ-spouse,Protective-serv,Husband,White,Male,0,0,40,United-States,>50K
3,44,Private,Some-college,10,Married-civ-spouse,Machine-op-inspct,Husband,Black,Male,7688,0,40,United-States,>50K
4,18,,Some-college,10,Never-married,,Own-child,White,Female,0,0,30,United-States,<=50K


In [5]:
# Summarize dtypes and non-null counts per column (the recorded output shows
# missing values in workclass, occupation and native-country).
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 48842 entries, 0 to 48841
Data columns (total 14 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   age             48842 non-null  int64 
 1   workclass       46043 non-null  object
 2   education       48842 non-null  object
 3   education-num   48842 non-null  int64 
 4   marital-status  48842 non-null  object
 5   occupation      46033 non-null  object
 6   relationship    48842 non-null  object
 7   race            48842 non-null  object
 8   sex             48842 non-null  object
 9   capital-gain    48842 non-null  int64 
 10  capital-loss    48842 non-null  int64 
 11  hours-per-week  48842 non-null  int64 
 12  native-country  47985 non-null  object
 13  class           48842 non-null  object
dtypes: int64(5), object(9)
memory usage: 5.2+ MB


In [6]:
from sklearn.model_selection import train_test_split

In [7]:
# Hold out 40% of the rows as a test set, stratified on the 'class' label;
# random_state pins the shuffle so the split is repeatable.
train_data, test_data = train_test_split(data, stratify=data['class'], test_size=0.4, random_state=10)

In [8]:
# Inspect the training split.
train_data

Unnamed: 0,age,workclass,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,class
12689,49,Self-emp-not-inc,HS-grad,9,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,35,United-States,<=50K
30011,45,Private,HS-grad,9,Married-civ-spouse,Machine-op-inspct,Husband,White,Male,0,0,44,United-States,>50K
28454,55,Local-gov,HS-grad,9,Married-civ-spouse,Prof-specialty,Other-relative,White,Female,0,2246,40,United-States,>50K
17394,69,Private,HS-grad,9,Married-civ-spouse,Sales,Wife,White,Female,0,0,40,United-States,<=50K
31597,21,Private,Some-college,10,Never-married,Machine-op-inspct,Own-child,White,Female,0,0,40,United-States,<=50K
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20424,31,Private,HS-grad,9,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,40,United-States,>50K
28686,18,,HS-grad,9,Never-married,,Own-child,White,Female,0,0,40,United-States,<=50K
30381,28,Private,Some-college,10,Never-married,Transport-moving,Not-in-family,White,Male,0,0,50,United-States,<=50K
48693,61,Private,Some-college,10,Divorced,Other-service,Not-in-family,Black,Male,0,0,40,United-States,<=50K


In [9]:
# Inspect the held-out test split.
test_data 

Unnamed: 0,age,workclass,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,class
22332,56,,HS-grad,9,Divorced,,Not-in-family,White,Male,0,0,10,United-States,<=50K
33515,57,Private,HS-grad,9,Divorced,Adm-clerical,Unmarried,White,Female,0,0,42,United-States,<=50K
39475,37,State-gov,Doctorate,16,Married-civ-spouse,Prof-specialty,Husband,White,Male,0,0,45,United-States,<=50K
11140,19,Private,Some-college,10,Never-married,Other-service,Not-in-family,White,Female,0,0,20,United-States,<=50K
39998,41,State-gov,Bachelors,13,Never-married,Prof-specialty,Not-in-family,White,Male,0,0,40,United-States,<=50K
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
43439,21,Private,Some-college,10,Never-married,Adm-clerical,Not-in-family,White,Female,0,0,45,United-States,<=50K
3527,28,Self-emp-inc,Bachelors,13,Never-married,Exec-managerial,Own-child,White,Male,0,0,60,United-States,<=50K
46661,27,Private,HS-grad,9,Separated,Machine-op-inspct,Not-in-family,White,Male,0,0,43,United-States,<=50K
28427,38,Private,Some-college,10,Married-civ-spouse,Exec-managerial,Husband,White,Male,0,0,45,United-States,<=50K


# 2. 타겟변수 지정

In [10]:
# Name of the label column, passed to TabularPredictor(label=...) below.
target_column = "class"

# 3. 제한시간, 검정지표(평가지표, accuracy, rmse, roc_auc) 지정  

In [14]:
# Training time limit in seconds (300 s = 5 minutes).
time_limit = 300
# Evaluation metric. Options listed by the author: accuracy, roc_auc, root_mean_squared_error,
# r2, f1, recall, precision, mean_squared_error.
metric = 'accuracy'

# 4. 모델정의 TabularPredictor()

In [15]:
# Create the predictor for the chosen label column and evaluation metric.
# No `path` is given, so the recorded log shows AutoGluon saving models under
# a timestamped "AutogluonModels/ag-..." directory.
model = TabularPredictor(label=target_column, eval_metric=metric)

No path specified. Models will be saved in: "AutogluonModels/ag-20241017_024644"


# 5. 모델 훈련 

In [16]:
# Train on the training split within the time limit, using the 'medium_quality' preset.
# The call is the last expression of the cell, so its return value (the predictor,
# per the recorded repr) is echoed after the training log.
model.fit(train_data, time_limit=time_limit, presets='medium_quality')

Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.1.1
Python Version:     3.10.15
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Fri Mar 29 23:14:13 UTC 2024
CPU Count:          8
Memory Avail:       13.33 GB / 14.61 GB (91.2%)
Disk Space Avail:   22.89 GB / 223.03 GB (10.3%)
Presets specified: ['medium_quality']
Beginning AutoGluon training ... Time limit = 300s
AutoGluon will save models to "AutogluonModels/ag-20241017_024644"
Train Data Rows:    29305
Train Data Columns: 13
Label Column:       class
AutoGluon infers your prediction problem is: 'binary' (because only two unique label-values observed).
	2 unique label values:  [' <=50K', ' >50K']
	If 'binary' is not the correct problem_type, please manually specify the problem_type parameter during Predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression', 'quantile'])
Problem Type:       binary
Preprocessing data ...
Selected class <--> label mapping:  class 1 =

  self.model = torch.load(net_filename)
	0.8396	 = Validation score   (accuracy)
	50.51s	 = Training   runtime
	1.15s	 = Validation runtime
Fitting model: WeightedEnsemble_L2 ... Training model for up to 299.63s of the -1.33s of remaining time.
	Ensemble Weights: {'CatBoost': 1.0}
	0.8784	 = Validation score   (accuracy)
	0.16s	 = Training   runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 301.73s ... Best model: WeightedEnsemble_L2 | Estimated inference throughput: 275701.6 rows/s (2500 batch size)
TabularPredictor saved. To load, use: predictor = TabularPredictor.load("AutogluonModels/ag-20241017_024644")


<autogluon.tabular.predictor.predictor.TabularPredictor at 0x7f28183ef1f0>

# 6. 생성된 모델에 테스트 데이터 넣어 예측하기

In [17]:
# Predict the label for every row of the held-out test split.
# NOTE(review): `pred` is not referenced by any later cell visible in this notebook.
pred = model.predict(test_data)

# 7. 모델 성능 평가하기

In [18]:
# Score the predictor on the held-out test split and wrap the returned metrics
# in a one-row DataFrame for display.
result = model.evaluate(test_data)
result_df = pd.DataFrame([result], index=[0])
# Compare the performance of every trained model on the test split.
leader_board = model.leaderboard(test_data)
# Feature importance on the test split (the recorded log reports permutation shuffling).
feature_importance = model.feature_importance(test_data)
best_model_name = model.model_best
# Load the best model object in order to read its parameters.
# NOTE(review): `_trainer` is a private attribute of the predictor — may change between AutoGluon versions.
best_model = model._trainer.load_model(best_model_name)
best_model_params = best_model.params

Computing feature importance via permutation shuffling for 13 features using 5000 rows with 5 shuffle sets...
	2.7s	= Expected runtime (0.54s per shuffle set)
	0.93s	= Actual runtime (Completed 5 of 5 shuffle sets)


# 8. 결과 출력

In [21]:
# Show each result table under a banner line, then the best model's name and parameters.
_rule = "=" * 20
for _title, _table in (
    ("result_df", result_df),
    ("leader_board", leader_board),
    ("feature_importance", feature_importance),
):
    print(_rule, _title, _rule)
    display(_table)
    print()

print(_rule, "best_model_name, params", _rule)
print("best_model_name: ", best_model_name, "\nparams: ", best_model_params)
print()



Unnamed: 0,accuracy,balanced_accuracy,mcc,roc_auc,f1,precision,recall
0,0.875467,0.796967,0.640071,0.928474,0.712988,0.794845,0.646417





Unnamed: 0,model,score_test,score_val,eval_metric,pred_time_test,pred_time_val,fit_time,pred_time_test_marginal,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order
0,CatBoost,0.875467,0.8784,accuracy,0.055694,0.008183,12.01651,0.055694,0.008183,12.01651,1,True,7
1,WeightedEnsemble_L2,0.875467,0.8784,accuracy,0.06436,0.009068,12.171801,0.008666,0.000885,0.155292,2,True,13
2,XGBoost,0.875416,0.8772,accuracy,0.099724,0.013983,1.275517,0.099724,0.013983,1.275517,1,True,11
3,LightGBM,0.872601,0.8764,accuracy,0.031327,0.005961,0.571997,0.031327,0.005961,0.571997,1,True,4
4,LightGBMXT,0.867226,0.87,accuracy,0.058394,0.012007,1.902644,0.058394,0.012007,1.902644,1,True,3
5,NeuralNetFastAI,0.856375,0.8628,accuracy,7.89596,1.053628,212.976156,7.89596,1.053628,212.976156,1,True,10
6,RandomForestGini,0.855044,0.8504,accuracy,1.518059,0.154923,2.766668,1.518059,0.154923,2.766668,1,True,5
7,RandomForestEntr,0.854891,0.8512,accuracy,1.588107,0.144864,2.201828,1.588107,0.144864,2.201828,1,True,6
8,ExtraTreesEntr,0.84967,0.8432,accuracy,1.805903,0.16671,1.909244,1.805903,0.16671,1.909244,1,True,9
9,ExtraTreesGini,0.849209,0.8428,accuracy,1.967846,0.155506,1.889012,1.967846,0.155506,1.889012,1,True,8





Unnamed: 0,importance,stddev,p_value,n,p99_high,p99_low
capital-gain,0.04364,0.002206,7.813844e-07,5,0.048183,0.039097
occupation,0.01928,0.002198,1.993044e-05,5,0.023806,0.014754
relationship,0.01776,0.004475,0.0004454331,5,0.026975,0.008545
marital-status,0.01772,0.004747,0.0005629572,5,0.027494,0.007946
age,0.0158,0.001944,2.696615e-05,5,0.019803,0.011797
capital-loss,0.01464,0.001785,2.60309e-05,5,0.018316,0.010964
education-num,0.00632,0.001895,0.0008642234,5,0.010222,0.002418
workclass,0.00616,0.000841,4.075653e-05,5,0.007893,0.004427
hours-per-week,0.00608,0.003211,0.006665418,5,0.012692,-0.000532
education,0.00428,0.002194,0.006017823,5,0.008797,-0.000237



best_model_name:  WeightedEnsemble_L2 
params:  {'use_orig_features': False, 'max_base_models': 25, 'max_base_models_per_type': 5, 'save_bag_folds': True}



# 함수화하고 분석 간단히 하기

In [8]:
def automl(data, target, time=300, metric=None, test_size=0.4, random_state=10, presets='medium_quality'):
    """Split ``data``, train an AutoGluon ``TabularPredictor`` and report how it performs.

    The split is stratified on ``target`` only when ``metric`` is one of the
    classification metrics listed in the body; with the default ``metric=None``
    the split is therefore not stratified.

    Args:
        data: DataFrame holding the feature columns and the ``target`` column.
        target: Name of the label column.
        time: Training time limit in seconds, forwarded to ``fit(time_limit=...)``.
        metric: Metric name forwarded to ``TabularPredictor(eval_metric=...)``.
        test_size: Fraction of rows held out for testing.
        random_state: Seed for the train/test split.
        presets: Preset name forwarded to ``fit(presets=...)``.

    Returns:
        Tuple ``(best_model, result_df, leader_board, feature_importance)``.

    Raises:
        KeyError: If ``target`` is not a column of ``data``.
    """
    # Fail fast, before any training starts, when the label column is misspelled.
    if target not in data.columns:
        raise KeyError(f"target column {target!r} not found; available columns: {list(data.columns)}")

    classification_metrics = ("accuracy", "roc_auc", "recall", "precision", "f1")
    # stratify=None is train_test_split's default, i.e. a plain shuffled split.
    stratify = data[target] if metric in classification_metrics else None
    train_data, test_data = train_test_split(
        data, stratify=stratify, test_size=test_size, random_state=random_state
    )

    model = TabularPredictor(label=target, eval_metric=metric)
    model.fit(train_data, time_limit=time, presets=presets)

    result_df = pd.DataFrame([model.evaluate(test_data)], index=[0])
    # Compare the performance of every trained model on the test split.
    leader_board = model.leaderboard(test_data)
    # Feature importance computed on the test split.
    feature_importance = model.feature_importance(test_data)
    best_model_name = model.model_best
    # Load the best model object to read its parameters.
    # NOTE(review): `_trainer` is a private attribute — may change between AutoGluon versions.
    best_model = model._trainer.load_model(best_model_name)
    best_model_params = best_model.params

    rule = "=" * 20
    for title, table in (
        ("result_df", result_df),
        ("leader_board", leader_board),
        ("feature_importance", feature_importance),
    ):
        print(rule, title, rule)
        display(table)
        print()
    print(rule, "best_model_name, params", rule)
    print("best_model_name: ", best_model_name, "\nparams: ", best_model_params)
    print()
    return best_model, result_df, leader_board, feature_importance

In [9]:
# Load the Titanic training CSV and preview three rows.
# Note: this rebinds `data`, replacing the salary DataFrame loaded earlier in the notebook.
data = pd.read_csv("https://raw.githubusercontent.com/haram4th/ablearn/main/Taitanic_train.csv")
data.head(3)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S


In [10]:
# Run the whole pipeline on the Titanic data with 'Survived' as the label.
# `metric` is left at its default (None), so automl() does not stratify the split.
# The returned tuple is the cell's last expression and is echoed after the tables.
automl(data, 'Survived')

No path specified. Models will be saved in: "AutogluonModels/ag-20241017_033700"
Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.1.1
Python Version:     3.10.15
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Fri Mar 29 23:14:13 UTC 2024
CPU Count:          8
Memory Avail:       13.48 GB / 14.61 GB (92.2%)
Disk Space Avail:   21.76 GB / 223.03 GB (9.8%)
Presets specified: ['medium_quality']
Beginning AutoGluon training ... Time limit = 300s
AutoGluon will save models to "AutogluonModels/ag-20241017_033700"
Train Data Rows:    534
Train Data Columns: 11
Label Column:       Survived
AutoGluon infers your prediction problem is: 'binary' (because only two unique label-values observed).
	2 unique label values:  [0, 1]
	If 'binary' is not the correct problem_type, please manually specify the problem_type parameter during Predictor init (You may specify problem_type as one of: ['binary', 'multiclass', 'regression', 'quantile'])
Problem Type:       binary


	0.39s	 = Training   runtime
	0.01s	 = Validation runtime
Fitting model: NeuralNetTorch ... Training model for up to 277.6s of the 277.59s of remaining time.
The format of the columns of the 'remainder' transformer in ColumnTransformer.transformers_ will change in version 1.7 to match the format of the other transformers.
At the moment the remainder columns are stored as indices (of type int). With the same ColumnTransformer configuration, in the future they will be stored as column names (of type str).

  self.model = torch.load(net_filename)
	0.8037	 = Validation score   (accuracy)
	8.06s	 = Training   runtime
	0.03s	 = Validation runtime
Fitting model: LightGBMLarge ... Training model for up to 269.44s of the 269.44s of remaining time.
	0.8131	 = Validation score   (accuracy)
	0.74s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: WeightedEnsemble_L2 ... Training model for up to 299.56s of the 268.61s of remaining time.
	Ensemble Weights: {'LightGBM': 1.0}
	0.8411	 =



Unnamed: 0,accuracy,balanced_accuracy,mcc,roc_auc,f1,precision,recall
0,0.817927,0.778828,0.593756,0.856475,0.716157,0.811881,0.640625





Unnamed: 0,model,score_test,score_val,eval_metric,pred_time_test,pred_time_val,fit_time,pred_time_test_marginal,pred_time_val_marginal,fit_time_marginal,stack_level,can_infer,fit_order
0,LightGBM,0.817927,0.841121,accuracy,0.009537,0.003131,0.249972,0.009537,0.003131,0.249972,1,True,4
1,WeightedEnsemble_L2,0.817927,0.841121,accuracy,0.015003,0.004124,0.410796,0.005466,0.000993,0.160825,2,True,14
2,NeuralNetFastAI,0.815126,0.813084,accuracy,0.155555,0.113408,11.220496,0.155555,0.113408,11.220496,1,True,10
3,NeuralNetTorch,0.812325,0.803738,accuracy,0.312703,0.026331,8.06189,0.312703,0.026331,8.06189,1,True,12
4,LightGBMLarge,0.809524,0.813084,accuracy,0.017795,0.003872,0.736879,0.017795,0.003872,0.736879,1,True,13
5,XGBoost,0.806723,0.813084,accuracy,0.055758,0.005378,0.391832,0.055758,0.005378,0.391832,1,True,11
6,RandomForestEntr,0.806723,0.82243,accuracy,0.225424,0.076075,0.976612,0.225424,0.076075,0.976612,1,True,6
7,CatBoost,0.803922,0.841121,accuracy,0.013096,0.005139,1.137179,0.013096,0.005139,1.137179,1,True,7
8,RandomForestGini,0.803922,0.813084,accuracy,0.223681,0.072017,1.006399,0.223681,0.072017,1.006399,1,True,5
9,ExtraTreesEntr,0.80112,0.813084,accuracy,0.22994,0.077816,0.860995,0.22994,0.077816,0.860995,1,True,9





Unnamed: 0,importance,stddev,p_value,n,p99_high,p99_low
Sex,0.14902,0.012274,5e-06,5,0.174292,0.123747
Pclass,0.053782,0.007251,3.9e-05,5,0.06871,0.038853
Name,0.02409,0.004248,0.000111,5,0.032837,0.015343
Fare,0.022409,0.004852,0.000248,5,0.032399,0.012419
Embarked,0.014006,0.0101,0.018095,5,0.034801,-0.00679
Cabin,0.012325,0.004248,0.001455,5,0.021072,0.003578
Age,0.008964,0.005388,0.010238,5,0.020058,-0.002131
PassengerId,0.006162,0.007773,0.075472,5,0.022167,-0.009842
Parch,0.0,0.0,0.5,5,0.0,0.0
Ticket,0.0,0.0,0.5,5,0.0,0.0



best_model_name:  WeightedEnsemble_L2 
params:  {'use_orig_features': False, 'max_base_models': 25, 'max_base_models_per_type': 5, 'save_bag_folds': True}



(<autogluon.core.models.ensemble.weighted_ensemble_model.WeightedEnsembleModel at 0x7fd0ffe076d0>,
    accuracy  balanced_accuracy       mcc   roc_auc        f1  precision  \
 0  0.817927           0.778828  0.593756  0.856475  0.716157   0.811881   
 
      recall  
 0  0.640625  ,
                   model  score_test  score_val eval_metric  pred_time_test  \
 0              LightGBM    0.817927   0.841121    accuracy        0.009537   
 1   WeightedEnsemble_L2    0.817927   0.841121    accuracy        0.015003   
 2       NeuralNetFastAI    0.815126   0.813084    accuracy        0.155555   
 3        NeuralNetTorch    0.812325   0.803738    accuracy        0.312703   
 4         LightGBMLarge    0.809524   0.813084    accuracy        0.017795   
 5               XGBoost    0.806723   0.813084    accuracy        0.055758   
 6      RandomForestEntr    0.806723   0.822430    accuracy        0.225424   
 7              CatBoost    0.803922   0.841121    accuracy        0.013096   
 8   

In [12]:
!pip install gradio

Collecting gradio
  Downloading gradio-5.1.0-py3-none-any.whl.metadata (15 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Using cached aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0 (from gradio)
  Downloading fastapi-0.115.2-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Using cached ffmpy-0.4.0-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.4.0 (from gradio)
  Downloading gradio_client-1.4.0-py3-none-any.whl.metadata (7.1 kB)
Collecting httpx>=0.24.1 (from gradio)
  Using cached httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)
Collecting markupsafe~=2.0 (from gradio)
  Downloading MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)
Collecting pydantic>=2.0 (from gradio)
  Using cached pydantic-2.9.2-py3-none-any.whl.metadata (149 kB)
Collecting pydub (from gradio)
  Using cached pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.9 (from gradio

In [19]:
import gradio as gr
import pandas as pd
from autogluon.tabular import TabularPredictor
from sklearn.model_selection import train_test_split

# automl variant used by the Gradio callbacks: returns only the three result tables.
def automl(data, target, time=300, metric=None, test_size=0.4, random_state=10, presets='medium_quality'):
    """Split ``data``, train an AutoGluon ``TabularPredictor`` and return its result tables.

    The split is stratified on ``target`` only when ``metric`` is one of the
    classification metrics listed in the body; with ``metric=None`` it is not.

    Args:
        data: DataFrame holding the feature columns and the ``target`` column.
        target: Name of the label column.
        time: Training time limit in seconds, forwarded to ``fit(time_limit=...)``.
        metric: Metric name forwarded to ``TabularPredictor(eval_metric=...)``.
        test_size: Fraction of rows held out for testing.
        random_state: Seed for the train/test split.
        presets: Preset name forwarded to ``fit(presets=...)``.

    Returns:
        Tuple ``(result_df, leader_board, feature_importance)`` of DataFrames.

    Raises:
        KeyError: If ``target`` is not a column of ``data``.
    """
    # Fail fast, before any training starts, when the label column is misspelled.
    if target not in data.columns:
        raise KeyError(f"target column {target!r} not found; available columns: {list(data.columns)}")

    classification_metrics = ("accuracy", "roc_auc", "recall", "precision", "f1")
    # stratify=None is train_test_split's default, i.e. a plain shuffled split.
    stratify = data[target] if metric in classification_metrics else None
    train_data, test_data = train_test_split(
        data, stratify=stratify, test_size=test_size, random_state=random_state
    )

    model = TabularPredictor(label=target, eval_metric=metric)
    model.fit(train_data, time_limit=time, presets=presets)

    result_df = pd.DataFrame([model.evaluate(test_data)], index=[0])
    # Compare the performance of every trained model on the test split.
    leader_board = model.leaderboard(test_data)
    # Feature importance computed on the test split.
    feature_importance = model.feature_importance(test_data)

    return result_df, leader_board, feature_importance

# Data preview: first three rows of the uploaded file.
def preview_data(file):
    """Return the first three rows of the uploaded CSV for the preview table.

    Args:
        file: The file input's value: an object exposing the path as ``.name``,
            a plain path string, or ``None`` when no file is selected.

    Returns:
        A DataFrame with at most three rows, or ``None`` when there is no file,
        so the preview is emptied instead of the callback raising.
    """
    if file is None:
        return None
    # Accept both upload objects (path in `.name`) and plain path strings.
    path = getattr(file, "name", file)
    return pd.read_csv(path).head(3)

# Gradio callback: run the AutoML pipeline on the uploaded CSV.
def gradio_automl(file, target, time, metric):
    """Validate the form inputs, run ``automl`` and return the three result tables.

    Args:
        file: The file input's value (object with ``.name``, path string, or ``None``).
        target: Label column name typed by the user.
        time: Training time limit in seconds from the number input.
        metric: Metric name chosen in the dropdown, or ``None``.

    Returns:
        Tuple ``(result_df, leader_board, feature_importance)`` of DataFrames, with the
        feature names of ``feature_importance`` exposed as a ``feature`` column.

    Raises:
        gr.Error: If no file was uploaded, the target column does not exist, or the
            time limit is missing or not positive.
    """
    if file is None:
        raise gr.Error("CSV 파일을 먼저 업로드해 주세요.")

    # Read the uploaded CSV into a pandas DataFrame.
    data = pd.read_csv(getattr(file, "name", file))

    # Tolerate stray whitespace around the typed column name.
    if target not in data.columns:
        target = (target or "").strip()
    if target not in data.columns:
        available = ", ".join(map(str, data.columns))
        raise gr.Error(f"타겟 변수 '{target}'을(를) 찾을 수 없습니다. 사용 가능한 컬럼: {available}")

    # An empty number field arrives as None, which would otherwise be passed on as
    # "no time limit" to training.
    if time is None or time <= 0:
        raise gr.Error("분석 시간은 0보다 큰 값(초)이어야 합니다.")

    # Run the AutoML pipeline.
    result_df, leader_board, feature_importance = automl(data, target, time, metric)

    # The feature names live in the DataFrame index; move them into a regular column
    # so the table shown in the UI includes them.
    feature_importance = feature_importance.rename_axis("feature").reset_index()

    return result_df, leader_board, feature_importance

# Build the Gradio interface: inputs, the data preview, and three result tables.
with gr.Blocks() as demo:
    file_input = gr.File(label="CSV 데이터 파일")
    data_preview = gr.Dataframe(label="데이터 미리보기 (head 3)")
    target_input = gr.Textbox(label="타겟 변수 이름")
    time_input = gr.Number(label="분석 시간 (초)", value=300)
    metric_input = gr.Dropdown(choices=["accuracy", "roc_auc", "recall", "precision", "f1", None], label="성능 지표")
    
    # Analysis results (three dataframes).
    result_output = gr.Dataframe(label="Result Dataframe")
    leaderboard_output = gr.Dataframe(label="Leader Board")
    feature_importance_output = gr.Dataframe(label="Feature Importance")

    # Refresh the data preview whenever the file input changes.
    file_input.change(fn=preview_data, inputs=file_input, outputs=data_preview)

    # Run the analysis when the button is clicked.
    submit_button = gr.Button("분석 실행")
    submit_button.click(fn=gradio_automl, inputs=[file_input, target_input, time_input, metric_input], outputs=[result_output, leaderboard_output, feature_importance_output])

# Launch the Gradio app with a public share link.
# NOTE(review): share=True publishes a public URL (see the recorded output); anyone with
# the link can upload files and start training on this machine — confirm this is intended.
demo.launch(inline=False, share=True)


* Running on local URL:  http://127.0.0.1:7864
* Running on public URL: https://2be9631a94060bb066.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




No path specified. Models will be saved in: "AutogluonModels/ag-20241017_044005"
Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.1.1
Python Version:     3.10.15
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Fri Mar 29 23:14:13 UTC 2024
CPU Count:          8
Memory Avail:       12.94 GB / 14.61 GB (88.5%)
Disk Space Avail:   21.67 GB / 223.03 GB (9.7%)
Presets specified: ['medium_quality']
Beginning AutoGluon training ... Time limit = 300s
AutoGluon will save models to "AutogluonModels/ag-20241017_044005"
Train Data Rows:    300
Train Data Columns: 7
Label Column:       Yearly Amount Spent
AutoGluon infers your prediction problem is: 'regression' (because dtype of label-column == float and many unique label-values observed).
	Label info (max, min, mean, stddev): (712.3963268096637, 275.9184206503857, 496.65055, 72.48768)
	If 'regression' is not the correct problem_type, please manually specify the problem_type parameter during Predictor init (You 

	4.68s	 = Training   runtime
	0.01s	 = Validation runtime
Fitting model: LightGBMLarge ... Training model for up to 277.25s of the 277.24s of remaining time.
	-24.7143	 = Validation score   (-root_mean_squared_error)
	10.61s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: WeightedEnsemble_L2 ... Training model for up to 299.52s of the 266.51s of remaining time.
	Ensemble Weights: {'NeuralNetFastAI': 0.765, 'NeuralNetTorch': 0.176, 'LightGBM': 0.059}
	-14.7067	 = Validation score   (-root_mean_squared_error)
	0.02s	 = Training   runtime
	0.0s	 = Validation runtime
AutoGluon training complete, total runtime = 33.73s ... Best model: WeightedEnsemble_L2 | Estimated inference throughput: 1763.0 rows/s (60 batch size)
TabularPredictor saved. To load, use: predictor = TabularPredictor.load("AutogluonModels/ag-20241017_044005")
These features in provided data are not utilized by the predictor and will be ignored: ['Email']
Computing feature importance via permutation shuffling

	3.38s	 = Training   runtime
	0.01s	 = Validation runtime
Fitting model: XGBoost ... Training model for up to 284.69s of the 284.69s of remaining time.
	-26.0482	 = Validation score   (-root_mean_squared_error)
	3.42s	 = Training   runtime
	0.01s	 = Validation runtime
Fitting model: NeuralNetTorch ... Training model for up to 281.2s of the 281.2s of remaining time.
  self.model = torch.load(net_filename)
	-17.8843	 = Validation score   (-root_mean_squared_error)
	5.1s	 = Training   runtime
	0.01s	 = Validation runtime
Fitting model: LightGBMLarge ... Training model for up to 276.03s of the 276.03s of remaining time.
	-24.7143	 = Validation score   (-root_mean_squared_error)
	11.01s	 = Training   runtime
	0.0s	 = Validation runtime
Fitting model: WeightedEnsemble_L2 ... Training model for up to 299.56s of the 264.89s of remaining time.
	Ensemble Weights: {'NeuralNetFastAI': 0.765, 'NeuralNetTorch': 0.176, 'LightGBM': 0.059}
	-14.7067	 = Validation score   (-root_mean_squared_error)
	0.0