In [None]:
from sklearn.model_selection import train_test_split
import pandas as pd
import os


def load_and_split_data(filename, data_folder, target_column='pct_pos_total',
                        test_size=0.2, random_state=42):
    """Read a CSV file and split it into train and test DataFrames.

    Parameters
    ----------
    filename : str
        Name of the CSV file inside ``data_folder``.
    data_folder : str
        Directory that contains ``filename``.
    target_column : str, default 'pct_pos_total'
        Column separated out as the target before splitting.
    test_size : float or int, default 0.2
        Forwarded unchanged to ``train_test_split``.
    random_state : int or None, default 42
        Forwarded unchanged to ``train_test_split``.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(train_data, test_data)``. In each frame the feature columns come
        first and ``target_column`` is the last column.

    Raises
    ------
    KeyError
        If ``target_column`` is not a column of the loaded file.
    """
    data_path = os.path.join(data_folder, filename)
    data = pd.read_csv(data_path)

    # Fail early with a message naming the file instead of the generic
    # KeyError that ``DataFrame.drop`` would raise.
    if target_column not in data.columns:
        raise KeyError(
            f"target column {target_column!r} not found in {data_path!r}"
        )

    # Split into features and target
    X = data.drop(columns=[target_column])
    y = data[target_column]

    # Train-test split
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # Re-attach the target to each feature frame as the last column
    train_data = pd.concat([X_train, y_train], axis=1)
    test_data = pd.concat([X_test, y_test], axis=1)

    return train_data, test_data

# Locations and file names of the two dataset variants (raw and cleaned).
data_folder_raw = os.path.join("..", "data", "01_raw")
data_folder_cleaned = os.path.join("..", "data", "02_interim")
raw_filename = "top_500.csv"
cleaned_filename = "cleaned_top_500.csv"

# Build one train/test pair per variant, using the function's default
# target column.
train_cleaned, test_cleaned = load_and_split_data(
    filename=cleaned_filename,
    data_folder=data_folder_cleaned,
)
train_raw, test_raw = load_and_split_data(
    filename=raw_filename,
    data_folder=data_folder_raw,
)



In [None]:
from autogluon.tabular import TabularDataset, TabularPredictor
# Load the data: wrap the cleaned train/test frames as AutoGluon datasets.
train_data = TabularDataset(train_cleaned)
test_data = TabularDataset(test_cleaned)

# Define and fit the predictor (original author's TODO: supply the correct data).
# NOTE(review): a later cell fits another predictor into the same
# '../data/03_model' path, so the artifacts saved here may be overwritten.
predictor_auto = TabularPredictor(
    label='pct_pos_total',
    path='../data/03_model',
).fit(train_data, presets='good', time_limit=30)

# Prediction on the held-out test set.
predictions_auto = predictor_auto.predict(test_data)
print(predictions_auto)

# Leaderboard of the trained models.
leaderboard_auto = predictor_auto.leaderboard()
print(leaderboard_auto)

# Evaluation: score on the held-out test set. Scoring on the training data
# (as before) reports an optimistic figure that says nothing about
# generalization.
print(predictor_auto.evaluate(test_data))

# Info
print("hiper!!!!----------------------------")
model_name = predictor_auto.model_best  # name of the best model
model_info = predictor_auto.info()  # dictionary with information about the training

# Hyperparameters of the model selected as best (shown as the cell output).
model_info['model_info'][model_name]['hyperparameters']

In [None]:
# Load the data: wrap the raw train/test frames as AutoGluon datasets.
# NOTE(review): this cell reuses the variable names (train_data, test_data,
# predictor_auto, ...) and the model path of the previous cell, so running it
# replaces the cleaned-data results held in the kernel.
train_data = TabularDataset(train_raw)
test_data = TabularDataset(test_raw)

# Define and fit the predictor (original author's TODO: supply the correct data).
predictor_auto = TabularPredictor(
    label='pct_pos_total',
    path='../data/03_model',
).fit(train_data, presets='good', time_limit=30)

# Prediction on the held-out test set.
predictions_auto = predictor_auto.predict(test_data)
print(predictions_auto)

# Leaderboard of the trained models.
leaderboard_auto = predictor_auto.leaderboard()
print(leaderboard_auto)

# Evaluation: score on the held-out test set. Scoring on the training data
# (as before) reports an optimistic figure that says nothing about
# generalization.
print(predictor_auto.evaluate(test_data))

# Info
print("hiper!!!!----------------------------")
model_name = predictor_auto.model_best  # name of the best model
model_info = predictor_auto.info()  # dictionary with information about the training

# Hyperparameters of the model selected as best (shown as the cell output).
model_info['model_info'][model_name]['hyperparameters']


