In [1]:
from feature_selection_utils import *
from supervised_utils import *
from neural_net_utils import *

### Feature selection per il task di regressione

In [2]:
# Regression dataset used by the cells of this section.
df = pd.read_csv("../data/movies_features_reg_last30.csv")

# Column-role mapping handed to the project helpers: the target column, the
# columns to drop, and the columns listed under each preprocessing key.
cols = dict(
    target="score",
    drop=["id", "title"],
    dummies=["rating_cat", "genre_cat"],
    labels=[],
    standardize=["runtime", "director_age", "star_age"],
    minmax=["popularity", "budget", "director_experience", "star_experience"],
)

# Load the best regression model found so far (reported MSE: 0.43).
# joblib.load unpickles the file, so only trusted model files should be loaded.
best_model = joblib.load("models/Random_Forest_Reg-v2.joblib")

In [None]:
# Display the mutual information of the features (top 15 rows only).
mi_ranking_reg = plot_mutual_information(df, cols, task="regression")
mi_ranking_reg.head(15)

In [None]:
# Display the feature importances for the loaded model (top 15 rows only).
importance_ranking_reg = plot_feature_importances(
    df, cols, best_model, task="regression"
)
importance_ranking_reg.head(15)

In [None]:
# Run the manual forward selection to obtain a top-15 feature set.
manual_forward_selection(
    df,
    cols,
    best_model,
    k_features=15,
    task="regression",
)

#### Sfida con la rete neurale feed-forward (su un sottoinsieme di features)

In [None]:
# Feature subset handed to the feed-forward network (order kept as-is).
best_features = [
    "popularity",
    "runtime",
    "budget",
    "age",
    "budget_efficiency",
    "director_age",
    "star_age",
    "genre_cat_Animation",
    "genre_cat_Action",
    "genre_cat_Horror",
    "rating_cat_PG-13",
    "director_experience",
    "star_experience",
    "director_is_panned",
    "star_is_panned",
]

# Train the network on the selected features.
trainer = Trainer(
    df,
    cols,
    best_features,
    task="regression",
)
trainer.fit()

### Feature selection per il task di classificazione

In [10]:
# Classification dataset used by the cells of this section.
df = pd.read_csv("../data/movies_features_cls_last30.csv")

# Column-role mapping handed to the project helpers: the target column, the
# columns to drop, and the columns listed under each preprocessing key.
cols = dict(
    target="budget_efficiency_cat",
    drop=["id", "title", "popularity"],
    dummies=["rating_cat", "genre_cat"],
    labels=[],
    standardize=["runtime", "director_age", "star_age"],
    minmax=["budget", "director_experience", "star_experience"],
)

# Load the best classification model found so far (reported accuracy: 53%).
# joblib.load unpickles the file, so only trusted model files should be loaded.
best_model = joblib.load("models/Random_Forest_Cls-v2(resampled).joblib")

In [None]:
# Display the mutual information of the features (top 15 rows only).
mi_ranking_cls = plot_mutual_information(df, cols, task="classification")
mi_ranking_cls.head(15)

In [None]:
# Display the feature importances for the loaded model (top 15 rows only).
importance_ranking_cls = plot_feature_importances(
    df, cols, best_model, task="classification"
)
importance_ranking_cls.head(15)

In [None]:
# Run the manual forward selection to obtain a top-15 feature set.
manual_forward_selection(
    df,
    cols,
    best_model,
    k_features=15,
    task="classification",
)

#### Sfida con la rete neurale feed-forward (su un sottoinsieme di features)

In [None]:
# Feature subset handed to the feed-forward network (order kept as-is).
best_features = [
    "score",
    "runtime",
    "budget",
    "age",
    "director_age",
    "star_age",
    "director_efficiency",
    "star_efficiency",
    "director_experience",
    "star_experience",
    "genre_cat_Comedy",
    "genre_cat_Drama",
    "genre_cat_Horror",
    "rating_cat_PG-13",
    "rating_cat_R",
]

# Train the network on the selected features.
# NOTE(review): the baseline model file loaded for this section is named
# "(resampled)" while the network is trained with resample=False -- confirm
# that this comparison is intended.
trainer = Trainer(
    df,
    cols,
    best_features,
    task="classification",
    resample=False,
)
trainer.fit()