# VU Econometrics and Data Science: Case Study
```
Author(s): Jacco Broere
```


### Setup
- Set up the config.ini file
- Install necessary packages
- Download and unpack data



In [1]:
# import utility modules
import pandas as pd
import numpy as np
import configparser
import os
from joblib import dump, load
import datetime as dt
from tqdm import tqdm
import json


# import sweetviz
import matplotlib.pyplot as plt

# import optuna
import optuna
optuna.logging.set_verbosity(optuna.logging.ERROR)

# helper functions
from helpers.helper_functions import transform_data, add_actuals, get_pca_pipeline, get_model
from helpers.helper_classes import AddFeatureNames, GeneSPCA, EnetSPCA
from helpers.config.hyperparameters import OptunaOptimzation
from helpers.config.hyperparameters import PCA_LGBM_CFG, SPCA_LGBM_CFG, GSPCA_LGBM_CFG, PCA_LR_CFG, SPCA_LR_CFG, GSPCA_LR_CFG


# sklearn
from sklearn.decomposition import PCA, SparsePCA
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score, train_test_split, ShuffleSplit
from sklearn.metrics import confusion_matrix, classification_report, ConfusionMatrixDisplay, roc_auc_score, roc_curve, RocCurveDisplay, f1_score
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier


# LightGBM
from lightgbm import LGBMClassifier

# feature_engine
from feature_engine.selection import DropFeatures, DropConstantFeatures, DropDuplicateFeatures

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Read config.ini file.
# The relative path is resolved against the current working directory, so the
# file has to be read before the os.chdir() call below.
config = configparser.ConfigParser()
# ConfigParser.read() skips files it cannot open and returns the list of files
# it actually parsed. Fail with an explicit message here rather than with an
# opaque KeyError on the first section lookup.
if not config.read('config.ini'):
    raise FileNotFoundError(
        f"Could not read 'config.ini' from working directory {os.getcwd()!r}"
    )

# Switch the working directory to the configured project root.
os.chdir(config['PATH']['ROOT_DIR'])

OPTUNA_DIR = config['LOGGING']['OPTUNA_DIR']
DATA_DIR = config['PATH']['DATA_DIR']
# The DATASETS option holds a JSON-encoded value; decode it into a Python object.
DATASETS = json.loads(config.get('PARAMS', 'DATASETS'))

In [3]:
# Load the serialized data library stored under the configured data directory.
# NOTE(review): joblib.load unpickles the file's contents, so it should only be
# pointed at files from a trusted source.
data = load(f"{DATA_DIR}/microarray-data-dict.lib")

In [14]:
# Take the first dataset name from the configuration ...
dataset = DATASETS[0]
# ... which is immediately replaced by a hard-coded name, so the configured
# value is not used by the lookups below.
dataset = 'alon'
# Look up the entry for the selected dataset, then its "none" variant
# (presumably the untransformed data — TODO confirm), and pull out X_train.
dataset_variants = data[dataset]
X_train = dataset_variants["none"]['X_train']
# Last expression of the cell: the notebook displays the shape.
X_train.shape

(41, 2000)