In [None]:
from covidcomp.data import RawRepresentation, DerivedRepresentation
from covidcomp.model import LogisticRegression
from covidcomp.experiment import ExperimentRunner
from covidcomp.plot import Plotter
from sklearn.decomposition import PCA

# Raw data representation backed by the CSV file next to this notebook.
raw = RawRepresentation("./covid.csv")

# Experiment configuration: the model, the fold count and the PCA transform
# are all handed to the runner. PCA is configured to keep 8 components.
model = LogisticRegression()
num_folds = 6  # presumably the number of cross-validation folds -- confirm in ExperimentRunner
pca = PCA(n_components=8)
runner = ExperimentRunner(
    model,
    num_folds,
    pca=pca,
)

# A single plotter instance is shared by every plotting cell below.
plotter = Plotter()

# Three views of the same raw data, requested in this order:
#   * no argument      -> the unpartitioned view (read later via its "Flat" key)
#   * "continent"      -> view requested with the "continent" key
#   * "income_group"   -> view requested with the "income_group" key
(
    flat_dict,
    partitioned_by_continent_dict,
    partitioned_by_income,
) = (
    raw.get_representation(),
    raw.get_representation("continent"),
    raw.get_representation("income_group"),
)

## Plot correlation matrix heatmap

In [None]:
# Hand the raw representation's frame to the plotter's correlation plot.
plotter.plot_data_corr(
    raw.frame,
)

## Plot the inputs before preprocessing

In [None]:
# The flat representation keeps one (inputs, targets) pair under the "Flat" key.
# Both names are reused by the preprocessing cell further down.
(raw_inputs, raw_targets) = flat_dict["Flat"]

# Histogram of the inputs exactly as they come out of the raw representation.
plotter.plot_hist(
    raw_inputs,
    title="Input Data Before Preprocessing",
)

## Plot the inputs after preprocessing

In [None]:
# Build the derived representation from the raw flat inputs and targets;
# `flat_derived` is reused by the PCA explained-variance cell.
flat_derived = DerivedRepresentation(
    raw_inputs,
    raw_targets,
)

# Same histogram as before, this time over the derived representation's
# preprocessed inputs, for a before/after comparison.
plotter.plot_hist(
    flat_derived.preprocessed_inputs,
    title="Input Data After Preprocessing",
)

## Plot the ratio of variance explained in PCA

In [None]:
# Plot the PCA explained variance for the derived representation's inputs.
# NOTE(review): this passes `flat_derived.inputs`, while the histogram cell
# uses `flat_derived.preprocessed_inputs` -- confirm which attribute is
# intended here.
plotter.plot_pca_explained_variance(
    flat_derived.inputs,
)

# Flat comparison
## Get the flat data representation, then fit and test the model on it

In [None]:
# Run the experiment on the unpartitioned ("Flat") representation.
results = runner.run_partition_experiment(
    flat_dict,
    partitioning_method="Flat",
)

# Plot the results, then report the weighted average accuracy.
plotter.plot_partitioning_method_results(results)
print(f"Weighted accuracy for Flat: {results.weighted_average_accuracy}")

# Partition by Continent

## Generate raw and partitioned representations by continent. For each continent, get the derived representation and conduct the experiment

In [None]:
# Run the experiment on the continent-partitioned representation.
# `results` is deliberately rebound; the previous cell's value is not reused.
results = runner.run_partition_experiment(
    partitioned_by_continent_dict,
    partitioning_method="Continent",
)

# Plot the results, then report the weighted average accuracy.
plotter.plot_partitioning_method_results(results)
print(f"Weighted accuracy for Continent: {results.weighted_average_accuracy}")

# Partition by Income Group

## Generate raw and partitioned representations by income group. For each income group, get the derived representation and conduct the experiment

In [None]:
# Run the experiment on the income-group-partitioned representation.
# `results` is deliberately rebound; the previous cell's value is not reused.
results = runner.run_partition_experiment(
    partitioned_by_income,
    partitioning_method="Income Group",
)

# Plot the results, then report the weighted average accuracy.
plotter.plot_partitioning_method_results(results)
print(f"Weighted accuracy for Income Group: {results.weighted_average_accuracy}")