In [None]:
from covidcomp.data import RawRepresentation, DerivedRepresentation
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from fomlads.model.classification import logistic_regression_fit, logistic_regression_predict, shared_covariance_model_fit, shared_covariance_model_predict
from fomlads.evaluate.eval_classification import eval_accuracy

## Create raw data representation from `./covid.csv` and auxiliary datasets

In [None]:
# Construct the raw data representation, passing the COVID CSV path
# (relative to the notebook's working directory).
# NOTE(review): presumably RawRepresentation also loads the auxiliary
# datasets internally, since no other path is passed here — confirm.
raw = RawRepresentation("./covid.csv")

# Flat comparison
## Get flat data representation

In [None]:
# Unpack the flat (unpartitioned) representation into its inputs and targets.
# NOTE(review): presumably array-likes with matching first dimension — confirm.
flat_input, flat_target = raw.get_flat_representation()

## Fit the model on the flat representation and test it

In [None]:
# Wrap the flat inputs/targets in a derived representation (default split
# settings) and pull out its train/test partitions.
flat_derived = DerivedRepresentation(flat_input, flat_target)
flat_train_inputs, flat_train_targets = (
    flat_derived.train_inputs,
    flat_derived.train_targets,
)
flat_test_inputs, flat_test_targets = (
    flat_derived.test_inputs,
    flat_derived.test_targets,
)

print(f"Number of pairs in flat: {flat_train_inputs.shape[0]}")

# Fit logistic regression on the training partition, predict on the test
# partition, and report the resulting accuracy.
weights = logistic_regression_fit(flat_train_inputs, flat_train_targets)
flat_test_predictions = logistic_regression_predict(flat_test_inputs, weights)
accuracy = eval_accuracy(flat_test_targets, flat_test_predictions)
print(f'Flat Accuracy - : {accuracy}')

# Partition by Continent

## Partition the raw representation by continent. For each continent, build the derived representation and run the experiment

In [None]:
# Partition the raw data by continent; each entry maps a continent to an
# (inputs, targets) pair.
partitioned_by_continent_dict = raw.get_partitioned_representation("continent")

# Running total of per-continent test accuracies; averaged after the loop.
accuracy_sum = 0

for continent, (raw_input, raw_target) in partitioned_by_continent_dict.items():
    print(f"\nNumber of countries in {continent}: {raw_input.shape[0]}")

    # Build this continent's train/test partitions.
    # NOTE(review): test_fraction=0.5 presumably holds out half the data — confirm.
    derived_continent = DerivedRepresentation(raw_input, raw_target, test_fraction=0.5)
    train_inputs, train_targets = derived_continent.train_inputs, derived_continent.train_targets
    test_inputs, test_targets = derived_continent.test_inputs, derived_continent.test_targets

    print(f"\nNumber of training pairs in {continent}: {train_inputs.shape[0]}")

    # NOTE(review): 10e-4 evaluates to 1e-3, not 1e-4 — confirm the intended threshold.
    weights = logistic_regression_fit(train_inputs, train_targets, termination_threshold=10e-4)
    test_predictions = logistic_regression_predict(test_inputs, weights)

    accuracy = eval_accuracy(test_targets, test_predictions)
    accuracy_sum += accuracy

    print(f'{continent} Accuracy - : {accuracy}')

# Unweighted mean over continents (each continent counts equally).
print(f'\nMean Accuracy by continents - : {accuracy_sum/len(partitioned_by_continent_dict)}')

# Partition by Income Group

## Partition the raw representation by income group. For each income group, build the derived representation and run the experiment

In [None]:
# Partition the raw data by income group; each entry maps an income group to
# an (inputs, targets) pair.
partitioned_by_income = raw.get_partitioned_representation("income_group")

# Running total of per-group test accuracies; averaged after the loop.
accuracy_sum = 0

for income_group in partitioned_by_income:
    raw_input, raw_target = partitioned_by_income[income_group]
    print(f"\nNumber of countries in {income_group}: {raw_input.shape[0]}")

    # Build this income group's train/test partitions.
    # NOTE(review): test_fraction=0.2 presumably holds out 20% of the data — confirm.
    derived_income = DerivedRepresentation(raw_input, raw_target, test_fraction=0.2)

    train_inputs = derived_income.train_inputs
    train_targets = derived_income.train_targets
    test_inputs = derived_income.test_inputs
    test_targets = derived_income.test_targets

    print(f"\nNumber of training pairs in {income_group}: {train_inputs.shape[0]}")

    # NOTE(review): 10e-15 evaluates to 1e-14; this is far tighter than the
    # threshold used for the continent experiment — confirm this is intended.
    weights = logistic_regression_fit(train_inputs, train_targets, termination_threshold=10e-15)
    test_predictions = logistic_regression_predict(test_inputs, weights)

    accuracy = eval_accuracy(test_targets, test_predictions)
    accuracy_sum += accuracy

    # Label the result with the current income group. The previous version
    # printed `continent`, a stale variable left over from the continent
    # cell's loop, so every line carried the wrong label (or raised NameError
    # when that cell had not been run).
    print(f'{income_group} Accuracy - : {accuracy}')

# Unweighted mean over income groups (each group counts equally).
print(f'\nMean Accuracy by income group - : {accuracy_sum/len(partitioned_by_income)}')