In [51]:
from functions import load_data, preprocess_data, run_svc, run_dummy, run_randomforest
import numpy as np
import pandas as pd
from nltk.corpus import stopwords
from nltk.stem.snowball import EnglishStemmer, SpanishStemmer

import nltk
nltk.download('stopwords')

# set 5 different seeds for reproducibility
seeds = [20210102, 20210101, 20210102, 20210103, 20210104, 20210105]

# initialize dictionary for countries/datasets
countries = {"Venezuela": "raw/vz-tweets 2.csv", "Ghana": "raw/gh-tweets 2.csv", "Philippines": "raw/ph-tweets 2.csv"}

# define dataframe to store results
results_dummy = pd.DataFrame(
    columns=["Baseline", "Country", "Accuracy", "Accuracy Std. Dev.", "Precision", "Precision Std. Dev.", "Recall",
             "Recall Std. Dev.", "F1", "F1 Std. Dev."])
results_svc= pd.DataFrame(
    columns=["Baseline", "Country", "Accuracy", "Accuracy Std. Dev.", "Precision", "Precision Std. Dev.", "Recall",
             "Recall Std. Dev.", "F1", "F1 Std. Dev."])
results_randomforest = pd.DataFrame(
    columns=["Baseline", "Country", "Accuracy", "Accuracy Std. Dev.", "Precision", "Precision Std. Dev.", "Recall",
             "Recall Std. Dev.", "F1", "F1 Std. Dev."])
results_tuning_svc = pd.DataFrame(columns=["Country", "kernel", "C", "class_weight", "gamma", "Tuning F1"])
results_tuning_randomforest = pd.DataFrame(columns=["Country", "max_depth", "n_estimators", "class_weight", "max_features", "Tuning F1"])

[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/andreaskuepfer/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [52]:
# loop over all countries

# Todo:
# Implement Naive Bayes model
# Run all

for country, path in countries.items():
    print("\nCurrent Country: " + country)
    # initialize stopwords and stemmer in correct language
    stops = set(stopwords.words("spanish")) if country == "Venezuela" else set(stopwords.words("english"))
    stemmer = SpanishStemmer() if country == "Venezuela" else EnglishStemmer()
    
    results_scores_current_dummy = []
    results_scores_current_svc = []
    results_scores_current_randomforest = []
    results_scores_current_untuned_svc = []
    results_scores_current_untuned_randomforest = []
    results_tuning_current_svc = []
    results_tuning_current_untuned_svc = []
    results_tuning_current_randomforest = []
    results_tuning_current_untuned_randomforest = []
    
    # preprocess the data
    data = preprocess_data(path, stops, stemmer)

    # loop over seeds, load data and tune/train baseline models
    for seed in seeds:
        X_train_tfidf, X_test_tfidf, y_train, y_test = load_data(data, seed)
        
        # Dummy Classifier
        print("Dummy...")
        result_scores = run_dummy(X_train_tfidf, X_test_tfidf, y_train, y_test)
        results_scores_current_dummy.append(result_scores)
        print(result_scores)

        # SVC Tuned
        print("SVC...")
        result_scores, results_tuning_current = run_svc(X_train_tfidf, X_test_tfidf, y_train, y_test)
        results_scores_current_svc.append(result_scores)
        results_tuning_svc = results_tuning_svc.append({"Country": country,
                                                "kernel": results_tuning_current[0],
                                                "C": results_tuning_current[1],
                                                "class_weight": results_tuning_current[2],
                                                "gamma": results_tuning_current[3],
                                                "Tuning F1": results_tuning_current[4]}, ignore_index=True)
        print(result_scores)

        # SVC Untuned
        print("SVC Untuned...")
        result_scores = run_svc(X_train_tfidf, X_test_tfidf, y_train, y_test, tune = False)
        results_tuning_current_untuned_svc.append(result_scores)
        print(result_scores)
        
        # Random Forest Tuned
        print("Random Forest...")
        result_scores, results_tuning_current = run_randomforest(X_train_tfidf, X_test_tfidf, y_train, y_test)
        results_scores_current_randomforest.append(result_scores)
        results_tuning_randomforest = results_tuning_randomforest.append({"Country": country,
                                                "max_depth": results_tuning_current[0],
                                                "n_estimators": results_tuning_current[1],
                                                "class_weight": results_tuning_current[2],
                                                "max_features": results_tuning_current[3],
                                                "Tuning F1": results_tuning_current[4]}, ignore_index=True)
        print(result_scores)
        
        # Random Forest Untuned  
        print("Random Forest Untuned...")
        result_scores = run_randomforest(X_train_tfidf, X_test_tfidf, y_train, y_test, tune = False)
        results_tuning_current_untuned_randomforest.append(result_scores)
        print(result_scores)
        
    # calculate means/standard deviations from results
    # Dummy Classifier
    results_current_mean_dummy = np.array(results_scores_current_dummy).mean(axis=0)
    results_std_dev_dummy = np.array(results_scores_current_dummy).std(axis=0)
    results_dummy = results_dummy.append({"Baseline": "Dummy", "Country": country,
                              "Accuracy": results_current_mean_dummy[0],
                              "Accuracy Std. Dev.": results_std_dev_dummy[0],
                              "Precision": results_current_mean_dummy[1],
                              "Precision Std. Dev.": results_std_dev_dummy[1],
                              "Recall": results_current_mean_dummy[2],
                              "Recall Std. Dev.": results_std_dev_dummy[2],
                              "F1": results_current_mean_dummy[3],
                              "F1 Std. Dev.": results_std_dev_dummy[3]}, ignore_index=True)
                              
    # SVC Tuned
    results_current_mean_svc = np.array(results_scores_current_svc).mean(axis=0)
    results_std_dev_svc = np.array(results_scores_current_svc).std(axis=0)
    results_svc = results_svc.append({"Baseline": "SVM", "Country": country,
                              "Accuracy": results_current_mean_svc[0],
                              "Accuracy Std. Dev.": results_std_dev_svc[0],
                              "Precision": results_current_mean_svc[1],
                              "Precision Std. Dev.": results_std_dev_svc[1],
                              "Recall": results_current_mean_svc[2],
                              "Recall Std. Dev.": results_std_dev_svc[2],
                              "F1": results_current_mean_svc[3],
                              "F1 Std. Dev.": results_std_dev_svc[3]}, ignore_index=True)
                              
    # SVC Untuned                              
    results_current_mean_untuned_svc = np.array(results_scores_current_untuned_svc).mean(axis=0)
    results_std_dev_untuned_svc = np.array(results_scores_current_untuned_svc).std(axis=0)
    results_svc = results_svc.append({"Baseline": "SVM Untuned", "Country": country,
                              "Accuracy": results_current_mean_untuned_svc[0],
                              "Accuracy Std. Dev.": results_std_dev_untuned_svc[0],
                              "Precision": results_current_mean_untuned_svc[1],
                              "Precision Std. Dev.": results_std_dev_untuned_svc[1],
                              "Recall": results_current_mean_untuned_svc[2],
                              "Recall Std. Dev.": results_std_dev_untuned_svc[2],
                              "F1": results_current_mean_untuned_svc[3],
                              "F1 Std. Dev.": results_std_dev_untuned_svc[3]}, ignore_index=True)
                              
    # Random Forest Tuned
    results_current_mean_randomforest = np.array(results_scores_current_randomforest).mean(axis=0)
    results_std_dev_randomforest = np.array(results_scores_current_randomforest).std(axis=0)
    results_randomforest = results_randomforest.append({"Baseline": "Random Forest", "Country": country,
                              "Accuracy": results_current_mean_randomforest[0],
                              "Accuracy Std. Dev.": results_std_dev_randomforest[0],
                              "Precision": results_current_mean_randomforest[1],
                              "Precision Std. Dev.": results_std_dev_randomforest[1],
                              "Recall": results_current_mean_randomforest[2],
                              "Recall Std. Dev.": results_std_dev_randomforest[2],
                              "F1": results_current_mean_randomforest[3],
                              "F1 Std. Dev.": results_std_dev_randomforest[3]}, ignore_index=True)
                              
    # Random Forest Untuned
    results_current_mean_untuned_randomforest = np.array(results_scores_current_untuned_randomforest).mean(axis=0)
    results_std_dev_untuned_randomforest = np.array(results_scores_current_untuned_randomforest).std(axis=0)
    results_randomforest = results_randomforest.append({"Baseline": "Random Forest Untuned", "Country": country,
                              "Accuracy": results_current_mean_untuned_randomforest[0],
                              "Accuracy Std. Dev.": results_std_dev_untuned_randomforest[0],
                              "Precision": results_current_mean_untuned_randomforest[1],
                              "Precision Std. Dev.": results_std_dev_untuned_randomforest[1],
                              "Recall": results_current_mean_untuned_randomforest[2],
                              "Recall Std. Dev.": results_std_dev_untuned_randomforest[2],
                              "F1": results_current_mean_untuned_randomforest[3],
                              "F1 Std. Dev.": results_std_dev_untuned_randomforest[3]}, ignore_index=True)


Current Country: Venezuela
SVC...
Fitting 5 folds for each of 484 candidates, totalling 2420 fits
[CV] END svc__C=2980.9579870417283, svc__class_weight=balanced, svc__gamma=0.0001, svc__kernel=rbf; total time=   0.3s
[CV] END svc__C=2980.9579870417283, svc__class_weight=balanced, svc__gamma=0.0001, svc__kernel=sigmoid; total time=   0.4s
[CV] END svc__C=2980.9579870417283, svc__class_weight=balanced, svc__gamma=0.001, svc__kernel=rbf; total time=   0.2s
[CV] END svc__C=2980.9579870417283, svc__class_weight=balanced, svc__gamma=0.001, svc__kernel=sigmoid; total time=   0.2s
[CV] END svc__C=2980.9579870417283, svc__class_weight=balanced, svc__gamma=0.01, svc__kernel=rbf; total time=   0.2s
[CV] END svc__C=2980.9579870417283, svc__class_weight=balanced, svc__gamma=0.01, svc__kernel=poly; total time=   0.8s
[CV] END svc__C=2980.9579870417283, svc__class_weight=balanced, svc__gamma=0.1, svc__kernel=sigmoid; total time=   0.2s
[CV] END svc__C=2980.9579870417283, svc__class_weight=balanced, 

Best Tuning Score is 0.5411752556444264 with params {'svc__C': 403.4287934927351, 'svc__class_weight': 'balanced', 'svc__gamma': 0.0001, 'svc__kernel': 'rbf'}
[0.9273153575615475, 0.38461538461538464, 0.5319148936170213, 0.44642857142857145]

Current Country: Ghana
SVC...
Fitting 5 folds for each of 484 candidates, totalling 2420 fits
Best Tuning Score is 0.6661288515406163 with params {'svc__C': 2980.9579870417283, 'svc__class_weight': 'balanced', 'svc__gamma': 0.0001, 'svc__kernel': 'rbf'}
[0.9468503937007874, 0.5714285714285714, 0.625, 0.5970149253731343]

Current Country: Philippines
SVC...
Fitting 5 folds for each of 484 candidates, totalling 2420 fits
[CV] END svc__C=1.0, svc__class_weight=None, svc__gamma=0.0001, svc__kernel=sigmoid; total time=   0.1s
[CV] END svc__C=1.0, svc__class_weight=None, svc__gamma=0.01, svc__kernel=rbf; total time=   0.2s
[CV] END svc__C=1.0, svc__class_weight=None, svc__gamma=0.01, svc__kernel=poly; total time=   0.2s
[CV] END svc__C=1.0, svc__class_w

[CV] END svc__C=8103.083927575384, svc__class_weight=None, svc__gamma=scale, svc__kernel=sigmoid; total time=   0.1s
[CV] END svc__C=8103.083927575384, svc__class_weight=None, svc__gamma=scale, svc__kernel=sigmoid; total time=   0.1s
[CV] END svc__C=8103.083927575384, svc__class_weight=None, svc__gamma=auto, svc__kernel=rbf; total time=   0.2s
[CV] END svc__C=8103.083927575384, svc__class_weight=None, svc__gamma=auto, svc__kernel=sigmoid; total time=   0.2s
[CV] END svc__C=8103.083927575384, svc__class_weight=balanced, svc__gamma=0.0001, svc__kernel=rbf; total time=   0.2s
[CV] END svc__C=8103.083927575384, svc__class_weight=balanced, svc__gamma=0.0001, svc__kernel=sigmoid; total time=   0.3s
[CV] END svc__C=8103.083927575384, svc__class_weight=balanced, svc__gamma=0.001, svc__kernel=rbf; total time=   0.2s
[CV] END svc__C=8103.083927575384, svc__class_weight=balanced, svc__gamma=0.001, svc__kernel=poly; total time=   0.7s
[CV] END svc__C=8103.083927575384, svc__class_weight=balanced, 

[CV] END svc__C=1.0, svc__class_weight=None, svc__gamma=0.0001, svc__kernel=poly; total time=   0.1s
[CV] END svc__C=1.0, svc__class_weight=None, svc__gamma=0.01, svc__kernel=sigmoid; total time=   0.2s
[CV] END svc__C=1.0, svc__class_weight=None, svc__gamma=0.1, svc__kernel=rbf; total time=   0.2s
[CV] END svc__C=1.0, svc__class_weight=None, svc__gamma=1, svc__kernel=poly; total time=   0.8s
[CV] END svc__C=1.0, svc__class_weight=None, svc__gamma=1, svc__kernel=poly; total time=   0.7s
[CV] END svc__C=1.0, svc__class_weight=balanced, svc__gamma=0.0001, svc__kernel=poly; total time=   0.8s
[CV] END svc__C=1.0, svc__class_weight=balanced, svc__gamma=0.0001, svc__kernel=poly; total time=   0.7s
[CV] END svc__C=1.0, svc__class_weight=balanced, svc__gamma=0.001, svc__kernel=poly; total time=   0.7s
[CV] END svc__C=1.0, svc__class_weight=balanced, svc__gamma=0.001, svc__kernel=poly; total time=   0.7s
[CV] END svc__C=1.0, svc__class_weight=balanced, svc__gamma=0.01, svc__kernel=sigmoid; tot

[CV] END svc__C=2.718281828459045, svc__class_weight=None, svc__gamma=0.001, svc__kernel=sigmoid; total time=   0.1s
[CV] END svc__C=2.718281828459045, svc__class_weight=None, svc__gamma=0.01, svc__kernel=poly; total time=   0.1s
[CV] END svc__C=2.718281828459045, svc__class_weight=None, svc__gamma=0.1, svc__kernel=rbf; total time=   0.2s
[CV] END svc__C=2.718281828459045, svc__class_weight=None, svc__gamma=0.1, svc__kernel=sigmoid; total time=   0.2s
[CV] END svc__C=2.718281828459045, svc__class_weight=None, svc__gamma=1, svc__kernel=poly; total time=   0.6s
[CV] END svc__C=2.718281828459045, svc__class_weight=None, svc__gamma=scale, svc__kernel=poly; total time=   0.6s
[CV] END svc__C=2.718281828459045, svc__class_weight=None, svc__gamma=auto, svc__kernel=rbf; total time=   0.1s
[CV] END svc__C=2.718281828459045, svc__class_weight=None, svc__gamma=auto, svc__kernel=sigmoid; total time=   0.1s
[CV] END svc__C=2.718281828459045, svc__class_weight=balanced, svc__gamma=0.0001, svc__kerne

[CV] END svc__C=403.4287934927351, svc__class_weight=balanced, svc__gamma=1, svc__kernel=sigmoid; total time=   0.1s
[CV] END svc__C=403.4287934927351, svc__class_weight=balanced, svc__gamma=scale, svc__kernel=rbf; total time=   0.5s
[CV] END svc__C=403.4287934927351, svc__class_weight=balanced, svc__gamma=scale, svc__kernel=poly; total time=   0.8s
[CV] END svc__C=403.4287934927351, svc__class_weight=balanced, svc__gamma=auto, svc__kernel=sigmoid; total time=   0.6s
[CV] END svc__C=1096.6331584284585, svc__class_weight=None, svc__gamma=0.0001, svc__kernel=poly; total time=   0.1s
[CV] END svc__C=1096.6331584284585, svc__class_weight=None, svc__gamma=0.001, svc__kernel=rbf; total time=   0.2s
[CV] END svc__C=1096.6331584284585, svc__class_weight=None, svc__gamma=0.01, svc__kernel=rbf; total time=   0.2s
[CV] END svc__C=1096.6331584284585, svc__class_weight=None, svc__gamma=0.01, svc__kernel=sigmoid; total time=   0.2s
[CV] END svc__C=1096.6331584284585, svc__class_weight=None, svc__gam

[CV] END svc__C=20.085536923187668, svc__class_weight=None, svc__gamma=0.1, svc__kernel=poly; total time=   0.6s
[CV] END svc__C=20.085536923187668, svc__class_weight=None, svc__gamma=1, svc__kernel=poly; total time=   0.8s
[CV] END svc__C=20.085536923187668, svc__class_weight=None, svc__gamma=scale, svc__kernel=poly; total time=   0.7s
[CV] END svc__C=20.085536923187668, svc__class_weight=balanced, svc__gamma=0.0001, svc__kernel=rbf; total time=   0.8s
[CV] END svc__C=20.085536923187668, svc__class_weight=balanced, svc__gamma=0.0001, svc__kernel=sigmoid; total time=   0.8s
[CV] END svc__C=20.085536923187668, svc__class_weight=balanced, svc__gamma=0.001, svc__kernel=poly; total time=   0.8s
[CV] END svc__C=20.085536923187668, svc__class_weight=balanced, svc__gamma=0.01, svc__kernel=rbf; total time=   0.4s
[CV] END svc__C=20.085536923187668, svc__class_weight=balanced, svc__gamma=0.01, svc__kernel=sigmoid; total time=   0.5s
[CV] END svc__C=20.085536923187668, svc__class_weight=balanced

[CV] END svc__C=7.38905609893065, svc__class_weight=None, svc__gamma=1, svc__kernel=rbf; total time=   0.6s
[CV] END svc__C=7.38905609893065, svc__class_weight=None, svc__gamma=1, svc__kernel=sigmoid; total time=   0.2s
[CV] END svc__C=7.38905609893065, svc__class_weight=None, svc__gamma=scale, svc__kernel=rbf; total time=   0.6s
[CV] END svc__C=7.38905609893065, svc__class_weight=None, svc__gamma=scale, svc__kernel=sigmoid; total time=   0.1s
[CV] END svc__C=7.38905609893065, svc__class_weight=None, svc__gamma=auto, svc__kernel=poly; total time=   0.1s
[CV] END svc__C=7.38905609893065, svc__class_weight=balanced, svc__gamma=0.0001, svc__kernel=rbf; total time=   0.8s
[CV] END svc__C=7.38905609893065, svc__class_weight=balanced, svc__gamma=0.001, svc__kernel=rbf; total time=   0.8s
[CV] END svc__C=7.38905609893065, svc__class_weight=balanced, svc__gamma=0.001, svc__kernel=sigmoid; total time=   0.7s
[CV] END svc__C=7.38905609893065, svc__class_weight=balanced, svc__gamma=0.01, svc__ker

[CV] END svc__C=2980.9579870417283, svc__class_weight=balanced, svc__gamma=0.001, svc__kernel=rbf; total time=   0.1s
[CV] END svc__C=2980.9579870417283, svc__class_weight=balanced, svc__gamma=0.001, svc__kernel=rbf; total time=   0.1s
[CV] END svc__C=2980.9579870417283, svc__class_weight=balanced, svc__gamma=0.001, svc__kernel=rbf; total time=   0.1s
[CV] END svc__C=2980.9579870417283, svc__class_weight=balanced, svc__gamma=0.001, svc__kernel=rbf; total time=   0.1s
[CV] END svc__C=2980.9579870417283, svc__class_weight=balanced, svc__gamma=0.01, svc__kernel=poly; total time=   0.3s
[CV] END svc__C=2980.9579870417283, svc__class_weight=balanced, svc__gamma=0.01, svc__kernel=sigmoid; total time=   0.1s
[CV] END svc__C=2980.9579870417283, svc__class_weight=balanced, svc__gamma=0.01, svc__kernel=sigmoid; total time=   0.0s
[CV] END svc__C=2980.9579870417283, svc__class_weight=balanced, svc__gamma=0.01, svc__kernel=sigmoid; total time=   0.1s
[CV] END svc__C=2980.9579870417283, svc__class_

[CV] END svc__C=20.085536923187668, svc__class_weight=balanced, svc__gamma=0.0001, svc__kernel=poly; total time=   0.8s
[CV] END svc__C=20.085536923187668, svc__class_weight=balanced, svc__gamma=0.001, svc__kernel=rbf; total time=   0.9s
[CV] END svc__C=20.085536923187668, svc__class_weight=balanced, svc__gamma=0.001, svc__kernel=sigmoid; total time=   1.0s
[CV] END svc__C=20.085536923187668, svc__class_weight=balanced, svc__gamma=0.01, svc__kernel=poly; total time=   0.8s
[CV] END svc__C=20.085536923187668, svc__class_weight=balanced, svc__gamma=0.1, svc__kernel=sigmoid; total time=   0.2s
[CV] END svc__C=20.085536923187668, svc__class_weight=balanced, svc__gamma=1, svc__kernel=rbf; total time=   0.6s
[CV] END svc__C=20.085536923187668, svc__class_weight=balanced, svc__gamma=1, svc__kernel=sigmoid; total time=   0.1s
[CV] END svc__C=20.085536923187668, svc__class_weight=balanced, svc__gamma=scale, svc__kernel=rbf; total time=   0.6s
[CV] END svc__C=20.085536923187668, svc__class_weigh

[CV] END svc__C=2980.9579870417283, svc__class_weight=None, svc__gamma=1, svc__kernel=rbf; total time=   0.6s
[CV] END svc__C=2980.9579870417283, svc__class_weight=None, svc__gamma=1, svc__kernel=sigmoid; total time=   0.1s
[CV] END svc__C=2980.9579870417283, svc__class_weight=None, svc__gamma=scale, svc__kernel=rbf; total time=   0.5s
[CV] END svc__C=2980.9579870417283, svc__class_weight=None, svc__gamma=scale, svc__kernel=poly; total time=   0.7s
[CV] END svc__C=2980.9579870417283, svc__class_weight=balanced, svc__gamma=0.0001, svc__kernel=poly; total time=   0.7s
[CV] END svc__C=2980.9579870417283, svc__class_weight=balanced, svc__gamma=0.001, svc__kernel=poly; total time=   0.8s
[CV] END svc__C=2980.9579870417283, svc__class_weight=balanced, svc__gamma=0.01, svc__kernel=sigmoid; total time=   0.2s
[CV] END svc__C=2980.9579870417283, svc__class_weight=balanced, svc__gamma=0.1, svc__kernel=rbf; total time=   0.2s
[CV] END svc__C=2980.9579870417283, svc__class_weight=balanced, svc__ga

[CV] END svc__C=2.718281828459045, svc__class_weight=balanced, svc__gamma=0.001, svc__kernel=rbf; total time=   0.3s
[CV] END svc__C=2.718281828459045, svc__class_weight=balanced, svc__gamma=0.001, svc__kernel=rbf; total time=   0.3s
[CV] END svc__C=2.718281828459045, svc__class_weight=balanced, svc__gamma=0.001, svc__kernel=rbf; total time=   0.3s
[CV] END svc__C=2.718281828459045, svc__class_weight=balanced, svc__gamma=0.001, svc__kernel=poly; total time=   0.3s
[CV] END svc__C=2.718281828459045, svc__class_weight=balanced, svc__gamma=1, svc__kernel=rbf; total time=   0.2s
[CV] END svc__C=2.718281828459045, svc__class_weight=balanced, svc__gamma=1, svc__kernel=rbf; total time=   0.2s
[CV] END svc__C=2.718281828459045, svc__class_weight=balanced, svc__gamma=1, svc__kernel=rbf; total time=   0.2s
[CV] END svc__C=2.718281828459045, svc__class_weight=balanced, svc__gamma=1, svc__kernel=rbf; total time=   0.2s
[CV] END svc__C=7.38905609893065, svc__class_weight=None, svc__gamma=0.0001, sv

Best Tuning Score is 0.5513322884012538 with params {'svc__C': 148.4131591025766, 'svc__class_weight': None, 'svc__gamma': 0.01, 'svc__kernel': 'rbf'}
[0.9609053497942387, 0.7, 0.30434782608695654, 0.42424242424242425]


In [41]:
## Store detailed result scores
print(results_dummy)
print(results_svc)
print(results_randomforest)
results_dummy.round(3).to_csv("dummy_results.csv", index=False)
results_svc.round(3).to_csv("svm_results.csv", index=False)
results_randomforest.round(3).to_csv("randomforest_results.csv", index=False)

# Store best hyperparameter combinations (5 tuning runs) for each country
print(results_tuning_svc)
print(results_tuning_randomforest)
results_tuning_svc.round(3).to_csv("svm_results_hyperparameter.csv", index=False)
results_tuning_randomforest.round(3).to_csv("randomforest_results_hyperparameter.csv", index=False)

  Baseline    Country  Accuracy  Accuracy Std. Dev.  Precision  \
0    Dummy  Venezuela    0.9449                 0.0        0.0   

   Precision Std. Dev.  Recall  Recall Std. Dev.   F1  F1 Std. Dev.  
0                  0.0     0.0               0.0  0.0           0.0  
  Baseline    Country  Accuracy  Accuracy Std. Dev.  Precision  \
0      SVM  Venezuela  0.929191            0.006141   0.409531   

   Precision Std. Dev.    Recall  Recall Std. Dev.        F1  F1 Std. Dev.  
0             0.031672  0.638298          0.064535  0.497961      0.038085  
Empty DataFrame
Columns: [Baseline, Country, Accuracy, Accuracy Std. Dev., Precision, Precision Std. Dev., Recall, Recall Std. Dev., F1, F1 Std. Dev.]
Index: []
     Country   kernel           C class_weight   gamma  Tuning F1
0  Venezuela      rbf    1.000000     balanced     0.1   0.537762
1  Venezuela      rbf  403.428793     balanced  0.0001   0.541175
2  Venezuela      rbf    1.000000     balanced     0.1   0.558306
3  Venezuela   