In [1]:
import pandas as pd
import numpy as np
import warnings
warnings.simplefilter('ignore')

In [2]:
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import StratifiedKFold, train_test_split
from sklearn.metrics import f1_score
import json

In [3]:
def get_acc_scores(X, y, n_splits=10, random_state=0):
    """Cross-validate a small random forest and collect per-fold accuracies.

    Parameters
    ----------
    X : pandas.DataFrame
        Feature matrix, one row per sample.
    y : pandas.DataFrame or pandas.Series
        Class labels aligned row-for-row with ``X``. A single-column
        DataFrame (what ``main`` passes) or a Series are both accepted.
    n_splits : int, default 10
        Number of stratified folds.
    random_state : int, default 0
        Seed used for both the fold shuffling and the forest, so repeated
        runs produce the same scores.

    Returns
    -------
    dict
        ``{'rf test': [...], 'rf train': [...]}`` with one accuracy per fold,
        stored as plain Python floats so the result is JSON-serialisable.
    """
    # Flatten labels to 1-D: scikit-learn expects shape (n_samples,) and
    # otherwise emits a column-vector conversion warning on every fit.
    labels = np.asarray(y).ravel()

    skf = StratifiedKFold(n_splits=n_splits, random_state=random_state, shuffle=True)
    acc_scores = {'rf test': [], 'rf train': []}
    for train_index, test_index in skf.split(X, labels):
        X_train, X_test = X.iloc[train_index], X.iloc[test_index]
        y_train, y_test = labels[train_index], labels[test_index]
        # Seed the forest as well; seeding only the splitter still leaves the
        # scores non-reproducible because tree construction is randomised.
        rf = RandomForestClassifier(n_estimators=10, n_jobs=-1,
                                    random_state=random_state)
        rf.fit(X_train, y_train)
        acc_scores['rf test'].append(float(rf.score(X_test, y_test)))
        acc_scores['rf train'].append(float(rf.score(X_train, y_train)))
    return acc_scores

In [4]:
def main(class_, name):
    """Score every frame in ``class_`` and key the results by position.

    For each frame, the columns from position 3 onward are standardised and
    used as features, and the column at position 2 is used as the label.
    The per-fold accuracies returned by ``get_acc_scores`` are stored under
    ``'<name>.k<i>'``, where ``i`` is the 1-based position of the frame.

    Parameters
    ----------
    class_ : sequence of pandas.DataFrame
        Frames to evaluate, in order.
    name : str
        Prefix for the keys of the returned dictionary.

    Returns
    -------
    dict
        Mapping of ``'<name>.k<i>'`` to the dict produced by
        ``get_acc_scores`` for the i-th frame.
    """
    results = {}
    for position, frame in enumerate(class_, start=1):
        feature_frame = frame.iloc[:, 3:]
        # NOTE(review): the scaler is fitted on all rows before
        # get_acc_scores splits them into folds, so held-out rows contribute
        # to the scaling statistics.
        scaled_values = StandardScaler().fit_transform(feature_frame)
        X = pd.DataFrame(scaled_values, columns=feature_frame.columns)
        y = pd.DataFrame(frame.iloc[:, 2])
        results[name + '.k' + str(position)] = get_acc_scores(X, y)
    return results

In [5]:
def combine_dataset(data_dir='datasets',
                    orders=('Chiroptera', 'Rodentia', 'Aves'),
                    k_values=range(1, 8)):
    """Load the per-order CSVs for each k and stack them into one frame per k.

    For every ``k`` in ``k_values`` the files
    ``<data_dir>/<order>.Cleaned.k<k>.csv`` are read for each entry of
    ``orders`` (in that order) and concatenated row-wise with a fresh
    0..n-1 index.

    Parameters
    ----------
    data_dir : str or path-like, default 'datasets'
        Directory containing the CSV files.
    orders : iterable of str, default ('Chiroptera', 'Rodentia', 'Aves')
        File-name prefixes to merge, in concatenation order.
    k_values : iterable of int, default range(1, 8)
        The ``k`` suffixes to load; one merged frame is produced per value.

    Returns
    -------
    list of pandas.DataFrame
        One merged frame per element of ``k_values``, in the same order.
    """
    # Materialise once so a one-shot iterator is not exhausted after the first k.
    orders = tuple(orders)
    dataFrames = []
    for k in k_values:
        frames = [
            pd.read_csv('{}/{}.Cleaned.k{}.csv'.format(data_dir, order, k))
            for order in orders
        ]
        # ignore_index=True renumbers the rows 0..n-1 in the same step.
        dataFrames.append(pd.concat(frames, ignore_index=True))
    return dataFrames

In [7]:
# Read and merge the per-order CSVs once; the resulting list of frames is the
# input to the evaluation that is written to JSON further down.
merged_dfs = combine_dataset()

In [8]:
# Run the evaluation BEFORE opening the output file: mode 'w' truncates the
# file as soon as it is opened, so an exception raised inside main() would
# otherwise leave an empty JSON file behind and wipe any earlier results.
merged_results = main(merged_dfs, 'non hierarchical merged')
with open('non hierarchical merged.json', 'w') as fr:
    json.dump(merged_results, fr)