In [None]:
# Import libraries
import pandas as pd
import numpy as np
import arff



In [None]:
# Constants
FILE_NAME = 'diabet.arff'

# Load ARFF file. The context manager closes the file handle as soon as
# parsing finishes instead of leaving it open for the life of the kernel.
with open(FILE_NAME, 'r') as arff_file:
    data = arff.load(arff_file)

# Build the DataFrame from the parsed rows; each entry of data['attributes']
# is indexed with [0] to obtain the column name.
df = pd.DataFrame(data['data'], columns=[attr[0] for attr in data['attributes']])


df.head()

In [None]:

# Split parameters
TRAIN_FRACTION = 0.7                 # share of rows that go to the training set
RANDOM_STATE = 42                    # fixed seed so a fresh run reproduces the same split
CATEGORY_COLUMN = 'HastalikDurumu'   # column stored with the pandas 'category' dtype

# Shuffle dataset (seeded so the row order is reproducible on a fresh kernel).
# Note: `df` is overwritten, so re-running only this cell shuffles the
# already-shuffled frame again.
df = df.sample(frac=1, random_state=RANDOM_STATE).reset_index(drop=True)

# Calculate train and test dataset size
train_size = int(TRAIN_FRACTION * len(df))
test_size = len(df) - train_size

# Split train and test dataset. `.copy()` gives each split its own data, so
# the column assignments below do not raise SettingWithCopyWarning and
# cannot write through to `df`.
train_set = df.iloc[:train_size].copy()
test_set = df.iloc[train_size:].copy()

# Convert to category
train_set[CATEGORY_COLUMN] = train_set[CATEGORY_COLUMN].astype('category')
test_set[CATEGORY_COLUMN] = test_set[CATEGORY_COLUMN].astype('category')

# Sanity check: the two splits partition the shuffled frame exactly.
assert len(train_set) == train_size and len(test_set) == test_size


# Save train and test dataset
def save_arff(df: pd.DataFrame, filename: str, relation_name: str = "relation") -> None:
    """Write a DataFrame to ``filename`` as an ARFF file.

    Columns with the pandas ``category`` dtype are declared as nominal
    attributes whose allowed values are the column's own categories
    (converted to strings). Every other column is declared ``NUMERIC``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to export; its column names become the attribute names and
        its rows become the data section.
    filename : str
        Path of the file to write. It is opened in ``'w'`` mode, so an
        existing file is overwritten.
    relation_name : str, optional
        Value stored under the ``'relation'`` key of the ARFF object.

    Notes
    -----
    Nominal values are taken from ``dtype.categories``, so two frames get
    the same attribute declaration only when their categorical columns
    carry the same categories.
    """
    attributes = []
    for col in df.columns:
        dtype = df[col].dtype
        if isinstance(dtype, pd.CategoricalDtype):
            # Use the labels actually present in the column's dtype rather
            # than a fixed ['0', '1'], so the header always matches the data.
            attr_type = [str(category) for category in dtype.categories]
        else:
            attr_type = 'NUMERIC'
        attributes.append((col, attr_type))

    arff_data = {
        'relation': relation_name,
        'attributes': attributes,
        'data': df.values.tolist()
    }
    with open(filename, 'w') as f:
        arff.dump(arff_data, f)


# Persist each split to its own ARFF file (default relation name is used).
save_arff(df=train_set, filename='train_dataset.arff')
save_arff(df=test_set, filename='test_dataset.arff')

