In [4]:
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

In [5]:
def get_Xy_to_numpy(data, y_index=11):
    '''
    Split a DataFrame into a feature matrix and a target vector.

    Columns are selected by position: the column at y_index is the target,
    and every column before it is a feature. Columns located after y_index
    are not part of either output.

    Args: data = the whole dataset (pandas DataFrame)
          y_index = positional index of the column containing the target

    Returns: X, y as numpy arrays
    '''
    # Features: all columns positioned before the target column.
    features = data.iloc[:, :y_index].to_numpy()
    # Target: the single column at y_index.
    target = data.iloc[:, y_index].to_numpy()

    return features, target


def to_CNN(X):
    '''
    Insert two singleton axes between the first and last axis of X.

    Args: X = array of shape (x1, x2)

    Returns: the same values with shape (x1, 1, 1, x2) for CNN input
    '''
    # Indexing with None is the same as np.newaxis: it adds a length-1 axis.
    return X[:, None, None, :]


def load_data(filepath, y_index, cnn_shape=False):
    '''
    Load every CSV file found in a folder and split each into X and y.

    Only regular files whose name ends in '.csv' are read; sub-folders
    (for example '.ipynb_checkpoints') and other files are skipped. Files
    are processed in sorted name order so the returned dictionary has the
    same key order on every run.

    Args: filepath = data folder path
          y_index = the index of the target column, counted within the
                    columns that are loaded (file columns 1 to 12)
          cnn_shape = if True, X is returned with shape (x1, 1, 1, x2)

    Returns: Dictionary of all datasets, keyed by the file name without
             its '.csv' suffix. Each value is a dict with the keys
             'x' (features) and 'y' (target).
             Example: Access X and y of 'Pandas_ML_Project_SNR_Minus10'
             datasets['Pandas_ML_Project_SNR_Minus10']['x']
             datasets['Pandas_ML_Project_SNR_Minus10']['y']
    '''

    datasets = {}

    # os.listdir returns entries in arbitrary order; sort for reproducibility.
    for file in sorted(os.listdir(filepath)):
        full_path = os.path.join(filepath, file)

        # Anything that is not a CSV file cannot be parsed by read_csv below.
        if not file.lower().endswith('.csv') or not os.path.isfile(full_path):
            continue

        # Column 0 of the file is not loaded; only columns 1..12 are kept.
        # NOTE(review): presumably column 0 is a saved index column - confirm.
        data = pd.read_csv(full_path, usecols=range(1, 13))
        X, y = get_Xy_to_numpy(data, y_index)

        if cnn_shape:
            X = to_CNN(X)

        datasets[file.removesuffix('.csv')] = {'x': X, 'y': y}

    return datasets


def Xy_dataloader(X_numpy, y_numpy, batch_size=32, shuffle=True):
    '''
    Pass X and y and load into a torch DataLoader

    X is converted to a float32 tensor and y to an int64 (long) tensor,
    then both are wrapped in a TensorDataset.

    Args: X_numpy = features, first axis indexes the samples
          y_numpy = targets, same number of samples as X_numpy
          batch_size = number of samples per batch
          shuffle = if True (default) the samples are reshuffled on every
                    pass over the loader; pass False to keep the input order
                    (e.g. for validation or test data)

    Returns: Loader Object
    '''
    # Imported locally so the function works even when the notebook's import
    # cell does not bring these torch names into scope.
    import torch
    from torch.utils.data import DataLoader, TensorDataset

    X_tensor = torch.tensor(X_numpy).float()
    y_tensor = torch.tensor(y_numpy).long()

    dataset = TensorDataset(X_tensor, y_tensor)

    loader = DataLoader(dataset, batch_size=batch_size, shuffle=shuffle)

    return loader

In [6]:
# Load every dataset in ../data; the target is the column at position 11
# and X keeps its 2-D shape (cnn_shape=False).
data = load_data('../data', 11, cnn_shape=False)
# Iterating the dict yields its keys: one dataset name per loaded file.
for i in data:
    print(i)
print("Data loaded")
# Reminder of how to pull X and y for one dataset out of the dictionary.
print("Example: X, y = data['Pandas_ML_Project_SNR_Minus5']['x'], data['Pandas_ML_Project_SNR_Minus5']['y']")

Pandas_ML_Project_SNR_Minus10
Pandas_ML_Project_SNR_Minus15
Pandas_ML_Project_SNR_Minus20
Pandas_ML_Project_SNR_Minus25
Pandas_ML_Project_SNR_Minus5
Data loaded
Example: X, y = data['Pandas_ML_Project_SNR_Minus5']['x'], data['Pandas_ML_Project_SNR_Minus5']['y']
