In [None]:
#see preprocessing note at top for schema.
#original data source
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
import torch
from torch import nn
from google.colab import drive
import os

# Mount Google Drive only when it is not already available.
# The flag is derived from the filesystem instead of being hard-coded to
# False, so re-running this cell no longer forces a remount every time.
drive_mount_point = '/content/drive'
drive_mounted = os.path.isdir(os.path.join(drive_mount_point, 'MyDrive'))
if not drive_mounted:
    # force_remount=True is kept so a stale or partially mounted directory is replaced.
    drive.mount(drive_mount_point, force_remount=True)
    drive_mounted = True
    print("Drive mounted.")
else:
    print("Drive already mounted.")

# Project folder on Drive that holds the preprocessed data files.
drive_path = "/content/drive/MyDrive/Colab_Dev/ALZ_Variant"


Mounted at /content/drive
Drive mounted.


In [None]:
# Get every entry (files and sub-directories) in the project folder.
all_items = os.listdir(drive_path)

# Keep only regular files with the '.npz' suffix. Requiring '.npz' already
# excludes notebooks ('.ipynb'), so no separate exclusion is needed. The cheap
# suffix check runs first so that only candidate entries hit the filesystem.
# sorted() makes the listing reproducible, since os.listdir() returns entries
# in arbitrary order.
file_names = sorted(
    item
    for item in all_items
    if item.endswith('.npz') and os.path.isfile(os.path.join(drive_path, item))
)

print("List of filenames (excluding .ipynb):")
for file_name in file_names:
    print(file_name)

List of filenames (excluding .ipynb):
preprocessed_alz_data.npz


# Task
Load the data from "preprocessed_alz_data.npz", which already contains the training and testing splits (x_train, x_test, y_train, y_test), and convert these arrays to PyTorch tensors.

## Load the data

### Subtask:
Load the data from the `.npz` file using `np.load`.


**Reasoning**:
Construct the full file path and load the data from the .npz file using numpy.



In [None]:
# Resolve the archive's location inside the mounted project folder.
file_path = os.path.join(
    drive_path,
    "preprocessed_alz_data.npz",
)

# Open the archive and show the names of the arrays stored in it.
loaded_data = np.load(file_path)
print(loaded_data.files)

['X_train', 'X_test', 'y_train', 'y_test']


## Extract features and labels

### Subtask:
Extract the features (x) and labels (y) from the loaded data.


**Reasoning**:
Extract the features and labels from the loaded numpy arrays.



In [None]:
# Pull the four pre-split arrays out of the archive in one pass; the key
# order matches the order of the target names on the left-hand side.
x_train_np, x_test_np, y_train_np, y_test_np = (
    loaded_data[key] for key in ("X_train", "X_test", "y_train", "y_test")
)

## Convert to tensors

### Subtask:
Convert the numpy arrays to PyTorch tensors.


**Reasoning**:
Convert the numpy arrays to PyTorch tensors.



In [None]:
# Wrap each NumPy array as a tensor: features as float32, labels as int64.
# NOTE(review): the recorded output shows 2-D label tensors (9 columns per
# sample); confirm that int64 targets of that layout are what the downstream
# loss function expects.
x_train = torch.from_numpy(x_train_np).to(torch.float32)
x_test = torch.from_numpy(x_test_np).to(torch.float32)
y_train = torch.from_numpy(y_train_np).to(torch.int64)
y_test = torch.from_numpy(y_test_np).to(torch.int64)

# Report the shape of every tensor as a quick sanity check.
for _caption, _tensor in (
    ("x_train shape:", x_train),
    ("x_test shape:", x_test),
    ("y_train shape:", y_train),
    ("y_test shape:", y_test),
):
    print(_caption, _tensor.shape)

x_train shape: torch.Size([5076, 130])
x_test shape: torch.Size([1270, 130])
y_train shape: torch.Size([5076, 9])
y_test shape: torch.Size([1270, 9])


## Summary:

### Data Analysis Key Findings

* The data was successfully loaded from the "preprocessed_alz_data.npz" file using `np.load()`.
* The loaded data contains the expected keys: 'X\_train', 'X\_test', 'y\_train', and 'y\_test'.
* The features and labels for both training and testing sets were extracted into NumPy arrays: `x_train_np`, `x_test_np`, `y_train_np`, and `y_test_np`.
* These NumPy arrays were successfully converted into PyTorch tensors: `x_train`, `x_test`, `y_train`, and `y_test`.
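* The feature tensors (`x_train`, `x_test`) were cast to `float`, and the label tensors (`y_train`, `y_test`) were cast to `long`. `float` is the standard dtype for numerical features, and `long` is the dtype PyTorch expects for class-index targets. Because the label tensors here are two-dimensional (9 columns per sample), confirm that `long` matches the target format of the chosen loss function before training.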

### Insights or Next Steps

* The data is now in the required PyTorch tensor format and is ready for use in a PyTorch model for training and evaluation.
* The next step would typically involve defining a PyTorch model architecture and proceeding with the training process using the prepared tensors.
