In [1]:
import torch
if torch.backends.mps.is_available():
    mps_device = torch.device("mps")
    x = torch.ones(1, device=mps_device)
    print (x)
else:
    print ("MPS device not found.")

tensor([1.], device='mps:0')


In [2]:
import sys
import platform
import torch
import pandas as pd
import sklearn as sk

has_gpu = torch.cuda.is_available()
has_mps = getattr(torch,'has_mps',False)
device = "mps" if getattr(torch,'has_mps',False) \
    else "gpu" if torch.cuda.is_available() else "cpu"

print(f"Python Platform: {platform.platform()}")
print(f"PyTorch Version: {torch.__version__}")
print()
print(f"Python {sys.version}")
print(f"Pandas {pd.__version__}")
print(f"Scikit-Learn {sk.__version__}")
print("GPU is", "available" if has_gpu else "NOT AVAILABLE")
print("MPS (Apple Metal) is", "AVAILABLE" if has_mps else "NOT AVAILABLE")
print(f"Target device is {device}")

Python Platform: macOS-13.5-arm64-arm-64bit
PyTorch Version: 2.1.0

Python 3.10.13 (main, Sep 11 2023, 08:16:02) [Clang 14.0.6 ]
Pandas 2.0.3
Scikit-Learn 1.3.0
GPU is NOT AVAILABLE
MPS (Apple Metal) is AVAILABLE
Target device is mps


  has_mps = getattr(torch,'has_mps',False)
  device = "mps" if getattr(torch,'has_mps',False) \


In [3]:
"""
MNIST with PyTorch on Apple Silicon GPU

Code borrowed from PyTorch Examples.
"""

import torch
from torch import nn, optim
import torch.nn.functional as F
import torchvision
from torchvision import datasets, transforms

EPOCHS = 5

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 20, 5, 1)
        self.conv2 = nn.Conv2d(20, 50, 5, 1)
        self.fc1 = nn.Linear(4*4*50, 500)
        self.fc2 = nn.Linear(500, 10)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 2, 2)
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2, 2)
        x = x.view(-1, 4*4*50)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return F.log_softmax(x, dim=1)

def train(model, device, train_loader, optimizer, epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % 10 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))

def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))


def main():
    print("PyTorch version:", torch.__version__)
    print("Torchvision version:", torchvision.__version__)

    device = torch.device("mps")
    print("Using Device: ", device)

    train_loader = torch.utils.data.DataLoader(
        datasets.MNIST('../data', train=True, download=True,
                       transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=128, shuffle=True)
    test_loader = torch.utils.data.DataLoader(
        datasets.MNIST('../data', train=False, transform=transforms.Compose([
                           transforms.ToTensor(),
                           transforms.Normalize((0.1307,), (0.3081,))
                       ])),
        batch_size=128, shuffle=True)


    model = Net().to(device)
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.5)

    for epoch in range(1, EPOCHS + 1):
        train(model, device, train_loader, optimizer, epoch)
        test(model, device, test_loader)

if __name__ == "__main__":
    main()

PyTorch version: 2.1.0
Torchvision version: 0.15.2
Using Device:  mps
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ../data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 25020694.00it/s]


Extracting ../data/MNIST/raw/train-images-idx3-ubyte.gz to ../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ../data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 17492518.96it/s]

Extracting ../data/MNIST/raw/train-labels-idx1-ubyte.gz to ../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz





Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 15815487.80it/s]


Extracting ../data/MNIST/raw/t10k-images-idx3-ubyte.gz to ../data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 16755082.47it/s]


Extracting ../data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ../data/MNIST/raw


Test set: Average loss: 0.1601, Accuracy: 9536/10000 (95%)


Test set: Average loss: 0.0956, Accuracy: 9708/10000 (97%)


Test set: Average loss: 0.0678, Accuracy: 9778/10000 (98%)


Test set: Average loss: 0.0683, Accuracy: 9783/10000 (98%)


Test set: Average loss: 0.0509, Accuracy: 9824/10000 (98%)



In [None]:
import warnings
warnings.filterwarnings("ignore")

import torch
import h5py
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt


from torch.utils.data import Dataset, DataLoader, TensorDataset
from sklearn.metrics.pairwise import sigmoid_kernel
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import r2_score
from sklearn.cluster import KMeans
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.linear_model import (
    Ridge,
    LinearRegression,
    LogisticRegression,
    ElasticNet,
    Lasso,
)
from sklearn.ensemble import (
    RandomForestRegressor,
    RandomForestClassifier,
    GradientBoostingRegressor,
    BaggingClassifier,
    ExtraTreesClassifier,
)
from sklearn.metrics import (
    accuracy_score,
    mean_squared_error,
    recall_score,
    confusion_matrix,
    f1_score,
    roc_curve,
    auc,
)

# Set the random seed 
torch.manual_seed(42)  

# Define a custom dataset class
class MarketDataset(Dataset):
    def __init__(self, inputs, targets):
        self.inputs = inputs
        self.targets = targets

    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, idx):
        input_data = self.inputs[idx]
        target = self.targets[idx]
        return input_data, target

# Define the neural network model
class Net(nn.Module):
    def __init__(
        self, input_neurons, output_neurons, hidden_layers, neurons_per_layer, dropout
    ):
        super(Net, self).__init__()

        self.input_neurons = input_neurons
        self.output_neurons = output_neurons
        self.hidden_layers = hidden_layers
        self.neurons_per_layer = neurons_per_layer
        self.dropout = dropout

        self.layers = nn.ModuleList()
        self.layers.append(nn.Linear(input_neurons, neurons_per_layer))
        self.layers.append(nn.ReLU())

        for _ in range(hidden_layers):
            self.layers.append(nn.Linear(neurons_per_layer, neurons_per_layer))
            self.layers.append(nn.ReLU())
            self.layers.append(nn.Dropout(p=dropout))

        self.layers.append(nn.Linear(neurons_per_layer, output_neurons))

    def forward(self, x):
        x = x.view(-1, self.input_neurons)
        for layer in self.layers:
            x = layer(x)
        return x



def train(model, device, train_loader, optimizer, epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % 10 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))

def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item() # sum up batch loss
            pred = output.argmax(dim=1, keepdim=True) # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        test_loss, correct, len(test_loader.dataset),
        100. * correct / len(test_loader.dataset)))



def main():
    print("PyTorch version:", torch.__version__)
    print("Torchvision version:", torchvision.__version__)

    device = torch.device("mps")
    print("Using Device: ", device)


    # Set the number of clients, rounds, and epochs
    sheet_name = [
        "0",
        "1",
        "2",
        "3",
        "5",
        "6",
        "7",
        "9",
        "10",
        "12",
        "14",
        "16",
        "17",
        "22",
    ]

    region_map = {
        0: "Southeast Asia",
        1: "South Asia",
        2: "Oceania",
        3: "Eastern Asia",
        4: "West Asia",
        5: "West of USA",
        6: "US Center",
        7: "West Africa",
        8: "Central Africa",
        9: "North Africa",
        10: "Western Europe",
        11: "Northern Europe",
        12: "Central America",
        13: "Caribbean",
        14: "South America",
        15: "East Africa",
        16: "Southern Europe",
        17: "East of USA",
        18: "Canada",
        19: "Southern Africa",
        20: "Central Asia",
        21: "Eastern Europe",
        22: "South of USA",}


    # Set the parameters for the model
    input_neurons = 25
    output_neurons = 1
    hidden_layers = 4
    neurons_per_layer = 64
    dropout = 0.3


    # Open the HDF5 file
    file = h5py.File(
        "market_data.h5",
        "r",
    )

    # Get the number of clients from sheet_name
    num_clients = len(sheet_name)

    # Set the number of iterations,rounds,epochs for federated learning
    num_round = [i for i in range(3,100)]
    num_epochs = 10
    num_iterations = 10

    # # Initialize an empty similarity matrix to store similarity values for each pair of clients
    # similarity_matrix_total1 = np.zeros((len(sheet_name), len(sheet_name)))
    # similarity_matrix_total2 = np.zeros((len(sheet_name), len(sheet_name)))
    # similarity_matrix_total3 = np.zeros((len(sheet_name), len(sheet_name)))


    # Initialize an empty similarity matrix to store similarity values for each pair of clients for each iteration
    similarity_matrix_total = np.zeros((len(sheet_name), len(sheet_name), num_iterations))# Import statements and other code (excluding imports)...

def preprocess_data(dataset):
    # Preprocess the data
    dataset = pd.DataFrame(dataset)
    column_names = file[client].attrs["columns"]
    dataset.columns = column_names
    dataset = dataset.drop(columns=["Region Index"])

    xs = dataset.drop(["Sales"], axis=1)
    ys = dataset["Sales"]

    xs_train, xs_test, ys_train, ys_test = train_test_split(xs, ys, test_size=0.3, random_state=42)
    xs_train, xs_val, ys_train, ys_val = train_test_split(xs_train, ys_train, test_size=0.2, random_state=42)

    # Convert data to tensors
    train_inputs = torch.tensor(xs_train.values, dtype=torch.float32).to(device)
    train_targets = torch.tensor(ys_train.values, dtype=torch.float32).to(device)
    val_inputs = torch.tensor(xs_val.values, dtype=torch.float32).to(device)
    val_targets = torch.tensor(ys_val.values, dtype=torch.float32).to(device)
    test_inputs = torch.tensor(xs_test.values, dtype=torch.float32).to(device)
    test_targets = torch.tensor(ys_test.values, dtype=torch.float32).to(device)

    

    return train_inputs, train_targets, val_inputs, val_targets, test_inputs, test_targets

def create_model(input_neurons, output_neurons, hidden_layers, neurons_per_layer, dropout):
    # Define the neural network model
    model = Net(input_neurons, output_neurons, hidden_layers, neurons_per_layer, dropout).to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    return model, criterion, optimizer

def train_model(model, train_loader, criterion, optimizer, num_epochs):
    train_losses = []

    for epoch in range(num_epochs):
        model.train()

        for inputs, targets in train_loader:
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, targets.unsqueeze(1))
            loss.backward()
            optimizer.step()
            train_losses.append(loss.item())

    epoch_loss = np.mean(train_losses)

def test_model(model, test_loader):
    model.eval()
    test_losses = []
    test_preds = []
    test_targets = []

    with torch.no_grad():
        for inputs, targets in test_loader:
            outputs = model(inputs)
            loss = criterion(outputs, targets.unsqueeze(1))
            test_losses.append(loss.item())
            test_preds.extend(outputs.cpu().numpy())  # Move predictions back to CPU for consistency
            test_targets.extend(targets.cpu().numpy())  # Move targets back to CPU for consistency

    r2 = r2_score(test_targets, test_preds)

    return r2

def main():
    # Your existing code up to the point where you want to refactor...

    device = torch.device("mps")
    print("Using Device: ", device)

    # Your existing code for loading data, setting parameters, and initializing global model...

    for num_rounds in num_round:
        print(f"Testing {num_rounds}")
        
        # Initialize a dictionary to store metrics for each client, round, and iteration
        metrics = {
            client: {"r2": [[[] for _ in range(num_rounds)] for _ in range(num_iterations)]}
            for client in sheet_name
        }

        # Initialize a list to store the feature matrices for each iteration
        all_feature_matrices = []

        # Initialize a list to store the similarity matrices for each iteration
        similarity_matrices = []

                # Initialize a shared global model for this iteration
        global_model, criterion, optimizer = create_model(input_neurons, output_neurons, hidden_layers, neurons_per_layer, dropout)


        for iteration in range(num_iterations):
            print(f"Iteration {iteration + 1}/{num_iterations}")

            # # Initialize a shared global model for this iteration
            # global_model, criterion, optimizer = create_model(input_neurons, output_neurons, hidden_layers, neurons_per_layer, dropout)

            # Initialize an empty list to store the client models for this round
            client_models = []


            for round in range(num_rounds):
                print(f"Round {round + 1}/{num_rounds}")

                for client in sheet_name:
                    # Load the state dict of the global model to the client model

                    train_inputs, train_targets, val_inputs, val_targets, test_inputs, test_targets = preprocess_data(file[client][:])

                    # Create data loaders
                    train_dataset = MarketDataset(train_inputs, train_targets)
                    val_dataset = MarketDataset(val_inputs, val_targets)
                    test_dataset = MarketDataset(test_inputs, test_targets)
                    train_loader = DataLoader(
                        train_dataset, batch_size=32, shuffle=True
                    )
                    val_loader = DataLoader(val_dataset, batch_size=32)
                    test_loader = DataLoader(test_dataset, batch_size=32)

                    # Training phase
                    train_model(model, train_loader, criterion, optimizer, num_epochs)
                    
                    # Use model to generate predictions for the test dataset
                    client_models.append(model.state_dict())
                    
                    # Testing phase
                    r2 = test_model(model, test_loader)
                    # Save the R2 value for the current round and iteration
                    metrics[client]["r2"][iteration][round] = r2

                model.load_state_dict(global_model.state_dict())
                # Average the weights across all clients after each round
                averaged_weights = {
                    k: sum(d[k] for d in client_models) / num_clients
                    for k in client_models[0].keys()
                }

                # Update the global model
                global_model.load_state_dict(averaged_weights)

            # Create the feature matrix for the current iteration and all rounds
            feature_matrix = np.array(
                [
                    [metrics[client]["r2"][iteration][r] for r in range(num_rounds)]
                    for client in sheet_name
                ]
            )

            # Check if the feature matrix is empty (no valid R2 values)
            if feature_matrix.size == 0:
                print("No valid data in the feature matrix. Skipping this iteration.")
                continue

            # Append the feature matrix to the list after adding an additional dimension
            all_feature_matrices.append(np.expand_dims(feature_matrix, axis=2))

            # Concatenate the feature matrices along the third dimension to have shape (num_clients, num_rounds, num_iterations)
            feature_matrix_total = np.concatenate(all_feature_matrices, axis=2)

            # Step 2: Standardize the Data
            scaler = StandardScaler()

            # Flatten the last two dimensions
            flattened_data = feature_matrix_total.reshape(
                feature_matrix_total.shape[0], -1
            )
            normalized_data = scaler.fit_transform(flattened_data)

            # Compute Pairwise Similarity using Sigmoid Kernel for the current iteration
            similarity_matrix_total = sigmoid_kernel(normalized_data)
            # print(similarity_matrix_total)

            # Append the similarity matrix to the list of similarity matrices
            similarity_matrices.append(similarity_matrix_total)

                    

            
            train_inputs, train_targets, val_inputs, val_targets, test_inputs, test_targets = preprocess_data(file[client][:])

            # Training phase
            train_model(model, train_loader, criterion, optimizer, num_epochs)
            
            # Testing phase
            r2 = test_model(model, test_loader)
            # Save the R2 value for the current round and iteration
            metrics[client]["r2"][iteration][round] = r2

        # Save or use the R2 values as needed...

# Rest of your existing code...

if __name__ == "__main__":
    main()
