In [1]:
import os

# Show where the kernel is running; every relative path below resolves against it.
working_dir = os.getcwd()
print(working_dir)


C:\Federated_Learning_Project


In [2]:
from pathlib import Path

# Confirm the working directory and list what sits directly under ./data.
data_root = Path("data")

print("Current directory:", Path.cwd())
print("\nContents of data folder:")

for entry in data_root.iterdir():
    print(" -", entry)


Current directory: C:\Federated_Learning_Project

Contents of data folder:
 - data\UCI HAR Dataset


In [3]:
print("\nContents of dataset folder:")

# Keep only the sub-directories of ./data, then list each one's direct children.
dataset_dirs = [entry for entry in Path("data").glob("*") if entry.is_dir()]

for dataset_dir in dataset_dirs:
    print("\nInside:", dataset_dir)
    for child in dataset_dir.iterdir():
        print("   ", child)



Contents of dataset folder:

Inside: data\UCI HAR Dataset
    data\UCI HAR Dataset\.DS_Store
    data\UCI HAR Dataset\activity_labels.txt
    data\UCI HAR Dataset\features.txt
    data\UCI HAR Dataset\features_info.txt
    data\UCI HAR Dataset\README.txt
    data\UCI HAR Dataset\test
    data\UCI HAR Dataset\train


In [4]:
import pandas as pd
from pathlib import Path

# Root folder of the dataset, relative to the notebook's working directory.
DATA_DIR = Path("data/UCI HAR Dataset")

# features.txt is read as whitespace-separated, header-less rows of (index, feature).
features_path = DATA_DIR / "features.txt"
features = pd.read_csv(features_path, sep=r"\s+", header=None, names=["index", "feature"])

print("Features loaded:", features.shape)


Features loaded: (561, 2)


In [5]:
def load_split(split):
    """Read the feature matrix and label column for one dataset split.

    Parameters
    ----------
    split : str
        Name of the split sub-folder under ``DATA_DIR`` ("train" or "test").
        The files read are ``<split>/X_<split>.txt`` and ``<split>/y_<split>.txt``.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        The feature frame (whitespace-separated, no header) and a one-column
        frame named ``label``.

    Raises
    ------
    ValueError
        If the feature and label files do not have the same number of rows.
    """
    # Build paths from DATA_DIR so the dataset location is configured in one place.
    split_dir = DATA_DIR / split
    X = pd.read_csv(split_dir / f"X_{split}.txt", sep=r"\s+", header=None)
    y = pd.read_csv(split_dir / f"y_{split}.txt", header=None, names=["label"])

    if len(X) != len(y):
        raise ValueError(
            f"{split}: X has {len(X)} rows but y has {len(y)} rows"
        )
    return X, y


# Load train data
X_train, y_train = load_split("train")

# Load test data
X_test, y_test = load_split("test")

# Both splits must describe samples with the same set of feature columns.
if X_train.shape[1] != X_test.shape[1]:
    raise ValueError(
        f"train has {X_train.shape[1]} features but test has {X_test.shape[1]}"
    )

print("X_train:", X_train.shape)
print("y_train:", y_train.shape)
print("X_test :", X_test.shape)
print("y_test :", y_test.shape)


X_train: (7352, 561)
y_train: (7352, 1)
X_test : (2947, 561)
y_test : (2947, 1)


In [6]:
# Load activity labels (id -> activity name) from the same DATA_DIR as the other files
activity_labels = pd.read_csv(
    DATA_DIR / "activity_labels.txt",
    sep=r"\s+",
    header=None,
    names=["id", "activity"]
)

# Create mapping
label_map = dict(zip(activity_labels["id"], activity_labels["activity"]))

# Apply mapping
y_train["activity"] = y_train["label"].map(label_map)
y_test["activity"] = y_test["label"].map(label_map)

# Series.map yields NaN for ids missing from label_map; fail loudly instead of
# carrying silently unlabeled rows forward.
for split_name, labels in (("train", y_train), ("test", y_test)):
    unmapped_ids = labels.loc[labels["activity"].isna(), "label"].unique()
    if len(unmapped_ids) > 0:
        raise ValueError(
            f"{split_name}: label ids without an activity name: {sorted(unmapped_ids)}"
        )

print(y_train.head())


   label  activity
0      5  STANDING
1      5  STANDING
2      5  STANDING
3      5  STANDING
4      5  STANDING


In [7]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training split only, then reuse the
# fitted scaler for both splits so the test data never influences the fit.
scaler = StandardScaler().fit(X_train)

X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Overall mean / std of the scaled training matrix (over all entries at once).
train_mean = X_train_scaled.mean().round(4)
train_std = X_train_scaled.std().round(4)

print("Scaled train mean (approx 0):", train_mean)
print("Scaled train std  (approx 1):", train_std)


Scaled train mean (approx 0): 0.0
Scaled train std  (approx 1): 1.0


In [8]:
import numpy as np

NUM_CLIENTS = 5
SEED = 42  # fixed so the client partition is the same on every Restart & Run All

# Shuffle training data with a seeded generator (reproducible partition)
rng = np.random.default_rng(SEED)
indices = rng.permutation(len(X_train_scaled))
X_shuffled = X_train_scaled[indices]
y_shuffled = y_train["label"].to_numpy()[indices]

# Split into clients (np.array_split tolerates sizes that do not divide evenly)
X_clients = np.array_split(X_shuffled, NUM_CLIENTS)
y_clients = np.array_split(y_shuffled, NUM_CLIENTS)

# Every sample must land on exactly one client, with features and labels aligned.
assert sum(len(part) for part in X_clients) == len(X_train_scaled)
assert all(len(X_part) == len(y_part) for X_part, y_part in zip(X_clients, y_clients))

# Averaging per-class coefficients later only makes sense if each client has
# seen every class; catch a degenerate split here rather than downstream.
expected_classes = np.unique(y_shuffled)
for client_no, y_part in enumerate(y_clients, start=1):
    if not np.array_equal(np.unique(y_part), expected_classes):
        raise ValueError(
            f"Client {client_no} is missing classes: "
            f"{sorted(set(expected_classes) - set(np.unique(y_part)))}"
        )

for i in range(NUM_CLIENTS):
    print(f"Client {i+1}: X = {X_clients[i].shape}, y = {y_clients[i].shape}")


Client 1: X = (1471, 561), y = (1471,)
Client 2: X = (1471, 561), y = (1471,)
Client 3: X = (1470, 561), y = (1470,)
Client 4: X = (1470, 561), y = (1470,)
Client 5: X = (1470, 561), y = (1470,)


In [9]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

def train_local_model(X, y, max_iter=200):
    """Fit a logistic-regression classifier on one client's local data.

    Parameters
    ----------
    X : array-like
        Feature matrix for the client.
    y : array-like
        Class labels aligned with the rows of ``X``.
    max_iter : int, default 200
        Iteration cap passed to the lbfgs solver.

    Returns
    -------
    LogisticRegression
        The fitted estimator.
    """
    # `multi_class` is deprecated in recent scikit-learn releases; with lbfgs and
    # three or more classes the default already fits a multinomial model, so the
    # explicit argument only produced a FutureWarning.
    # `n_jobs` is documented as parallelising one-vs-rest fits only, so it had no
    # effect here and is dropped as well.
    model = LogisticRegression(
        max_iter=max_iter,
        solver="lbfgs"
    )
    model.fit(X, y)
    return model
# Train one independent model per client partition, in client order.
local_models = []

for client_no, (X_part, y_part) in enumerate(zip(X_clients, y_clients), start=1):
    local_models.append(train_local_model(X_part, y_part))
    print(f"Client {client_no} trained")




Client 1 trained




Client 2 trained




Client 3 trained




Client 4 trained




Client 5 trained


In [18]:
import numpy as np
from sklearn.linear_model import LogisticRegression

def federated_average(models, all_classes, weights=None):
    """Build a global classifier by averaging the parameters of local models.

    Parameters
    ----------
    models : sequence of fitted LogisticRegression
        Local models; each must expose ``classes_``, ``coef_`` and ``intercept_``.
    all_classes : array-like
        Class labels of the global model. Every local model must have been
        fitted on exactly these classes in this order, otherwise the rows of
        the coefficient matrices would not refer to the same class.
    weights : array-like of shape (len(models),), optional
        Per-client weights for the average (e.g. each client's sample count).
        ``None`` (the default) gives every client equal weight.

    Returns
    -------
    LogisticRegression
        An estimator whose ``classes_``, ``coef_``, ``intercept_`` and
        ``n_features_in_`` are set from the averaged parameters; ``fit`` is
        never called on it.

    Raises
    ------
    ValueError
        If ``models`` is empty, a model's ``classes_`` differs from
        ``all_classes``, the parameter shapes differ between models, or
        ``weights`` does not have one entry per model.
    ZeroDivisionError
        If ``weights`` sums to zero (raised by ``np.average``).
    """
    models = list(models)
    if not models:
        raise ValueError("federated_average needs at least one local model")

    all_classes = np.asarray(all_classes)
    for position, local_model in enumerate(models):
        # A client that never saw some class has fewer coefficient rows; averaging
        # it with the others would mix parameters of different classes.
        if not np.array_equal(local_model.classes_, all_classes):
            raise ValueError(
                f"model {position} was fitted on classes {local_model.classes_}, "
                f"expected {all_classes}"
            )

    # np.stack raises ValueError if the per-model shapes disagree.
    stacked_coef = np.stack([m.coef_ for m in models])
    stacked_intercept = np.stack([m.intercept_ for m in models])

    if weights is None:
        avg_coef = stacked_coef.mean(axis=0)
        avg_intercept = stacked_intercept.mean(axis=0)
    else:
        weights = np.asarray(weights, dtype=float)
        if weights.shape != (len(models),):
            raise ValueError(
                f"weights must have shape ({len(models)},), got {weights.shape}"
            )
        avg_coef = np.average(stacked_coef, axis=0, weights=weights)
        avg_intercept = np.average(stacked_intercept, axis=0, weights=weights)

    global_model = LogisticRegression(
        max_iter=200,
        solver="lbfgs"
    )

    # Install the averaged parameters directly instead of fitting.
    global_model.classes_ = all_classes
    global_model.coef_ = avg_coef
    global_model.intercept_ = avg_intercept
    # Lets scikit-learn reject inputs with the wrong number of features at predict time.
    global_model.n_features_in_ = avg_coef.shape[1]

    return global_model


In [19]:
# Sorted distinct label ids seen in the training labels define the global class set.
train_label_ids = y_train["label"]
all_classes = np.unique(train_label_ids)

# Aggregate the per-client models into a single global model.
global_model = federated_average(local_models, all_classes)
print("Global model created with classes:", global_model.classes_)


Global model created with classes: [1 2 3 4 5 6]


In [16]:
# Debug probe: reports whether the aggregation cell has been run in this kernel.
global_model_defined = "global_model" in globals()
print("global_model exists:", global_model_defined)


global_model exists: True


In [23]:
from sklearn.metrics import accuracy_score

# Score the aggregated model on the held-out test split (scaled with the train-fitted scaler).
y_true = y_test["label"]
y_pred = global_model.predict(X_test_scaled)
accuracy = accuracy_score(y_true, y_pred)

print("Global model accuracy:", round(accuracy, 4))


Global model accuracy: 0.9474
