In [3]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split

# Load the dataset

data = pd.read_csv('sensor_data_1000.csv')

# Drop unnecessary columns
data = data.drop(columns=['Unnamed: 0', 'timestamp'])

# Encode the 'location' column
label_encoder = LabelEncoder()
data['location'] = label_encoder.fit_transform(data['location'])

# Normalize the features
scaler = StandardScaler()
features = data.drop(columns=['anomalized'])
scaled_features = scaler.fit_transform(features)
scaled_data = pd.DataFrame(scaled_features, columns=features.columns)
scaled_data['anomalized'] = data['anomalized']

# Split the dataset by location
locations = scaled_data['location'].unique()
location_data = {loc: scaled_data[scaled_data['location'] == loc].drop(columns=['location']) for loc in locations}

# Create training and test sets for each location
train_data = {}
test_data = {}
for loc in locations:
    loc_data = location_data[loc]
    train, test = train_test_split(loc_data, test_size=0.2, random_state=42)
    train_data[loc] = train
    test_data[loc] = test

# Define the model training function
def train_model(X, y):
    model = LogisticRegression(max_iter=1000)
    model.fit(X, y)
    return model

# Define the model aggregation function with classes_ attribute setting
def aggregate_models(models):
    avg_coef = np.mean([model.coef_ for model in models], axis=0)
    avg_intercept = np.mean([model.intercept_ for model in models], axis=0)
    
    global_model = LogisticRegression()
    global_model.coef_ = avg_coef
    global_model.intercept_ = avg_intercept
    global_model.classes_ = models[0].classes_  # Manually set the classes_ attribute
    return global_model

# Federated Learning Process
NUM_ROUNDS = 10
global_model = None

for round_num in range(NUM_ROUNDS):
    local_models = []
    
    # Train a model on each client's data
    for loc in locations:
        X_train = train_data[loc].drop(columns=['anomalized']).values
        y_train = train_data[loc]['anomalized'].values
        local_model = train_model(X_train, y_train)
        local_models.append(local_model)
    
    # Aggregate the local models to update the global model
    global_model = aggregate_models(local_models)
    
    # Evaluate the global model on the test data
    X_test = np.vstack([test_data[loc].drop(columns=['anomalized']).values for loc in locations])
    y_test = np.hstack([test_data[loc]['anomalized'].values for loc in locations])
    y_pred = global_model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    print(f'Round {round_num+1}, Accuracy: {accuracy}')


Round 1, Accuracy: 0.8259833333333333
Round 2, Accuracy: 0.8259833333333333
Round 3, Accuracy: 0.8259833333333333
Round 4, Accuracy: 0.8259833333333333
Round 5, Accuracy: 0.8259833333333333
Round 6, Accuracy: 0.8259833333333333
Round 7, Accuracy: 0.8259833333333333
Round 8, Accuracy: 0.8259833333333333
Round 9, Accuracy: 0.8259833333333333
Round 10, Accuracy: 0.8259833333333333
