In [1]:
import math
import tensorflow as tf
import pandas as pd
import numpy as np

import matplotlib
from tensorflow import keras
%matplotlib inline
%config InlineBackend.figure_format='retina'
matplotlib.rcParams.update(matplotlib.rcParamsDefault)
import linecache
import matplotlib.pyplot as plt
from os import listdir
from os.path import isfile, join
from tensorflow_federated import python as tff
from collections import OrderedDict
import nest_asyncio
nest_asyncio.apply()
import random
import time

2023-04-12 04:43:27.738233: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Client-side input pipeline settings, consumed by preprocess().
NUM_EPOCHS = 4  # how many times each client dataset is repeated
BATCH_SIZE = 40  # examples per batch
SHUFFLE_BUFFER = 100  # buffer size handed to Dataset.shuffle
PREFETCH_BUFFER = 10  # number of batches to prefetch
# Windowing settings.
time_steps =48*3  # length of each input window, in samples (144)
interval = 20000  # number of file lines read per client in each training round
future_steps = 12  # number of samples after each window that form the target
split=0.8  # fraction of each series used for training; the remainder is the test part

In [3]:
def preprocess(dataset):
    """Turn a dataset of (x, y) pairs into shuffled, batched training input.

    The dataset is repeated NUM_EPOCHS times, shuffled with a fixed seed,
    grouped into batches of BATCH_SIZE, and each batch is re-packed as an
    OrderedDict with the keys 'x' and 'y' before being prefetched.
    """

    def to_xy_dict(features, targets):
        # Key order matters for the resulting element structure: 'x', then 'y'.
        return OrderedDict([('x', features), ('y', targets)])

    repeated = dataset.repeat(NUM_EPOCHS)
    shuffled = repeated.shuffle(SHUFFLE_BUFFER, seed=1)
    batched = shuffled.batch(BATCH_SIZE)
    keyed = batched.map(to_xy_dict)
    return keyed.prefetch(PREFETCH_BUFFER)

def read_subset(file, lower, upper):
    """Return lines ``lower`` .. ``upper - 1`` of ``file`` as a list of strings.

    Each line is fetched with ``linecache.getline`` and has newline
    characters stripped from both ends. ``linecache`` yields an empty string
    for a line number that does not exist, so such positions appear as ``''``.
    """
    return [
        linecache.getline(file, line_number).strip('\n')
        for line_number in range(lower, upper)
    ]

def conv_float(data):
    """Convert every element of ``data`` to ``float`` in place.

    The sequence passed in is modified and the very same object is returned.
    """
    for position, raw_value in enumerate(data):
        data[position] = float(raw_value)
    return data
    
def is_float(element):
    """Report whether ``element`` can be converted with ``float()``.

    Returns True when ``float(element)`` succeeds and False otherwise.
    Note that strings such as 'nan' and 'inf' are accepted by ``float()``
    and therefore count as floats here.
    """
    try:
        float(element)
    except (TypeError, ValueError):
        # ValueError: unparsable text such as 'Null' or ''.
        # TypeError: objects float() does not accept at all, e.g. None.
        return False
    return True
    
def create_dataset_fed(files, lower, upper, time_steps=1):
    """Build one windowed ``tf.data.Dataset`` per client file.

    For every name in ``files`` the lines ``lower`` .. ``upper - 1`` of
    ``./ExperementData/<name>`` are read and cut into sliding windows:
    ``time_steps`` consecutive values form the input and the following
    ``future_steps`` values (module-level setting) form the target. A window
    is dropped when any of its values cannot be parsed as a float.

    Args:
        files: iterable of file names inside ``./ExperementData/``.
        lower: first line number to read.
        upper: line number to stop before.
        time_steps: length of each input window.

    Returns:
        A list of datasets yielding ``(x, y)`` pairs shaped
        ``(time_steps, 1)`` and ``(future_steps, 1)``. Files that yield no
        valid window are skipped, so the list can be shorter than ``files``.
    """
    datasets = []
    for file in files:
        data = read_subset(f'./ExperementData/{file}', lower, upper)
        x_t, y_t = [], []
        # NOTE(review): this bound leaves the last two possible windows
        # unused; it is kept unchanged so the produced training set is the same.
        for i in range(len(data) - time_steps - 1 - future_steps):
            v = data[i:(i + time_steps)]
            z = data[(i + time_steps):(i + time_steps + future_steps)]
            if check_nulls(z) and check_nulls(v):
                x_t.append(conv_float(v))
                y_t.append(conv_float(z))
        if not x_t:
            # An empty list becomes a 1-D array, which cannot be indexed
            # with [:, :, np.newaxis]; such a client has nothing to train on.
            continue
        x_arr = np.array(x_t)[:, :, np.newaxis]
        y_arr = np.array(y_t)[:, :, np.newaxis]
        # Clients may keep different numbers of windows, so the per-client
        # arrays are turned into datasets directly instead of being stacked
        # into a single (possibly ragged) array first.
        datasets.append(tf.data.Dataset.from_tensor_slices((x_arr, y_arr)))
    return datasets

def make_federated_data(files, lower, upper):
    """Create the preprocessed per-client datasets for one training round.

    Args:
        files: client file names, forwarded to ``create_dataset_fed``.
        lower: first line number to read from every file.
        upper: line number to stop before.

    Returns:
        A list with one ``preprocess``-ed dataset per dataset returned by
        ``create_dataset_fed`` (window length taken from the module-level
        ``time_steps``).
    """
    client_datasets = create_dataset_fed(files, lower, upper, time_steps)
    # create_dataset_fed only returns Dataset objects, never a False
    # sentinel, so every entry is preprocessed without filtering.
    return [preprocess(client_dataset) for client_dataset in client_datasets]

def create_keras_model():
    """Build the forecasting network: one LSTM layer followed by a dense head.

    The input is a window shaped ``(time_steps, 1)``; the output has one unit
    per forecast step, i.e. ``future_steps`` units.
    """
    return tf.keras.models.Sequential([
        keras.layers.LSTM(64, input_shape=(time_steps, 1)),
        # Sized from future_steps (rather than a literal) so the head always
        # matches the length of the targets built by create_dataset_fed.
        keras.layers.Dense(future_steps),
    ])

def model_fn():
    """Return a TFF learning model wrapping a newly built Keras network.

    The element spec is taken from the module-level
    ``preprocessed_example_dataset``; training uses mean squared error as the
    loss and reports root mean squared error as a metric.
    """
    # The Keras model has to be constructed inside this function on every
    # call and must not be captured from an outer scope: TFF invokes the
    # function within different graph contexts.
    network = create_keras_model()
    spec = preprocessed_example_dataset.element_spec
    mse_loss = tf.keras.losses.MeanSquaredError()
    rmse_metric = tf.keras.metrics.RootMeanSquaredError()
    return tff.learning.from_keras_model(
        network,
        input_spec=spec,
        loss=mse_loss,
        metrics=[rmse_metric],
    )
def check_nulls(data):
    """Return True only when every element of ``data`` passes ``is_float``.

    An empty ``data`` yields True.
    """
    for item in data:
        if not is_float(item):
            # One unparsable entry (e.g. 'Null') rejects the whole sequence.
            return False
    return True
    
def create_dataset(data, time_steps=1, horizon=None):
    """Cut one series into (input window, target window) pairs.

    Args:
        data: sequence of samples; entries that are not float-convertible
            (e.g. 'Null') invalidate every window that contains them.
        time_steps: length of each input window.
        horizon: length of each target window. When omitted, the
            module-level ``future_steps`` is used so that evaluation windows
            match the ones built for training.

    Returns:
        A tuple ``(Xs, ys)`` of NumPy arrays holding the kept input and
        target windows. Both are empty 1-D arrays when no window is valid.
    """
    if horizon is None:
        horizon = future_steps
    Xs, ys = [], []
    # NOTE(review): this bound leaves the last possible window unused; it is
    # kept unchanged so existing results stay the same.
    for i in range(len(data) - time_steps - horizon):
        v = data[i:(i + time_steps)]
        z = data[(i + time_steps):(i + time_steps + horizon)]
        if check_nulls(z) and check_nulls(v):
            ys.append(z)
            Xs.append(v)
    return np.array(Xs), np.array(ys)

In [4]:
data = []

# Collect the names of all regular '.txt' files in the data directory.
onlyfiles = [
    name
    for name in listdir('./ExperementData/')
    if name.endswith(".txt") and isfile(join('./ExperementData/', name))
]
onlyfiles

['3081.txt',
 '4076.txt',
 '2407.txt',
 '3296.txt',
 '1318.txt',
 '1330.txt',
 '3447.txt',
 '3041.txt',
 '4049.txt',
 '1904.txt',
 '1086.txt',
 '2202.txt',
 '4129.txt',
 '2574.txt',
 '4111.txt',
 '1727.txt',
 '1055.txt',
 '3871.txt',
 '2776.txt',
 '3497.txt',
 '3050.txt',
 '3910.txt',
 '2239.txt',
 '3133.txt',
 '1081.txt',
 '3046.txt',
 '1447.txt',
 '3330.txt',
 '1839.txt',
 '4163.txt',
 '3195.txt',
 '3585.txt',
 '3036.txt',
 '2315.txt',
 '2301.txt',
 '2922.txt',
 '3427.txt',
 '2067.txt',
 '1180.txt',
 '1143.txt',
 '1619.txt',
 '2304.txt',
 '3346.txt',
 '1343.txt',
 '2474.txt',
 '1627.txt',
 '3781.txt',
 '2893.txt',
 '2065.txt',
 '2529.txt',
 '2501.txt',
 '1950.txt',
 '2685.txt',
 '2121.txt',
 '2647.txt',
 '3405.txt',
 '3820.txt',
 '2519.txt',
 '3349.txt',
 '1827.txt',
 '3599.txt',
 '2081.txt',
 '2536.txt',
 '2522.txt',
 '1610.txt',
 '3359.txt',
 '3167.txt',
 '1980.txt',
 '3617.txt',
 '1404.txt',
 '2680.txt',
 '3777.txt',
 '1809.txt',
 '1834.txt',
 '2235.txt',
 '1660.txt',
 '1113.txt',

In [5]:
# Load every client series into memory: one list per file, in `onlyfiles`
# order. `data` is rebuilt from scratch here so that re-running this cell
# does not append a second copy of every series.
data = []
for f in onlyfiles:
    with open('./ExperementData/' + f, 'r') as reader:
        # 'Null' markers are kept as the string 'Null'; every other line is
        # parsed as a float.
        temp = [
            'Null' if text == 'Null' else float(text)
            for text in (line.strip() for line in reader)
        ]
    data.append(temp)

In [6]:
# Cut every series at the same index: the leading `split` share goes to
# `train` (all series flattened into one array), the rest to `test`.
# NOTE(review): 25727 is presumably the number of samples per file - confirm.
split_index = int(25727*split)
train_parts = [series[:split_index] for series in data]
train = [np.array(train_parts).flatten()]
test = [series[split_index:] for series in data]

In [7]:
# print(X_train.shape, y_train.shape)

In [14]:
NUM_ROUNDS = 5
# TODO(review): an earlier note here asked whether 95 rounds (rather than 5)
# was intended - confirm the round count.
WINDOW_SEED = 1  # seeds the per-round training-window choice so runs are repeatable

# One client file is enough to provide the element_spec that model_fn() reads
# from `preprocessed_example_dataset`.
example_dataset = create_dataset_fed(['1086.txt'], 1, 500, time_steps)[0]
preprocessed_example_dataset = preprocess(example_dataset)
iterative_process = tff.learning.algorithms.build_weighted_fed_avg(
    model_fn,
    client_optimizer_fn=lambda: keras.optimizers.Adam(0.001),
    server_optimizer_fn=lambda: tf.keras.optimizers.SGD(learning_rate=1.0))
state = iterative_process.initialize()

# A dedicated generator keeps the window choice reproducible without
# touching the global `random` state.
window_rng = random.Random(WINDOW_SEED)
# Largest start line for which the whole window still lies inside the
# training part of the series.
max_location = int(25727*split) - interval

start_time = time.time()
for i in range(NUM_ROUNDS):
    selected_files = onlyfiles
    location = window_rng.randint(1, max_location)
    print(location)
    print(i)
    federated_train_data = make_federated_data(selected_files, location, location+interval)
    if len(federated_train_data) == 0:
        # Make a skipped round visible instead of passing over it silently.
        print('round {:2d}, skipped: no client data'.format(i))
        continue
    result = iterative_process.next(state, federated_train_data)
    state = result.state
    metrics = result.metrics
    print('round {:2d}, metrics={}'.format(i, metrics))
print("--- %s seconds ---" % (time.time() - start_time))

277
0
round  0, metrics=OrderedDict([('distributor', ()), ('client_work', OrderedDict([('train', OrderedDict([('root_mean_squared_error', 0.82401735), ('loss', 0.67900485), ('num_examples', 7874400), ('num_batches', 196900)]))])), ('aggregator', OrderedDict([('mean_value', ()), ('mean_weight', ())])), ('finalizer', OrderedDict([('update_non_finite', 0)]))])
--- 12295.58718585968 seconds ---


In [15]:
# Build a plain Keras model with the same architecture as the federated one.
model_for_inference = create_keras_model()
# Previous form, kept for reference:
# state.model.assign_weights_to(model_for_inference)

# Copy the global model weights held in the server state into the Keras
# model so it can be used for prediction.
weights = state.global_model_weights
weights.assign_weights_to(model_for_inference)

In [16]:
# Predict on the held-out part of every series and write predictions and
# ground truth to CSV, one file per client. The output directories are not
# created here and must already exist.
for i, series in enumerate(test):
    X_test, y_test = create_dataset(series, time_steps)
    if X_test.size > 0 and y_test.size > 0:
        X_test = X_test[:,:,np.newaxis]
        y_test = y_test[:,:,np.newaxis]
        y_pred = model_for_inference.predict(X_test)
        # File name without its extension; unlike a fixed-width slice this
        # also works for stems that are not exactly four characters long.
        stem = onlyfiles[i].rsplit('.', 1)[0]
        # Reshape to (n_windows, n_outputs) explicitly. np.squeeze would also
        # drop the row axis when a series yields a single window, which
        # would transpose the exported table.
        dataframe = pd.DataFrame(np.array(y_pred).reshape(len(X_test), -1))
        dataframe.to_csv(r"./no_clust_results/pred/"+stem+'.csv')
        dataframe = pd.DataFrame(y_test.reshape(len(y_test), -1))
        dataframe.to_csv(r"./no_clust_results/test/"+stem+'.csv')

    
# for i in range(len(test)):
#     X_test, y_test = create_dataset(test[i], time_steps)
# #     print(X_test, y_test)
#     if X_test.size > 0 and y_test.size > 0:
#         X_test = X_test[:,:,np.newaxis]
#         y_test = y_test[:,:,np.newaxis]
#         model_for_inference = create_keras_model()
#         weights = state[alloc[onlyfiles[i]]].global_model_weights
#     #     weights = process[c].get_model_weights(state[alloc[onlyfiles[i]]]) = guesses
#         weights.assign_weights_to(model_for_inference)
#     #     state[alloc[onlyfiles[i]]].model.assign_weights_to(model_for_inference) = old code

#     #     process.get_model_weights(state[alloc[onlyfiles[i]]]) = what i found online

#     #     assigning weights from fed model to new keras model for prediciton
#         y_pred = model_for_inference.predict(X_test)
#         dataframe = pd.DataFrame(np.squeeze(np.array(y_pred)))
#     #     dataframe.to_csv(r"./fed_weight_clust/pred/"+onlyfiles[i][:4]+'.csv')
#         dataframe.to_csv(r"./num_clust_four/pred-fuzzycmeans/"+onlyfiles[i][:4]+'.csv')
#         dataframe = pd.DataFrame(np.squeeze(np.array(y_test)))
#     #     dataframe.to_csv(r"./fed_weight_clust/test/"+onlyfiles[i][:4]+'.csv')
#         dataframe.to_csv(r"./num_clust_four/test-fuzzycmeans/"+onlyfiles[i][:4]+'.csv')



In [1]:
# max_time = 2
# start_time = time.time()
# while time.time() -start_time < max_time:
#     print('hi')