In [39]:
import os
import pandas as pd
import numpy as np
import re
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib import pyplot
from scipy import stats
import math
from sklearn.metrics import mean_squared_error
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow_federated as tff

In [40]:
!pip install nest_asyncio
import nest_asyncio
nest_asyncio.apply()



In [41]:
dataPath = 'Data/2016-2019(One station)/shair-8781-1-6-1.csv'
airQualityData=pd.read_csv(dataPath, header=14,sep=';').rename(columns={'Start':'Start','Slut':'Stop'})
airQualityData.rename(columns = lambda x: re.sub('NOX.*','NOX',x), inplace = True)
airQualityData.rename(columns = lambda x: re.sub('PM10.*','PM10',x), inplace = True)
airQualityData.rename(columns = lambda x: re.sub('PM2.5.*','PM2_5',x), inplace = True)
airQualityData.rename(columns = lambda x: re.sub('NO2.*','NO2',x), inplace = True)
airQualityData.rename(columns = lambda x: re.sub('O3.*','O3',x), inplace = True)
airQualityData.rename(columns = lambda x: re.sub('Black Carbon.*','Black Carbon',x), inplace = True)
airQualityData['Start'] = pd.to_datetime(airQualityData['Start'])
airQualityData= airQualityData.drop('Stop',axis=1)
one_feature=airQualityData.drop(columns=['Black Carbon','O3','PM2_5'])#
one_feature.head(5)
one_feature = one_feature.fillna(0)
# one_feature = one_feature.dropna()

In [42]:
from sklearn.preprocessing import MinMaxScaler
sc = MinMaxScaler(feature_range = (0, 1))
scaled_down=one_feature.copy()
scaled_down['PM10']=sc.fit_transform(scaled_down['PM10'].values.reshape(-1, 1))
scaled_down['NOX']=sc.fit_transform(scaled_down['NOX'].values.reshape(-1, 1))
scaled_down['NO2']=sc.fit_transform(scaled_down['NO2'].values.reshape(-1, 1))

In [43]:
train=scaled_down[(scaled_down['Start']<= "2018-12-31 23:00:00")]
test=scaled_down[(scaled_down['Start'] >= "2019-01-01 00:00:00")]

In [44]:
train['predicted_pollution'] = train['PM10'].shift(-1)

# Drop the last row (it has no value for predicted pollution)
train = train.drop(train.tail(1).index)
train=train.drop('Start',axis=1)
train.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0,NO2,NOX,PM10,predicted_pollution
0,0.039648,0.009583,0.189614,0.069444
1,0.027313,0.005719,0.069444,0.070652
2,0.018502,0.004482,0.070652,0.081522
3,0.014097,0.0034,0.081522,0.091184
4,0.012335,0.003246,0.091184,0.094807


In [45]:
test['predicted_pollution'] = test['PM10'].shift(-1)

# Drop the last row (it has no value for predicted pollution)
test = test.drop(test.tail(1).index)
test=test.drop('Start',axis=1)

test.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


Unnamed: 0,NO2,NOX,PM10,predicted_pollution
35064,0.04141,0.008501,0.243357,0.102053
35065,0.038767,0.007883,0.102053,0.051329
35066,0.030837,0.006337,0.051329,0.039855
35067,0.014097,0.003246,0.039855,0.052536
35068,0.021145,0.004946,0.052536,0.044082


In [46]:
train_values=train.values
test_values=test.values
test_X, test_y = test_values[:, :-1], test_values[:, -1]
train_X, train_y = train_values[:, :-1], train_values[:, -1]

In [54]:
train_X = train_X.reshape((train_X.shape[0], 1, train_X.shape[1]))
test_X = test_X.reshape((test_X.shape[0], 1, test_X.shape[1]))

In [55]:
import collections
import tensorflow as tf
from sklearn.model_selection import train_test_split

NUM_EPOCHS = 5
BATCH_SIZE = 20
SHUFFLE_BUFFER = 100
PREFETCH_BUFFER = 10

def preprocess(dataset):

  def batch_format_fn(element):
      return collections.OrderedDict(x=element['x'], y=element['y'])

  return dataset.batch(
      BATCH_SIZE).map(batch_format_fn).prefetch(PREFETCH_BUFFER)

# produce datasets for each origin
def make_federated_data(X_train, y_train, X_test, y_test):
        train_datasets = []
        test_datasets = []
        train_dataset = tf.data.Dataset.from_tensor_slices(
            ({'x': X_train, 'y': y_train}))
        
        test_dataset = tf.data.Dataset.from_tensor_slices(
            ({'x': X_test, 'y': y_test}))

        preprocessed_train_dataset = preprocess(train_dataset)
        preprocessed_test_dataset = preprocess(test_dataset)

        train_datasets.append(preprocessed_train_dataset)
        test_datasets.append(preprocessed_test_dataset)
        
        return train_datasets, test_datasets


In [56]:
train_datasets, test_datasets = make_federated_data(train_X, train_y, test_X, test_y)

In [57]:
from keras.models import Sequential
from keras.layers import LSTM, Dense
def build_model():
    model = Sequential()
    model.add(LSTM(50, input_shape=(train_X.shape[1], train_X.shape[2])))
    model.add(Dense(1))
    return model


In [58]:
def create_tff_model():
  return tff.learning.from_keras_model(build_model(), 
                                       input_spec=train_datasets[0].element_spec,
                                       loss=tf.keras.losses.MeanSquaredError(),
                                       metrics=[tf.keras.metrics.MeanSquaredError()])

In [59]:
print("Create averaging process")
iterative_process = tff.learning.build_federated_averaging_process(model_fn=create_tff_model,
                                                                   client_optimizer_fn = lambda: tf.keras.optimizers.SGD(0.002))

Create averaging process


In [60]:
print("Initzialize averaging process")
state = iterative_process.initialize()

print("Start iterations")
for _ in range(10):
  state, metrics = iterative_process.next(state, train_datasets)
  print('metrics={}'.format(metrics))

Initzialize averaging process
Start iterations


ValueError: input ModelDataset:handle:0 is not found: {{node ReduceDataset}}
	In {{node ReduceDataset}}

In [None]:
# Global model evaluated over all clients
evaluation = tff.learning.build_federated_evaluation(model_fn=create_tff_model)
test_metrics = evaluation(state.model, test_datasets)
print(test_metrics)

In [None]:
for i in range(len(test_datasets)):
    test_metrics = evaluation(state.model, [test_datasets[i]])
    print(test_metrics)

In [None]:
train_datasets[0].element_spec

In [None]:
test_datasets
