# Live plot

In [None]:
from kafka import KafkaConsumer
from name import *
from bokeh.plotting import figure, show, output_notebook
from bokeh.models import ColumnDataSource, RangeSlider
from bokeh.layouts import column
from bokeh.io import push_notebook
from tornado.ioloop import PeriodicCallback
import numpy as np
import json
import pandas as pd

# Create the Kafka consumer instance that feeds the live plot.
plot_consumer = KafkaConsumer(
    bootstrap_servers=KAFKA_BOOTSTRAP_SERVERS,  # list of Kafka brokers
    # Maximum time (ms) to wait for a new message before iteration over the
    # consumer stops, so a read on an idle topic returns instead of hanging.
    consumer_timeout_ms=10_000,
)
# subscribe() is documented to take a list of topic names, so pass one
# explicitly rather than relying on a bare string being accepted.
plot_consumer.subscribe(['results'])

## Handling Kafka Input

In [None]:
# Ask the consumer for the topics it knows about. The return value is not
# assigned. NOTE(review): this is not the last expression of the cell, so the
# notebook presumably does not display it - confirm whether it is still needed.
plot_consumer.topics()

# Parse an index string of the form '<file>_<bin>' into two integers
def sep_index(index):
    """Return ``(file_number, bin_number)`` parsed from ``index``.

    ``index`` is split on ``'_'`` and must yield exactly two fields, each of
    which is converted with ``int``; anything else raises ``ValueError``.
    """
    file_num, bin_num = map(int, index.split('_'))
    return file_num, bin_num

# Fetch one message from Kafka and reshape it into per-bin lists for the plot
def read_kafka():
    """Read a single message from ``plot_consumer`` and return per-bin statistics.

    Prints a one-line progress report, takes the next message delivered by
    the consumer (if any), decodes its value as JSON and increments the
    global ``batch_counter``.

    Returns:
        dict: ``{'mean': [...], 'dev': [...]}`` where each value is a list of
        ``n_bins`` numbers indexed by bin number. Bins that do not appear in
        the message keep the value ``0.``; if the consumer yields no message
        both lists are entirely zero.
    """
    global batch_counter, n_cum
    print(f'\rReading batch: {batch_counter} \t\t analysing file: {n_cum}',end='\r')

    # Only the next available message is consumed; None means the consumer
    # iterator ended without delivering one.
    message = next(iter(plot_consumer), None)
    records = {} if message is None else json.loads(message.value)

    batch_counter += 1

    # Per-bin containers handed to Bokeh, pre-filled with zeros
    means = n_bins*[0.]
    devs = n_bins*[0.]

    for record in records:
        payload = record['data']
        # The file-number half of the index is not needed here
        _, bin_num = sep_index(payload['index'])
        means[bin_num] = payload['mean_x']
        devs[bin_num] = payload['stddev_x']

    return {'mean': means, 'dev': devs}

## Live Plot

In [None]:
# Initialize the notebook for displaying Bokeh plots
output_notebook()

# Initialize batch_counter and define the x-axis values
FS = fft_bandwidth
batch_counter = 0
# One frequency per bin, covering [LOFreq - FS/2, LOFreq + FS/2).
# linspace with endpoint=False always yields exactly n_bins points, whereas
# arange with a floating-point step may produce n_bins + 1 due to rounding,
# which would make 'freq' longer than the other data-source columns.
xaxis = np.linspace(FS/-2.0, FS/2.0, n_bins, endpoint=False) + LOFreq

# Initialize the count of accumulated batches (n_cum) and the error on the
# cumulative mean
n_cum = 0
cum_sigma = np.zeros(n_bins)

# Define data sources for the current batch, the cumulative mean and their error.
# Every column must have the same length as 'freq', i.e. n_bins.
stream_source = ColumnDataSource(data = {'freq': xaxis, 'fft' : n_bins*[0.], 'std' : n_bins*[0.] })
err_source = ColumnDataSource(data = {'freq': xaxis, 'y1' : n_bins*[0.], 'y2' : n_bins*[0.] })

# Sized with n_bins (not a hard-coded 3072) so it stays consistent with xaxis
cumulative_source = ColumnDataSource(data={'freq': xaxis, 'fft': n_bins*[0.], 'std': n_bins*[0.]})
err_cum = ColumnDataSource(data = {'freq': xaxis, 'y1' : n_bins*[0.], 'y2' : n_bins*[0.] })

# Create the Bokeh plot
plot = figure(width=900, height=450)


# Define areas for error shading and lines for data
err_fill = plot.varea(x='freq', y1='y1', y2='y2', source=err_source, alpha=.5,
                      fill_color='lightblue', legend_label='Std Batch')
line = plot.line('freq', 'fft', source=stream_source, legend_label='Current Batch',line_width=2, alpha=.8)

err_cum_fill = plot.varea(x='freq', y1='y1', y2='y2', source=err_cum, alpha=.5,
                      fill_color='bisque', legend_label='Std Cumulative')
cum_line = plot.line('freq', 'fft', source=cumulative_source,
                     legend_label='Cumulative', line_width=2, color='orange')

# Define a pandas storage for the final results (cumulative mean and its error)
storage = pd.DataFrame({'freq':xaxis,'fft':np.zeros(n_bins), 'std':np.zeros(n_bins)})



# Define a callback function to update the plot data
def update():
    """Poll Kafka once and refresh the batch and cumulative curves.

    Calls ``read_kafka()``; if the returned batch contains data, it
    updates, in order:

    * the current-batch line and its +/- one standard deviation band,
    * the running mean over all batches seen so far (``n_cum`` of them),
    * the error on that running mean, computed as
      ``1/n * sqrt((n-1)**2 * previous_error**2 + sigma**2)``,
    * the ``storage`` DataFrame holding the cumulative mean and error.

    The lower edge of both bands is clipped at zero. ``push_notebook()`` is
    called on every invocation, whether or not new data arrived.
    """
    global n_cum, cum_sigma, storage
    kafka_data = read_kafka()

    y = np.array(kafka_data['mean'])
    sigma = np.array(kafka_data['dev'])

    # read_kafka() returns all-zero lists when no message was received.
    # Look at every bin rather than only bin 0, so that a batch whose first
    # bin is zero or missing is not mistaken for an empty message.
    if np.any(y > 0):

        # Update the single batch
        stream_source.data['fft'] = kafka_data['mean']

        # Update the area for the Current Batch error
        topline = y + sigma
        bottomline = np.where(y - sigma > 0, y - sigma, 0.)

        err_source.data['y1'] = bottomline
        err_source.data['y2'] = topline

        # Update the cumulative (running mean over n_cum batches)
        n_cum += 1
        cum_y = (n_cum - 1)/n_cum*np.array(cumulative_source.data['fft']) + \
                1/n_cum*y
        cumulative_source.data['fft'] = cum_y

        # Update the area for the Cumulative error
        cum_sigma = 1/n_cum * np.sqrt(( n_cum -1 )**2 * cum_sigma**2 + sigma**2)

        cum_topline = cum_y + cum_sigma
        cum_bottomline = np.where(cum_y - cum_sigma > 0, cum_y - cum_sigma, 0.)

        # y1 is the lower edge and y2 the upper edge, as for err_source
        err_cum.data['y1'] = cum_bottomline
        err_cum.data['y2'] = cum_topline

        # Update the store Dataframe
        storage['fft'] = cum_y
        storage['std'] = cum_sigma

    push_notebook()  # Update the notebook plot

# Axis labels, plus a legend whose entries hide their glyph when clicked
plot.xaxis.axis_label = 'Frequency [Hz]'
plot.yaxis.axis_label = 'FFT'
plot.legend.click_policy = "hide"


# Render the plot in the notebook; keep the returned handle for later updates
layout = column(plot)
handle = show(layout, notebook_handle=True)
print('Initializing',end='\r')


# Schedule update() to run periodically (period given in milliseconds = 5 seconds)
callback = PeriodicCallback(callback=update, callback_time=5_000)
callback.start()

In [None]:
import os

# Stop the periodic plot update before writing the results out
callback.stop()

# Save the final results. The output directory is created first so that
# to_csv does not fail (and the run is not lost) when it does not exist yet.
results_path = 'final_results/run389.csv'
os.makedirs(os.path.dirname(results_path), exist_ok=True)
storage.to_csv(results_path, index=False)

In [None]:
# Close the Kafka consumer created at the top of the notebook
plot_consumer.close()