In [18]:
from kafka import KafkaProducer
from kafka.admin import KafkaAdminClient, NewTopic

import os
import time
from colorama import Fore, Back, Style

from constants import *

## Kafka Admin

In [19]:
# Admin client for the cluster; the cells below use it to list, delete and create topics
kafka_admin = KafkaAdminClient(bootstrap_servers=KAFKA_BOOTSTRAP_SERVERS)

In [20]:
# Testing reset: delete every topic the cluster currently reports.
# NOTE: nothing is filtered out, so internal topics returned by list_topics()
# (e.g. '__consumer_offsets' in the recorded output) are deleted as well.
topic_names = kafka_admin.list_topics()
print("Before deleting - list of topics:", topic_names)

kafka_admin.delete_topics(topics=topic_names)
topics_after_delete = kafka_admin.list_topics()
print("After deleting  - list of topics:", topics_after_delete)

Before deleting - list of topics: ['__consumer_offsets', 'results', 'chunk_data']
After deleting  - list of topics: []


In [21]:
# Create the two topics of the pipeline; both share the same layout
# (12 partitions, replication factor 1):
#   'chunk_data' -> raw data topic
#   'results'    -> FFT average topic
topic_in, topic_out = (
    NewTopic(name=topic_name, num_partitions=12, replication_factor=1)
    for topic_name in ('chunk_data', 'results')
)

kafka_admin.create_topics(new_topics=[topic_in, topic_out])
print("List of topics:", kafka_admin.list_topics())

List of topics: ['results', 'chunk_data']


## Kafka Producer

In [22]:
# Sanity check: show the data-layout constants used by the producer below
print(
    "N. samples:", n_samples,
    "\nN. bins in frequency spectrum:", n_bins,
    "\nN. of slice computed:", n_slice,
    "\ndelta_nu:", delta_nu,
)

N. samples: 8388608 
N. bins in frequency spectrum: 3072 
N. of slice computed: 2731 
delta_nu: 651.0416666666666


In [23]:
def get_file_paths(folder_path):
    """Return the path of every file found under `folder_path`.

    The folder is traversed recursively with `os.walk`; each returned entry is
    the directory being visited joined with the file name. A folder that does
    not exist or contains no files yields an empty list.
    """
    return [
        os.path.join(current_dir, file_name)
        for current_dir, _, file_names in os.walk(folder_path)
        for file_name in file_names
    ]

# Folder containing the input files. It can be overridden with the
# DUCK_DATA_DIR environment variable so the notebook is not tied to one
# machine; when the variable is unset or empty the original local path is used.
# os.path.join(..., "") appends the trailing separator if it is missing, which
# matters because send_chunks prefixes file names with this folder.
folder_path = os.path.join(
    os.environ.get("DUCK_DATA_DIR") or "/home/lupi/AndreaFolder/LocalData/",
    "",
)
file_paths = get_file_paths(folder_path)

def find_partner(arr):
    """Pair the real ('duck_i_*') and imaginary ('duck_q_*') files listed in `arr`.

    Parameters
    ----------
    arr : list of str
        File names or full paths. Only the last path component is looked at,
        so file names of any length are handled. The list is not modified.

    Returns
    -------
    list of [str, str]
        One ``[real_name, imaginary_name]`` pair of bare file names (no folder)
        for every file whose counterpart is also present. Pairs are ordered by
        the first appearance of either member in `arr`, and each file name is
        paired at most once. Entries without a counterpart, or without one of
        the two prefixes, are ignored.
    """
    real_prefix, imag_prefix = 'duck_i_', 'duck_q_'

    # Work on bare file names; basename avoids relying on a fixed name length.
    names = [os.path.basename(path) for path in arr]
    known = set(names)    # constant-time "is the counterpart present?" lookup
    paired = set()        # names already placed in a pair
    partner_arr = []

    for name in names:
        if name in paired:
            continue

        # Build the (real, imaginary) candidate pair by swapping the prefix
        # and keeping everything that follows it unchanged.
        if name.startswith(real_prefix):
            real_name = name
            imag_name = imag_prefix + name[len(real_prefix):]
        elif name.startswith(imag_prefix):
            real_name = real_prefix + name[len(imag_prefix):]
            imag_name = name
        else:
            continue

        if real_name in known and imag_name in known:
            partner_arr.append([real_name, imag_name])
            paired.update((real_name, imag_name))

    return partner_arr

    
def read_binary_file(filename):
    """Return the whole content of `filename` as a bytes object."""
    with open(filename, mode='rb') as stream:
        return stream.read()

def get_number_from_filename(filename):
    """Return the integer written between the last '_' of `filename` and the first '.' after it."""
    tail = filename.rpartition('_')[2]    # text following the last underscore
    digits = tail.partition('.')[0]       # drop the extension(s)
    return int(digits)

In [24]:
def send_chunks(file_paths,dirPath,DAQ_period=5):
    """Read every real/imaginary file pair, slice it and send it to Kafka.

    Relies on names defined at notebook level: `chunk_producer` (the Kafka
    producer used for sending) and the constants `n_slice` and `n_bins`.

    Parameters
    ----------
    file_paths : list of str
        Candidate files; they are paired by `find_partner`.
    dirPath : str
        Folder the paired file names are read from.
    DAQ_period : float, optional
        Minimum time in seconds spent on each file pair. When a pair is sent
        and flushed faster than this, the function sleeps for the difference.

    Message layout
    --------------
    One message per slice is sent to the "chunk_data" topic:
    * value: the 4*n_bins bytes of the slice taken from the real file followed
      by the same byte range taken from the imaginary file. A slice that
      reaches past the end of a file is simply shorter.
    * key: file number and slice index, each as a 2-byte big-endian integer
      (`int.to_bytes` raises OverflowError for values above 65535).
    """

    # list of [real, imaginary] file-name pairs, ordered by file number
    partners = sorted(find_partner(file_paths),
                      key=lambda x: get_number_from_filename(x[0]))

    startTot = time.time()
    wastedTime=0

    slice_bytes = 4*n_bins   # one float every 4 bytes

    for couple in partners:
        start_time = time.time()

        # same parser used for sorting above, instead of a fixed-position slice
        file_num = get_number_from_filename(couple[0])
        file_key = file_num.to_bytes(2, "big")

        # read all data from input files; os.path.join also works when
        # dirPath has no trailing separator
        real = read_binary_file(os.path.join(dirPath, couple[0]))
        imag = read_binary_file(os.path.join(dirPath, couple[1]))

        # unpack data: each message contains exactly one slice
        for f in range(n_slice):

            first = slice_bytes*f
            last = first + slice_bytes
            msg = real[first:last] + imag[first:last]

            # key = file number + slice number
            key = file_key + f.to_bytes(2, "big")

            print(Fore.RED +"Sending file",file_num,"\tslice number:",f+1,end="\r")

            # send to Kafka topic
            chunk_producer.send(topic = "chunk_data",
                                key   = key,
                                value = msg)

        end_time1 = time.time()
        deltat = end_time1 - start_time
        # blank out the progress line before printing the summary
        print("                                                                 ",end="\r")
        print("File", file_num,"commissioned in", round(deltat,3), "s!")

        chunk_producer.flush()  # Flush the producer after sending the entire file

        end_time2 = time.time()
        deltat = end_time2 - start_time
        print("File", file_num,"completed in", round(deltat,3), "s!")
        print("------------------------------")

        # time spent waiting for the flush to complete
        wastedTime+=(end_time2 - end_time1)

        # sleep to reproduce DAQ acquisition time
        if deltat < DAQ_period:
            time.sleep(DAQ_period - deltat)

    endTot = time.time()
    deltaTot = endTot - startTot

    print("                                                                 ")
    print("                                                                 ")
    print("------------------------------")
    print(Fore.GREEN+"Total time", round(deltaTot,3), "s!")
    print(Fore.RED +"Wasted time", round(wastedTime,3), "s!")
    # restore the default colours instead of forcing a black foreground
    print(Style.RESET_ALL +"------------------------------")

In [25]:
# Create the producer used by send_chunks, stream every file pair, and always
# close the producer afterwards, even if sending fails part-way through.
chunk_producer = KafkaProducer(bootstrap_servers=KAFKA_BOOTSTRAP_SERVERS)
try:
    send_chunks(file_paths,folder_path)
finally:
    chunk_producer.close()

File 0 commissioned in 5.538 s!                                  
File 0 completed in 17.823 s!
------------------------------
File 1 commissioned in 5.521 s!                                  
File 1 completed in 16.968 s!
------------------------------
File 2 commissioned in 5.146 s!                                  
File 2 completed in 16.612 s!
------------------------------
File 3 commissioned in 5.047 s!                                  
File 3 completed in 16.555 s!
------------------------------
File 4 commissioned in 4.914 s!                                  
File 4 completed in 15.735 s!
------------------------------
File 5 commissioned in 5.212 s!                                  
File 5 completed in 16.957 s!
------------------------------
File 6 commissioned in 5.085 s!                                  
File 6 completed in 16.786 s!
------------------------------
File 7 commissioned in 5.244 s!                                  
File 7 completed in 17.219 s!
---------------