In [None]:
import math
import os
import time

from colorama import Fore, Back, Style
from kafka import KafkaProducer
from kafka.admin import KafkaAdminClient, NewTopic

from name import *

## Kafka Admin

In [None]:
# admin client bound to the cluster; used by the following cells
# to list, delete and create topics
kafka_admin = KafkaAdminClient(bootstrap_servers=KAFKA_BOOTSTRAP_SERVERS)

In [None]:
# delete topics for testing purposes
# Every topic name returned by list_topics() is handed to delete_topics(),
# so this cell wipes the whole cluster, not only the topics created below.
# NOTE(review): the returned list may also contain broker-internal topics
# (e.g. consumer offsets) — confirm that deleting those is intended.
topic_names=kafka_admin.list_topics()
print("Before deleting - list of topics:", topic_names)

kafka_admin.delete_topics(topics=topic_names)
# NOTE(review): the list printed here is fetched right after the delete
# request; whether deletion has already completed at this point depends on
# the broker — verify before relying on this output.
print("After deleting  - list of topics:", kafka_admin.list_topics())

In [None]:
# create the two topics used by the pipeline; both share the same layout
# (12 partitions, replication factor 1)
topic_in, topic_out = (
    NewTopic(name=topic_label, num_partitions=12, replication_factor=1)
    for topic_label in (
        'chunk_data',   # raw data topic
        'results',      # FFT average topic
    )
)

kafka_admin.create_topics(new_topics=[topic_in, topic_out])
print("List of topics:", kafka_admin.list_topics())

## Kafka Producer

In [None]:
# check constants for data structure
# n_samples, n_bins, n_slice and delta_nu are not assigned in the preceding
# cells; presumably they are provided by `from name import *` — confirm.
print("N. samples:", n_samples,
      "\nN. bins in frequency spectrum:", n_bins,
      "\nN. of slice computed:", n_slice,
      "\ndelta_nu:", delta_nu)

In [None]:
def get_file_paths(folder_path):
    """Return the full path of every file located under ``folder_path``.

    The tree is traversed with ``os.walk``, so files inside sub-folders are
    included as well. Each entry is the containing directory joined with the
    file name. A folder that does not exist yields an empty list.
    """
    return [
        os.path.join(current_dir, entry)
        for current_dir, _subdirs, entries in os.walk(folder_path)
        for entry in entries
    ]

# machine-specific absolute location of the input files; adapt to the local setup
folder_path = "/home/lupi/AndreaFolder/LocalData/"
# full paths of all files found under folder_path (sub-folders included)
file_paths = get_file_paths(folder_path)

def find_partner(arr):
    """Pair every 'duck_i_*' file with its matching 'duck_q_*' file.

    Only the final path component of each entry is considered, so the file
    names may have any length. Two files are partners when their names are
    identical except for the 'duck_i_' / 'duck_q_' prefix.

    Parameters
    ----------
    arr : list of str
        File paths (or bare file names). The list is not modified.

    Returns
    -------
    list of [str, str]
        ``[i_name, q_name]`` pairs of bare file names (no directory), in the
        order in which the first member of each pair appears in ``arr``.
        Files without a partner and files with any other prefix are ignored.
        A name that occurs several times is paired at most as many times as
        its partner occurs.
    """
    prefix_i = 'duck_i_'
    prefix_q = 'duck_q_'

    # keep the bare file name only, whatever its length
    names = [os.path.basename(path) for path in arr]

    # how many occurrences of each name are still available for pairing
    remaining = {}
    for name in names:
        remaining[name] = remaining.get(name, 0) + 1

    partner_arr = []
    for name in names:
        if not remaining[name]:
            continue    # already consumed as the partner of an earlier file

        if name.startswith(prefix_i):
            partner = prefix_q + name[len(prefix_i):]
            pair = [name, partner]
        elif name.startswith(prefix_q):
            partner = prefix_i + name[len(prefix_q):]
            pair = [partner, name]
        else:
            continue    # not a duck_i_/duck_q_ file

        if remaining.get(partner, 0):
            remaining[name] -= 1
            remaining[partner] -= 1
            partner_arr.append(pair)

    return partner_arr

    
def read_binary_file(filename):
    """Return the whole content of ``filename`` as a ``bytes`` object."""
    with open(filename, 'rb') as handle:
        return handle.read()

def get_number_from_filename(filename):
    """Return the integer located between the last '_' and the next '.' of ``filename``."""
    tail = filename.rpartition('_')[2]      # text after the last underscore
    digits = tail.partition('.')[0]         # drop the extension
    return int(digits)

In [None]:
# number of slices packed into each Kafka message
slices_per_msg = 1
# messages produced per file, rounded up so that a partial last message is kept
msg_number = math.ceil(n_slice/slices_per_msg)

# function to read files, unpack them and send them to Kafka
def send_chunks(file_paths,dirPath,DAQ_period=5):
    """Stream every real/imaginary file pair to the "chunk_data" Kafka topic.

    The entries of ``file_paths`` are paired with ``find_partner`` and handled
    in increasing file-number order. Each pair is cut into ``msg_number``
    messages; a message is the real-part bytes of one chunk followed by the
    imaginary-part bytes of the same chunk. After each pair the producer is
    flushed, and the loop sleeps so that one pair takes at least
    ``DAQ_period`` seconds.

    Relies on the notebook globals ``chunk_producer``, ``msg_number``,
    ``slices_per_msg``, ``n_bins`` and ``n_samples``.

    Parameters
    ----------
    file_paths : list of str
        Candidate input files (paths or bare names).
    dirPath : str
        Directory containing the paired files; a trailing separator is optional.
    DAQ_period : float, optional
        Minimum time in seconds spent on each file pair (default 5).

    Raises
    ------
    OverflowError
        If a file number or a message index does not fit in its 2-byte key
        field (i.e. exceeds 65535).
    """
    # list of [real_name, imag_name] pairs, ordered by file number
    partners = sorted(find_partner(file_paths),
                      key=lambda pair: get_number_from_filename(pair[0]))

    # bytes taken from each component per message (one float every 4 bytes)
    chunk_bytes = 4*n_bins*slices_per_msg
    # never read past the expected amount of data in a file
    max_bytes = 4*n_samples

    startTot = time.time()
    wastedTime = 0

    for name_real, name_imag in partners:
        start_time = time.time()

        # read all data from input files
        binary_data_real = read_binary_file(os.path.join(dirPath, name_real))
        binary_data_imag = read_binary_file(os.path.join(dirPath, name_imag))

        # same helper as the sort key, so any number width / extension works
        file_num = get_number_from_filename(name_real)

        # unpack data
        # each message contains a number of slices equal to slices_per_msg
        # (except for the last one of each file, which contains the remainder)
        for f in range(msg_number):
            start = chunk_bytes*f
            end = min(chunk_bytes*(f+1), max_bytes)
            msg = binary_data_real[start:end] + binary_data_imag[start:end]

            # key = file number + message index, 2 big-endian bytes each
            key = file_num.to_bytes(2, "big") + f.to_bytes(2, "big")

            print(Fore.RED +"Sending file",file_num,"\tslice number:",f+1,end="\r")

            # send to Kafka topic
            chunk_producer.send(topic = "chunk_data",
                                key   = key,
                                value = msg)

        end_time1 = time.time()
        deltat = end_time1 - start_time
        # overwrite the progress line before printing the summary
        print("                                                                 ",end="\r")
        print("File", file_num,"commissioned in", round(deltat,3), "s!")

        chunk_producer.flush()  # Flush the producer after sending the entire file

        end_time2 = time.time()
        deltat = end_time2 - start_time
        print("File", file_num,"completed in", round(deltat,3), "s!")
        print("------------------------------")

        # time spent waiting for the flush to complete
        wastedTime += (end_time2 - end_time1)

        # sleep to reproduce DAQ acquisition time
        if deltat < DAQ_period:
            time.sleep(DAQ_period - deltat)

    endTot = time.time()
    deltaTot = endTot - startTot

    print("                                                                 ")
    print("                                                                 ")
    print("------------------------------")
    print(Fore.GREEN+"Total time", round(deltaTot,3), "s!")
    print(Fore.RED +"Wasted time", round(wastedTime,3), "s!")
    # restore the default colour instead of forcing black text
    print(Style.RESET_ALL +"------------------------------")

In [None]:
# producer used by send_chunks (looked up there as a notebook global)
chunk_producer = KafkaProducer(bootstrap_servers=KAFKA_BOOTSTRAP_SERVERS)
try:
    send_chunks(file_paths,folder_path)
finally:
    # release the connection even if sending fails or is interrupted
    chunk_producer.close()