In [79]:
import pandas as pd
import os
import json
import sqlite3
import datetime

In [80]:
# Helper functions used by the cells below.
# File name of the SQLite database.
# NOTE(review): the query helpers defined in this cell spell out the same
# file name themselves instead of reading this variable.
dbname = "database.db"
def count_files_in_directory(directory):
    """Return how many entries directly inside ``directory`` are files.

    Sub-directories are neither counted nor descended into.
    """
    total = 0
    for entry in os.listdir(directory):
        if os.path.isfile(os.path.join(directory, entry)):
            total += 1
    return total

def list_directories_and_file_counts(start_path='.'):
    """Walk ``start_path`` and summarise every directory that contains files.

    Parameters
    ----------
    start_path : str
        Root of the tree to walk; the root itself is part of the walk.

    Returns
    -------
    pandas.DataFrame
        Columns ``Directory`` (path as yielded by ``os.walk``), ``File_Count``
        (files directly inside that directory) and ``main_dir`` (the
        directory's own name as nullable ``Int64``; ``<NA>`` when the name is
        not numeric). Directories without files are dropped. The index is not
        reset, so surviving rows keep their position in the walk as label.
    """
    directory_files_count = []
    for dirname, _dirnames, filenames in os.walk(start_path):
        # os.walk has already listed this directory, so count from that
        # listing instead of listing the directory a second time.
        file_count = sum(
            os.path.isfile(os.path.join(dirname, filename)) for filename in filenames
        )
        directory_files_count.append((dirname, file_count))

    df = pd.DataFrame(directory_files_count, columns=['Directory', 'File_Count'])
    # .copy() gives an independent frame, so adding a column below does not
    # write into a slice of the unfiltered frame.
    df = df[df['File_Count'] > 0].copy()

    # basename(normpath(...)) copes with a trailing separator and with the
    # platform's own separator, unlike splitting on '/'.
    leaf_names = [os.path.basename(os.path.normpath(path)) for path in df['Directory']]
    df['main_dir'] = pd.to_numeric(
        pd.Series(leaf_names, index=df.index, dtype=object), errors='coerce'
    ).astype('Int64')

    return df

def list_directories_and_filenames_to_dataframe(start_path='.', extensions=('.csv', '.txt')):
    """List every file under ``start_path`` whose name ends with a given extension.

    Parameters
    ----------
    start_path : str
        Root of the tree to walk recursively.
    extensions : str or iterable of str
        Accepted file-name endings (matched case-sensitively). A single
        string is treated as one extension.

    Returns
    -------
    pandas.DataFrame
        One row per matching file with columns ``Directory`` (path as yielded
        by ``os.walk``) and ``Filename``.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(extensions, str):
        extensions = (extensions,)
    # str.endswith accepts a tuple of candidate suffixes.
    suffixes = tuple(extensions)

    directory_files_data = [
        (dirname, filename)
        for dirname, _dirnames, filenames in os.walk(start_path)
        for filename in filenames
        if filename.endswith(suffixes)
    ]

    return pd.DataFrame(directory_files_data, columns=['Directory', 'Filename'])



def get_setup_file(file_path='../config/machine_setup.json'):
    """Load and return the parsed machine setup JSON.

    Parameters
    ----------
    file_path : str
        Location of the JSON file; defaults to the notebook's config file.

    Returns
    -------
    The decoded JSON document, exactly as ``json.load`` produces it.

    Raises
    ------
    OSError
        If the file cannot be opened.
    json.JSONDecodeError
        If the file is not valid JSON.
    """
    # Decode as UTF-8 explicitly instead of relying on the platform's
    # locale encoding.
    with open(file_path, 'r', encoding='utf-8') as file:
        return json.load(file)

def get_setup_file_by_name(data,machine_name):
    """Return the first entry of ``data`` whose ``'name'`` equals ``machine_name``.

    The result is ``None`` when no entry matches.
    """
    matching = (entry for entry in data if machine_name == entry['name'])
    return next(matching, None)
       
def unpack_config(config):
    """Pull the five standard settings out of one machine config.

    Returns a tuple ``(name, root, weekly_directory_format,
    files_to_monitor, directory_filter)``; a missing key raises ``KeyError``.
    """
    wanted_keys = (
        'name',
        'root',
        'weekly_directory_format',
        'files_to_monitor',
        'directory_filter',
    )
    return tuple(config[key] for key in wanted_keys)
                
def sample_usage_using_config():
    """Print each ``files_to_monitor`` entry of the "ACAI" machine.

    Every entry is followed by an empty line.
    """
    all_machines = get_setup_file()
    acai_config = get_setup_file_by_name(all_machines, "ACAI")

    # Only the fourth unpacked field (files_to_monitor) is used here.
    _, _, _, monitored, _ = unpack_config(acai_config)
    for entry in monitored:
        print(entry)
        print()


def get_equipment_names():
    """Return the ``'name'`` of every machine in the setup file, in file order."""
    configuration = get_setup_file()
    # The loaded configuration was previously discarded and None was returned.
    return [machine['name'] for machine in configuration]


def get_machine_activity_by_name(eq_type, db_path='database.db'):
    """Fetch every ``machine_activity`` row recorded for one machine.

    Parameters
    ----------
    eq_type : str
        Value compared against the ``machine_name`` column.
    db_path : str
        SQLite database file to query.

    Returns
    -------
    list of tuple
        All matching rows as returned by ``cursor.fetchall()``; empty when
        nothing matches.

    Raises
    ------
    sqlite3.Error
        If the query fails, for example when the table is missing.
    """
    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()
        try:
            query = "SELECT * FROM machine_activity where machine_name = ?"
            cursor.execute(query, (eq_type,))
            return cursor.fetchall()
        finally:
            cursor.close()
    finally:
        # Close the connection even when the query raises.
        conn.close()

def get_machine_activity_by_name_ft(eq_type, ft, db_path='database.db'):
    """Fetch the first ``machine_activity`` row for a machine and log type.

    Parameters
    ----------
    eq_type : str
        Value compared against the ``machine_name`` column.
    ft : str
        Value compared against the ``log_type`` column.
    db_path : str
        SQLite database file to query.

    Returns
    -------
    dict
        Keys ``id``, ``created``, ``updated``, ``machine_name``,
        ``current_work_week``, ``current_date`` and ``log_type``, filled from
        the first seven columns of the first matching row, or ``{}`` when no
        row matches.

    Raises
    ------
    sqlite3.Error
        If the query fails, for example when the table is missing.
    """
    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()
        try:
            query = "SELECT * FROM machine_activity where machine_name = ? and log_type = ?"
            cursor.execute(query, (eq_type, ft))
            # Only the first row is used, so there is no need to fetch all.
            row = cursor.fetchone()
        finally:
            cursor.close()
    finally:
        # Close the connection even when the query raises.
        conn.close()

    if row is None:
        return {}

    # Positional mapping relies on the table's column order (SELECT *).
    return {
        "id": row[0],
        "created": row[1],
        "updated": row[2],
        "machine_name": row[3],
        "current_work_week": row[4],
        "current_date": row[5],
        "log_type": row[6],
    }


def get_current_workweek():
    """Return the ISO calendar week number of today's date."""
    _iso_year, iso_week, _iso_weekday = datetime.date.today().isocalendar()
    return iso_week

def get_base_dir():
    """Return the relative path of the sample directory tree, trailing slash included."""
    base_directory = "../sample_directory/"
    return base_directory


    
     

In [65]:
# Print the files_to_monitor entries configured for the "ACAI" machine.
sample_usage_using_config()

{'type': 'Image', 'file_extensions': ['.jpg', '.jpeg'], 'file_date_format': ['XXXXXXXXXXX_MMDDYYYY_XXXXXXXXX-X'], 'file_regex': {'values': ['^[0-9A-Z]{13}_[0-9]{8}_[0-9A-Z]{12}-[0-9]{2}\\.(jpg|png|csv|txt)$']}}

{'type': 'Text Files', 'file_extensions': ['.txt', '.csv'], 'file_date_format': ['XXXXXXXXXXX_MMDDYYYY_XXXXXXXXX-X'], 'file_regex': {'values': ['^[0-9A-Z]{13}_[0-9]{8}_[0-9A-Z]{12}-[0-9]{2}\\.(jpg|png|csv|txt)$']}}



In [87]:
machine_setup = get_setup_file()
base_dir = get_base_dir()

# Neither value depends on the machine or the activity row, so compute once.
latest_workweek = get_current_workweek()
# NOTE(review): the scan root is fixed to the 'acai' sample tree for every
# machine and base_dir is not used yet. Confirm whether the root should be
# built from base_dir and the machine's own config.
directories = list_directories_and_file_counts('../sample_directory/acai/')

for machine in machine_setup:

    machine_config = get_machine_activity_by_name(machine['name'])

    for mc in machine_config:
        # mc is one machine_activity row: mc[3] is machine_name, mc[6] is log_type.
        f_t_mon = machine['files_to_monitor']

        # Pick the files_to_monitor entry whose type matches this row's log
        # type (the last match wins when several entries share a type).
        file_setup = None
        for f in f_t_mon:
            if f['type'] == mc[6]:
                file_setup = f

        if file_setup is None:
            print(f"{mc[3]}: no files_to_monitor entry of type {mc[6]!r}; skipping")
            continue

        current_state = get_machine_activity_by_name_ft(mc[3], mc[6])
        if not current_state:
            print(f"{mc[3]}: no stored activity for log type {mc[6]!r}; skipping")
            continue

        earlies_work_week = current_state['current_work_week']
        current_date = current_state['current_date']
        log_type = current_state['log_type']

        # Week directories strictly between the stored week and the current week.
        dic_to_work = directories[
            (directories['main_dir'] > earlies_work_week)
            & (directories['main_dir'] < latest_workweek)
        ]
        if dic_to_work.empty:
            print(f"{mc[3]} / {mc[6]}: no week directory between {earlies_work_week} and {latest_workweek}")
            continue

        # Take the first matching directory by position; the index labels of
        # `directories` are not contiguous, so a fixed label may not exist.
        target_directory = dic_to_work['Directory'].iloc[0]
        file_list = list_directories_and_filenames_to_dataframe(target_directory, file_setup['file_extensions'])

        if len(file_list) > 0:
            # Repeat the config value on every row. A bare list is taken by
            # pandas as per-row values and a bare dict as an index-aligned
            # mapping, neither of which attaches the value to each file.
            row_count = len(file_list)
            file_list['file_date_format'] = pd.Series(
                [file_setup['file_date_format']] * row_count, index=file_list.index
            )
            file_list['file_regex'] = pd.Series(
                [file_setup['file_regex']] * row_count, index=file_list.index
            )

        print(file_list)

        
 
  

        
       

                      Directory                               Filename  \
0   ../sample_directory/acai/17   XXXXXXXXXXX_04222024_XXXXXXXXX-1.txt   
1   ../sample_directory/acai/17  XXXXXXXXXXX_04222024_XXXXXXXXX-10.txt   
2   ../sample_directory/acai/17  XXXXXXXXXXX_04222024_XXXXXXXXX-11.txt   
3   ../sample_directory/acai/17  XXXXXXXXXXX_04222024_XXXXXXXXX-12.txt   
4   ../sample_directory/acai/17  XXXXXXXXXXX_04222024_XXXXXXXXX-13.txt   
5   ../sample_directory/acai/17  XXXXXXXXXXX_04222024_XXXXXXXXX-14.txt   
6   ../sample_directory/acai/17   XXXXXXXXXXX_04222024_XXXXXXXXX-2.txt   
7   ../sample_directory/acai/17   XXXXXXXXXXX_04222024_XXXXXXXXX-3.txt   
8   ../sample_directory/acai/17   XXXXXXXXXXX_04222024_XXXXXXXXX-4.txt   
9   ../sample_directory/acai/17   XXXXXXXXXXX_04222024_XXXXXXXXX-5.txt   
10  ../sample_directory/acai/17   XXXXXXXXXXX_04222024_XXXXXXXXX-6.txt   
11  ../sample_directory/acai/17   XXXXXXXXXXX_04222024_XXXXXXXXX-7.txt   
12  ../sample_directory/acai/17   XXXX

In [84]:
# List every file ending in '.txt' under the acai sample tree.
list_directories_and_filenames_to_dataframe('../sample_directory/acai/', ['.txt'])

Unnamed: 0,Directory,Filename
0,../sample_directory/acai/17,XXXXXXXXXXX_04222024_XXXXXXXXX-1.txt
1,../sample_directory/acai/17,XXXXXXXXXXX_04222024_XXXXXXXXX-10.txt
2,../sample_directory/acai/17,XXXXXXXXXXX_04222024_XXXXXXXXX-11.txt
3,../sample_directory/acai/17,XXXXXXXXXXX_04222024_XXXXXXXXX-12.txt
4,../sample_directory/acai/17,XXXXXXXXXXX_04222024_XXXXXXXXX-13.txt
5,../sample_directory/acai/17,XXXXXXXXXXX_04222024_XXXXXXXXX-14.txt
6,../sample_directory/acai/17,XXXXXXXXXXX_04222024_XXXXXXXXX-2.txt
7,../sample_directory/acai/17,XXXXXXXXXXX_04222024_XXXXXXXXX-3.txt
8,../sample_directory/acai/17,XXXXXXXXXXX_04222024_XXXXXXXXX-4.txt
9,../sample_directory/acai/17,XXXXXXXXXXX_04222024_XXXXXXXXX-5.txt


In [46]:
# Per-directory file counts for the acai sample tree.
dr = list_directories_and_file_counts('../sample_directory/acai/')

In [47]:
# Show the directory summary as plain text.
print(dr)

                     Directory  File_Count  main_dir
1  ../sample_directory/acai/17          14        17
2  ../sample_directory/acai/21          41        21


Unnamed: 0,Directory,File_Count,main_dir
1,../sample_directory/acai/17,14,17
2,../sample_directory/acai/21,41,21


In [44]:
# Scratch copy of the walk done by list_directories_and_file_counts: builds
# the (directory, file count) table only, with no main_dir column and
# without dropping directories that hold no files.
directory_files_count = []
for dirname, dirnames, filenames in os.walk('../sample_directory/acai/'):
    # Count the files directly inside this directory
    file_count = count_files_in_directory(dirname)
    directory_files_count.append((dirname,file_count))

df = pd.DataFrame(directory_files_count, columns=['Directory', 'File_Count'])

Unnamed: 0,Directory,File_Count,main_dir
1,../sample_directory/acai/17,14,17
2,../sample_directory/acai/21,41,21
