In [1]:
import numpy as np
import pandas as pd
import os
from datetime import date
import sqlalchemy
import pymysql

import warnings    # to avoid warning during executions
warnings.filterwarnings("ignore")

In [2]:
# Load the names of all files in the LOGdata folder into a list.
# The listing is sorted because os.listdir() returns entries in arbitrary
# order, and a later cell picks the file to process by its position.
_dir_path = '../../../dataExport/LOGdata'
raw_data = sorted(os.listdir(_dir_path))
raw_data

['log_2022_4_20.csv',
 'log_2022_4_21.csv',
 'log_2022_5_18.csv',
 'log_2022_5_3.csv']

In [12]:
# ========================= set file name to process the log data  ===========================.
# Position of the file to process within `raw_data`; change this to pick another file.
FILE_INDEX = 2
filename = raw_data[FILE_INDEX]
filename

'log_2022_5_18.csv'

In [4]:
# database connection with mariaDB using SQL Alchemy
def dbConn_sqlAlc():
    """Create a SQLAlchemy engine for the dashboard MariaDB database.

    Connection settings can be overridden through environment variables so
    that real credentials do not have to be stored in the notebook:

    * ``DASHBOARD_DB_USER``     (default ``root``)
    * ``DASHBOARD_DB_PASSWORD`` (default ``password``)
    * ``DASHBOARD_DB_ADDRESS``  (``host:port``, default ``127.0.0.1:3306``)
    * ``DASHBOARD_DB_NAME``     (default ``data_dashboard``)

    Returns:
        The engine object returned by ``sqlalchemy.create_engine`` for a
        ``mariadb+mariadbconnector://`` URL.
    """
    from urllib.parse import quote_plus

    database_username = os.environ.get('DASHBOARD_DB_USER', 'root')
    database_password = os.environ.get('DASHBOARD_DB_PASSWORD', 'password')
    database_ip       = os.environ.get('DASHBOARD_DB_ADDRESS', '127.0.0.1:3306')
    database_name     = os.environ.get('DASHBOARD_DB_NAME', 'data_dashboard')

    # User name and password are URL-quoted: characters such as '@', ':' or '/'
    # would otherwise be parsed as URL delimiters and corrupt the connection URL.
    connection_url = 'mariadb+mariadbconnector://{0}:{1}@{2}/{3}'.format(
        quote_plus(database_username),
        quote_plus(database_password),
        database_ip,
        database_name,
    )
    database_connection = sqlalchemy.create_engine(connection_url)
    return database_connection

# database connection with MariaDB using the PyMySQL package
def dBCon_Maria():
    """Open a PyMySQL connection to the dashboard database.

    Connection settings can be overridden through environment variables so
    that real credentials do not have to be stored in the notebook:

    * ``DASHBOARD_DB_HOST``     (default ``localhost``)
    * ``DASHBOARD_DB_USER``     (default ``root``)
    * ``DASHBOARD_DB_PASSWORD`` (default ``password``)
    * ``DASHBOARD_DB_NAME``     (default ``data_dashboard``)

    Returns:
        The connection object returned by ``pymysql.connect``. The caller is
        responsible for committing and closing it.
    """
    connection = pymysql.connect(
        host=os.environ.get('DASHBOARD_DB_HOST', 'localhost'),
        user=os.environ.get('DASHBOARD_DB_USER', 'root'),
        password=os.environ.get('DASHBOARD_DB_PASSWORD', 'password'),
        # `database` is the documented keyword; `db` is only a deprecated alias.
        database=os.environ.get('DASHBOARD_DB_NAME', 'data_dashboard'),
    )
    return connection


# connection = dBCon_Maria()
# cursor=connection.cursor()                                            
# database_connection = dbConn_sqlAlc()

In [5]:
# Remove non-standard line breaks: a line that does not start a new record is appended to the previous line
def del_newline(file_path):
    """Rewrite a log file in place so that every record occupies one line.

    A line whose first three characters are ``"202"`` starts a new record.
    Any other line is treated as a continuation and is appended, stripped of
    surrounding whitespace, directly to the preceding line. The very first
    line of the file (e.g. a header) is always kept as its own line.

    Args:
        file_path: Path of the UTF-8 text file to rewrite.

    The rewritten content has no trailing newline.
    """
    with open(file_path, 'r+', encoding="utf-8") as file:
        records = []
        for line in file:
            piece = line.strip()
            # Start a new output line for a record start, and for the first
            # line of the file so the output never begins with an empty line.
            if line[0:3] == "202" or not records:
                records.append(piece)
            else:
                records[-1] += piece
        file.seek(0)
        file.write('\n'.join(records))
        # The merged text is shorter than the original content; without
        # truncating, the tail of the old content would remain in the file.
        file.truncate()

# data type conversion and dropping rows if the Parameter or Unit column value is missing
def log_tweak(log_rawDF, tbl_unit):  #def log_tweak(log_rawDF, tbl_unit, tbl_parameter):
    """Filter and convert a raw log DataFrame into the shape stored in the DB.

    Steps, in order:
      1. drop rows whose ``Type`` equals 1;
      2. keep only rows whose ``Unit`` is one of the known unit names;
      3. drop rows with a missing ``Parameter`` or ``Unit``;
      4. replace each unit name by its ``unit_id`` from ``tbl_unit`` and parse
         ``Date_Time`` into datetimes;
      5. cast ``Type`` to int8 and ``Unit`` to int16;
      6. rename the columns to the lower-case names used below;
      7. replace any remaining missing value by the string ``'empty'``.

    Args:
        log_rawDF: Raw log DataFrame with at least the columns ``Date_Time``,
            ``Type``, ``Unit`` and ``Parameter``.
        tbl_unit: DataFrame with the columns ``unit_name`` and ``unit_id``.

    Returns:
        A new DataFrame; ``log_rawDF`` is not modified.

    Raises:
        Whatever ``astype`` raises when a kept unit name has no entry in
        ``tbl_unit`` (the mapped id is then missing and cannot be cast to
        int16), and whatever ``pd.to_datetime`` raises for an unparsable
        ``Date_Time`` in a kept row.
    """
    # Build the name -> id lookup once, outside the chain.
    unit_ids = tbl_unit.set_index('unit_name')['unit_id']
    return (
    log_rawDF
    .query('Type != 1')
    .query('Unit == ("FPC11", "FPC12", "FPC13", "FPC14", "FPC21", "FPC22", "FPC23", "FPC24", "FFU", "FPP", "Product", "scheduler")')
    .dropna(subset=['Parameter', 'Unit'])
    # Callables are evaluated on the already filtered frame, so rows that were
    # just discarded are never parsed and no index re-alignment against the
    # unfiltered input is needed.
    # Parameter is kept as text; mapping it to ids via tbl_parameter is disabled.
    .assign(Unit = lambda kept: kept['Unit'].map(unit_ids),
            Date_Time = lambda kept: pd.to_datetime(kept['Date_Time'])
           )
    .astype({'Type' : 'int8', 'Unit' : 'int16'})
    .rename(columns = {'Date_Time':'date_time', 'Time':'time_epoch', 'Type':'category', 'Unit':'unit', 'Parameter':'parameter', 'Message':'message'})
    .fillna('empty')
    )

# function to export the log data into db table - tbl_log_data
def log_exporttoDB(log_cleaned):
    """Insert every row of ``log_cleaned`` into the table ``tbl_log_data``.

    The DataFrame's column names are used as the target column names and each
    row becomes one parameterized INSERT value set. All rows are committed as
    a single transaction.

    Args:
        log_cleaned: DataFrame whose columns match columns of ``tbl_log_data``.

    Raises:
        Any exception raised by the database driver. In that case nothing is
        committed, and the connection is still closed.
    """
    connection = dBCon_Maria()
    try:
        # The statement is identical for every row, so build it once.
        cols = "`,`".join(str(col) for col in log_cleaned.columns)
        placeholders = ",".join(["%s"] * len(log_cleaned.columns))
        sql = "INSERT INTO `tbl_log_data` (`" + cols + "`) VALUES (" + placeholders + ")"
        rows = list(log_cleaned.itertuples(index=False, name=None))
        with connection.cursor() as cursor:
            cursor.executemany(sql, rows)
        connection.commit()
    finally:
        # Always release the connection, including when an insert fails.
        connection.close()


# log_cleaned = log_tweak(log_rawDF, tbl_unit, tbl_parameter)
# del_newline(file_path)

In [13]:
# Export the selected log file to the database unless the ledger file
# `exported_list.txt` already lists it (one exported file name per line).
_exported_list_path = './exported_list.txt'

# A missing ledger means that nothing has been exported yet (e.g. first run).
if os.path.exists(_exported_list_path):
    with open(_exported_list_path) as f:
        lines = f.read().splitlines()
else:
    lines = []

if filename not in lines:
    file_path = os.path.join(_dir_path, filename)
    #del_newline(file_path)
    database_connection = dbConn_sqlAlc()
    tbl_unit = pd.read_sql('SELECT unit_id, unit_name FROM tbl_unit', database_connection)
    #tbl_parameter = pd.read_sql('SELECT prm_id, prm_name FROM tbl_log_parameters', database_connection)
    log_rawDF = pd.read_csv(file_path, sep='*')
    log_cleaned = log_tweak(log_rawDF, tbl_unit) #log_cleaned = log_tweak(log_rawDF, tbl_unit, tbl_parameter)
    try:
        export_status = log_exporttoDB(log_cleaned)
        # Record the file only after the export succeeded; `with` closes the
        # ledger even if a write fails.
        with open(_exported_list_path, "a+") as file1:
            file1.write("\n")
            file1.write(filename)
        print('file is exported')
    except Exception as e:
        # Best effort: report the failure without stopping the notebook.
        print(e)

else:
    print('data is present in the db')

file is exported
