In [2]:
import getpass
import os
import re
from datetime import date

import numpy as np
import pandas as pd

import warnings    # used only to silence warnings in cell outputs
# Ignore every Python warning raised from here on.
# NOTE(review): a blanket "ignore" also hides deprecation warnings from the
# libraries in use - consider narrowing the filter to specific categories.
warnings.filterwarnings("ignore")

In [3]:
# Build the log-file path from its components instead of a backslash-separated
# literal: '\.', '\d' and '\L' are invalid escape sequences in a normal string
# (they trigger a warning on recent Python versions) and the literal only
# resolved on Windows. os.path.join yields the same path there and a valid one elsewhere.
file_name = os.path.join('..', '..', 'dataExport', 'LOGdata', 'log_2022_4_20.csv')

# The export is read with '*' as the field separator.
log_rawDF = pd.read_csv(file_name, sep='*')

## Data Overview

In [4]:
# Summary of the raw log frame: column names, non-null counts and dtypes.
log_rawDF.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 864368 entries, 0 to 864367
Data columns (total 9 columns):
 #   Column     Non-Null Count   Dtype 
---  ------     --------------   ----- 
 0   Date_Time  864368 non-null  object
 1   Time       864368 non-null  int64 
 2   MicroSec   864368 non-null  int64 
 3   Type       864368 non-null  int64 
 4   Unit       864368 non-null  object
 5   Parameter  864368 non-null  object
 6   Message    864368 non-null  object
 7   Value1     411351 non-null  object
 8   Value2     495515 non-null  object
dtypes: int64(3), object(6)
memory usage: 59.4+ MB


In [5]:
# How often each value of the Parameter column occurs; missing values are
# counted as their own entry because dropna is disabled.
parameter_column = log_rawDF['Parameter']
parameter_column.value_counts(dropna=False)

T10                 203861
Arduino             102024
feeding              80955
calculations         78184
AutoTemplateSync     49465
                     ...  
waste                    2
T41                      2
T44                      2
T45                      2
V42                      2
Name: Parameter, Length: 162, dtype: int64

In [7]:
# Distinct values found in the Type column.
type_column = log_rawDF['Type']
log_type = type_column.unique()
print(log_type)

[4 3 2 5 6 1]


In [8]:
# Number of log rows per Type value (missing values would be listed too).
type_values = log_rawDF['Type']
type_values.value_counts(dropna=False)

2    516084
1    201533
4     67915
6     49397
5     18964
3     10475
Name: Type, dtype: int64

In [9]:
# Index labels of every row whose Type is missing (an empty list means none).
missing_type_mask = log_rawDF['Type'].isna()
log_rawDF.loc[missing_type_mask].index.tolist()

[]

In [10]:
# Distinct unit names plus how many there are.
unit_column = log_rawDF['Unit']
log_unit = unit_column.unique()
print(log_unit)
print(unit_column.nunique())

['FPC11_FPP11' 'FPC14' 'FFU' 'FPC22' 'FPC21_AUT21' 'FPC21' 'FPC23' 'FPC13'
 'FPC24' 'FPC31' 'FPC23_FPP23' 'FPC41' 'FPC42' 'FPC11' 'Product'
 'FPC21_FPP21' 'FPC13_FPP13' 'FPC24_FPP24' 'FFP_FFP' 'FPC31_FPP31'
 'FPC22_FPP22' 'FPC12' 'FPP11' 'FPC14_FPP14' 'FPP21' 'FPP13' 'FPP14' 'FPP'
 'FPC12_FPP' 'FPP31' 'FFP_AUTFU' 'FPC13_AUT13' 'FPC22_AUT22' 'FPC11_AUT11'
 'FPC12_AUT' 'FPC31_AUT31' 'FPC24_AUT24' 'FPC23_AUT23' 'scheduler'
 'FPC14_AUT14']
40


In [11]:
# Missing-value count for each of the three columns, one number per line.
for column_name in ('Type', 'Unit', 'Parameter'):
    print(log_rawDF[column_name].isna().sum())

0
0
0


In [14]:
# Distinct parameter names plus how many there are.
parameter_names = log_rawDF['Parameter']
log_parameter = parameter_names.unique()
print(log_parameter)
print(parameter_names.nunique())

['Arduino' 'program' 'T61' 'water' 'T10' 'Reactor' 'T71' 'pneumatic'
 'autoSyncMonitor' 'T42A' 'harvest' 'calculations' 'P1L' 'feeding' 'T64'
 'T42' 'T53' 'T51' 'T73' 'T74' 'T65' 'T63' 'T72' 'T52' 'T54' 'PT1_V4'
 'T55' 'T43' 'LEDlighting' 'RM29' 'RM6' 'V74' 'RM12' 'RM13' 'RM14' 'RM15'
 'RM16' 'RM17' 'RM19' 'RM2' 'RM21' 'RM22' 'RM23' 'T41A' 'T43A' 'RM35'
 'RM4' 'RM5' 'RM7' 'P42' 'RM1' 'RM30' 'Arduino_QG' 'Arduino_pH' 'V3' 'P1R'
 'P2R' 'CO2_V2' 'R1' 'PTdiff_K2' 'L1W' 'Arduino_TT1' 'Arduino_LT1'
 'Arduino1_LT52' 'Arduino2_LT72' 'Arduino1_LT41' 'Arduino2_LT61'
 'AutoLink' 'P43' 'V42' 'P10' 'V11' 'V8' 'LT1' 'RM3' 'RM11' 'RM8' 'M1'
 'P2L' 'V61' 'P11' 'Recipes' 'Products' 'L1' 'V6' 'V1' 'V72' 'chiller'
 'T62' 'V71' 'V62' 'P12' 'V43' 'L1FR' 'L1R' 'L1B' 'LT10' 'M10' 'V63'
 'RM18' 'Arduino1' 'V52' 'V64' 'PT10' 'TT0' 'waste' 'V51' 'RM20' 'RM33'
 'AutoTemplateSync' 'Arduino2_LT73' 'Arduino_CO2' 'Arduino2_LT63'
 'Arduino2_LT65' 'Arduino1_LT53' 'Arduino2_LT64' 'Arduino2_LT75'
 'Arduino1_LT54' 'Ardui

In [15]:
# Rows whose Unit is exactly 'FPC11' (True) versus all other rows (False).
is_fpc11 = log_rawDF['Unit'].eq('FPC11')
is_fpc11.value_counts()

False    847066
True      17302
Name: Unit, dtype: int64

In [16]:
# Per-column count of missing values across the whole raw log frame.
log_rawDF.isna().sum()

Date_Time         0
Time              0
MicroSec          0
Type              0
Unit              0
Parameter         0
Message           0
Value1       453017
Value2       368853
dtype: int64

## Inserting Units and Parameters into MariaDB tables

In [18]:
import sys
import mariadb

In [19]:
# DB connection definition - using mariadb package.
# Connection settings are read from environment variables so that credentials
# are not stored in the notebook. Non-secret settings fall back to the local
# defaults that were previously hard-coded.
db_password = os.environ.get("MARIADB_PASSWORD")
if db_password is None:
    # No password in the environment: prompt for it instead of hard-coding one.
    db_password = getpass.getpass("MariaDB password: ")

try:
    con = mariadb.connect(
        user=os.environ.get("MARIADB_USER", "root"),
        password=db_password,
        host=os.environ.get("MARIADB_HOST", "localhost"),
        port=int(os.environ.get("MARIADB_PORT", "3306")),
        database=os.environ.get("MARIADB_DATABASE", "data_dashboard"),
    )
except mariadb.Error as ex:
    print(f"An error occurred while connecting to MariaDB: {ex}")
    # Re-raise the connector error so the cell stops with the full traceback;
    # the remaining cells cannot work without a connection.
    raise

# Cursor shared by the query and insert cells below.
cur = con.cursor()

#### Selecting data from : tbl_unit

In [21]:
# selecting all elements from table - tbl_unit
cur.execute("SELECT unit_id, unit_name FROM tbl_unit")

# Print one row per line. The values are formatted through an f-string:
# passing {unit_id} / {unit_name} as separate print() arguments builds
# one-element sets, which rendered rows as "ID: {31} unit_name: {'FFP_AUTFU'}".
for unit_id, unit_name in cur:
    print(f"ID: {unit_id} unit_name: {unit_name}")

ID: {31} unit_name: {'FFP_AUTFU'}
ID: {19} unit_name: {'FFP_FFP'}
ID: {3} unit_name: {'FFU'}
ID: {14} unit_name: {'FPC11'}
ID: {34} unit_name: {'FPC11_AUT11'}
ID: {1} unit_name: {'FPC11_FPP11'}
ID: {22} unit_name: {'FPC12'}
ID: {35} unit_name: {'FPC12_AUT'}
ID: {29} unit_name: {'FPC12_FPP'}
ID: {8} unit_name: {'FPC13'}
ID: {32} unit_name: {'FPC13_AUT13'}
ID: {17} unit_name: {'FPC13_FPP13'}
ID: {2} unit_name: {'FPC14'}
ID: {40} unit_name: {'FPC14_AUT14'}
ID: {24} unit_name: {'FPC14_FPP14'}
ID: {6} unit_name: {'FPC21'}
ID: {5} unit_name: {'FPC21_AUT21'}
ID: {16} unit_name: {'FPC21_FPP21'}
ID: {4} unit_name: {'FPC22'}
ID: {33} unit_name: {'FPC22_AUT22'}
ID: {21} unit_name: {'FPC22_FPP22'}
ID: {7} unit_name: {'FPC23'}
ID: {38} unit_name: {'FPC23_AUT23'}
ID: {11} unit_name: {'FPC23_FPP23'}
ID: {9} unit_name: {'FPC24'}
ID: {37} unit_name: {'FPC24_AUT24'}
ID: {18} unit_name: {'FPC24_FPP24'}
ID: {10} unit_name: {'FPC31'}
ID: {36} unit_name: {'FPC31_AUT31'}
ID: {20} unit_name: {'FPC31_FPP31'}
I

#### Inserting unique parameters into DB table : tbl_log_parameters

In [24]:
# Statement and rows for loading the distinct parameter names into
# DB table : tbl_log_parameters.
# Each name is paired with a 1-based id taken from its position in log_parameter.
# NOTE(review): the ids therefore depend on the order of values in this log
# file, and INSERT IGNORE skips conflicting rows silently - confirm this is
# intended before loading a different file.
query = 'INSERT IGNORE INTO tbl_log_parameters (prm_id, prm_name) VALUES (%s, %s)'
tuples = [(position, name) for position, name in enumerate(log_parameter, start=1)]

In [26]:
# Run the INSERT statement once per row in `tuples`, then commit so the
# inserted rows are persisted.
cur.executemany(query, tuples)
con.commit() 

#### Inserting unique units into DB table : tbl_unit

In [27]:
# Statement and rows for loading the distinct unit names into DB table : tbl_unit.
# Each name is paired with a 1-based id taken from its position in log_unit.
# NOTE(review): the ids therefore depend on the order of values in this log
# file, and INSERT IGNORE skips conflicting rows silently - confirm this is
# intended before loading a different file.
query = 'INSERT IGNORE INTO tbl_unit (unit_id, unit_name) VALUES (%s, %s)'
tuples = [(position, name) for position, name in enumerate(log_unit, start=1)]

In [28]:
# Run the INSERT statement once per row in `tuples`, then commit so the
# inserted rows are persisted.
cur.executemany(query, tuples)
con.commit() 

In [29]:
# Close the database connection; `con` and `cur` cannot be used after this cell.
con.close()

## Appendix

In [None]:
# Append a line to the previous line if it does not start in the standard format.
# The snippet below is disabled: it is kept as a bare string literal, so it is
# never executed. If run, it would join every line that does not start with
# "202" onto the preceding line and write the result back over the same CSV file.
# NOTE(review): the file is rewritten in place without truncate(), so bytes of
# the old, longer content may remain at the end - confirm before re-enabling.
'''
with open('..\..\dataExport\LOGdata\log_2022_4_20.csv', 'r+', encoding="utf-8") as file:
    text = str();
    for line in file:
        if line[0:3] == "202":
            text = text + '\n';
        text = text + line.strip();
    file.seek(0);
    file.write(text);
    
'''
