### Code for automatically populating the DB tables tbl_unit and tbl_log_parameters with the unique values of the data frame columns Unit and Parameter

In [85]:
import numpy as np
import pandas as pd
import os
import re
from datetime import date

import warnings    # to avoid warning during executions
warnings.filterwarnings("ignore")

In [86]:
# Build the path from its components instead of a backslash literal: the old
# normal string relied on invalid escape sequences passing through unchanged
# and only resolved on Windows. On Windows the resulting path is the same.
file_name = os.path.join('..', '..', 'dataExport', 'LOGdata', 'log_2022_4_20.csv')
# The export uses '*' as its field separator.
log_rawDF = pd.read_csv(file_name, sep='*')

## Data Overview

In [87]:
# Summarise the raw frame: entry count, per-column non-null counts and dtypes.
log_rawDF.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 864368 entries, 0 to 864367
Data columns (total 9 columns):
 #   Column     Non-Null Count   Dtype 
---  ------     --------------   ----- 
 0   Date_Time  864368 non-null  object
 1   Time       864368 non-null  int64 
 2   MicroSec   864368 non-null  int64 
 3   Type       864368 non-null  int64 
 4   Unit       864368 non-null  object
 5   Parameter  864368 non-null  object
 6   Message    864368 non-null  object
 7   Value1     411351 non-null  object
 8   Value2     495515 non-null  object
dtypes: int64(3), object(6)
memory usage: 59.4+ MB


In [88]:
# Frequency of every distinct Parameter value; missing values are counted too.
log_rawDF.Parameter.value_counts(dropna=False)

T10                 203861
Arduino             102024
feeding              80955
calculations         78184
AutoTemplateSync     49465
                     ...  
waste                    2
T41                      2
T44                      2
T45                      2
V42                      2
Name: Parameter, Length: 162, dtype: int64

In [89]:
# Frequency of every distinct Unit value; missing values are counted too.
log_rawDF.Unit.value_counts(dropna=False)

FFU            431123
FPC23_FPP23     74100
FPC14           34702
FPC22           33366
FPC12           24002
FPC23           23900
FPC13           23309
FPC11_FPP11     22220
FPC21           21458
FPC11           17302
FPC22_FPP22     15011
FPC12_FPP       14727
FPC21_FPP21     13912
FPC24           12841
FPC31_AUT31      9176
FPC23_AUT23      8951
FPC13_FPP13      8868
FPC13_AUT13      8052
FFP_AUTFU        8019
FPC12_AUT        8012
FPC11_AUT11      8011
FPC22_AUT22      8011
FPC24_AUT24      8006
FFP_FFP          6351
FPC31            5859
FPC21_AUT21      5441
FPC24_FPP24      2782
FPC31_FPP31      2427
Product          1450
FPC42             777
FPC41             777
FPC14_FPP14       513
FPC14_AUT14       404
scheduler         312
FPP11             134
FPP                41
FPP21              12
FPP31               5
FPP14               2
FPP13               2
Name: Unit, dtype: int64

In [90]:
# Distinct Type codes that occur in the raw data.
log_type = log_rawDF.Type.unique()
print(log_type)

[4 3 2 5 6 1]


In [91]:
# Number of rows per Type code; missing values are counted too.
log_rawDF.Type.value_counts(dropna=False)

2    516084
1    201533
4     67915
6     49397
5     18964
3     10475
Name: Type, dtype: int64

In [92]:
# Drop every row whose Type code is 1; rows with any other code are kept.
tmp_1 = log_rawDF.loc[log_rawDF['Type'] != 1]

In [93]:
# Re-check the Type distribution after the filter.
tmp_1.Type.value_counts(dropna=False)

2    516084
4     67915
6     49397
5     18964
3     10475
Name: Type, dtype: int64

In [94]:
# Keep only the rows whose Unit is one of the listed names.
tmp_1 = tmp_1.loc[
    tmp_1['Unit'].isin(
        [
            "FPC11", "FPC12", "FPC13", "FPC14",
            "FPC21", "FPC22", "FPC23", "FPC24",
            "FFU", "FPP", "Product", "scheduler",
        ]
    )
]

In [95]:
# Re-check the Unit distribution after the filter.
tmp_1.Unit.value_counts(dropna=False)

FFU        397759
FPC14       28813
FPC22       28040
FPC23       23865
FPC13       23273
FPC21       21445
FPC12       21104
FPC24       12806
FPC11       12719
Product       954
FPP            41
Name: Unit, dtype: int64

In [96]:
# Summarise the filtered frame: entry count, per-column non-null counts and dtypes.
tmp_1.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 570819 entries, 6 to 862517
Data columns (total 9 columns):
 #   Column     Non-Null Count   Dtype 
---  ------     --------------   ----- 
 0   Date_Time  570819 non-null  object
 1   Time       570819 non-null  int64 
 2   MicroSec   570819 non-null  int64 
 3   Type       570819 non-null  int64 
 4   Unit       570819 non-null  object
 5   Parameter  570819 non-null  object
 6   Message    570819 non-null  object
 7   Value1     407015 non-null  object
 8   Value2     485156 non-null  object
dtypes: int64(3), object(6)
memory usage: 43.6+ MB


In [97]:
# Bind the filtered frame to the name used from here on. This is a second
# reference to the same object, not a copy.
log_raw = tmp_1

In [98]:
# Index labels of the rows whose Type is missing (an empty list means none).
log_raw.index[log_raw['Type'].isna().to_numpy()].tolist()

[]

In [99]:
# Distinct units left after filtering, followed by how many there are.
log_unit = log_raw.Unit.unique()
print(log_unit)
print(log_raw.Unit.nunique())

['FPC14' 'FFU' 'FPC22' 'FPC21' 'FPC23' 'FPC13' 'FPC24' 'FPC11' 'Product'
 'FPC12' 'FPP']
11


In [100]:
# Missing-value count for each key column, one number per line.
print(
    *(log_raw[name].isna().sum() for name in ('Type', 'Unit', 'Parameter')),
    sep='\n',
)

0
0
0


In [101]:
# Distinct parameters left after filtering, followed by how many there are.
log_parameter = log_raw.Parameter.unique()
print(log_parameter)
print(log_raw.Parameter.nunique())

['program' 'T61' 'water' 'T10' 'Reactor' 'T71' 'pneumatic' 'T42A'
 'harvest' 'calculations' 'P1L' 'feeding' 'T64' 'T42' 'T53' 'T51' 'T73'
 'T74' 'T65' 'T63' 'T72' 'T52' 'T54' 'PT1_V4' 'T55' 'T43' 'LEDlighting'
 'RM29' 'RM6' 'V74' 'RM12' 'RM13' 'RM14' 'RM15' 'RM16' 'RM17' 'RM19' 'RM2'
 'RM21' 'RM22' 'RM23' 'T41A' 'T43A' 'RM35' 'RM4' 'RM5' 'RM7' 'P42' 'RM1'
 'RM30' 'V3' 'P1R' 'P2R' 'CO2_V2' 'R1' 'PTdiff_K2' 'L1W' 'P43' 'V42' 'P10'
 'V11' 'V8' 'LT1' 'RM3' 'RM11' 'RM8' 'M1' 'P2L' 'V61' 'P11' 'Recipes'
 'Products' 'L1' 'V6' 'V1' 'V72' 'chiller' 'T62' 'V71' 'V62' 'P12' 'V43'
 'L1FR' 'L1R' 'L1B' 'Arduino' 'M10' 'V63' 'RM18' 'V52' 'V64' 'PT10' 'TT0'
 'waste' 'V51' 'RM20' 'RM33']
97


In [102]:
# How many rows belong to unit FPC11 (True) versus any other unit (False).
log_raw['Unit'].eq('FPC11').value_counts()

False    558100
True      12719
Name: Unit, dtype: int64

In [103]:
# Missing-value count for every column of the filtered frame.
log_raw.isna().sum()

Date_Time         0
Time              0
MicroSec          0
Type              0
Unit              0
Parameter         0
Message           0
Value1       163804
Value2        85663
dtype: int64

## Inserting Units and Parameters into MariaDB tables

In [104]:
import sys
import mariadb

In [105]:
# DB connection definition - using mariadb package.
# Connection settings come from environment variables so real credentials do
# not have to be stored in the notebook. The fallbacks are the previous
# hard-coded development values, so an unconfigured local setup behaves as before.
try:
    con = mariadb.connect(
        user=os.environ.get("MARIADB_USER", "root"),
        password=os.environ.get("MARIADB_PASSWORD", "password"),
        host=os.environ.get("MARIADB_HOST", "localhost"),
        port=int(os.environ.get("MARIADB_PORT", "3306")),
        database=os.environ.get("MARIADB_DATABASE", "data_dashboard"),
    )
except mariadb.Error as ex:
    # Without a connection nothing below can run, so report and stop here.
    print(f"An error occurred while connecting to MariaDB: {ex}")
    sys.exit(1)

# Cursor shared by the query and insert cells that follow.
cur = con.cursor()

#### Selecting data from : tbl_unit

In [106]:
# Select every row currently stored in tbl_unit.
cur.execute("SELECT unit_id, unit_name FROM tbl_unit")

# Print the plain values. The previous `{unit_id}` / `{unit_name}` arguments
# were set literals outside an f-string, so each value printed wrapped in
# braces (e.g. {1} {'FPC14'}).
for unit_id, unit_name in cur:
    print(f"ID: {unit_id} unit_name: {unit_name}")

#### Inserting unique parameters into DB table : tbl_log_parameters

In [20]:
# inserting list of parameters into DB table : tbl_log_parameters
#query = 'INSERT IGNORE INTO tbl_log_parameters (prm_id, prm_name) VALUES (%s, %s)'
#tuples = pd.Series(log_parameter)
#tuples = [(index+1, value) for index, value in tuples.items()]

In [21]:
#cur.executemany(query, tuples)
#con.commit() 

#### Inserting unique units into DB table : tbl_unit

In [107]:
# Prepare the insert for tbl_unit: each distinct unit is paired with a 1-based
# id taken from its position in log_unit.
# NOTE(review): the ids therefore depend on the order in which units appear in
# the data, and INSERT IGNORE skips rows that clash with an existing key
# instead of failing - confirm this is intended when re-running on new exports.
query = 'INSERT IGNORE INTO tbl_unit (unit_id, unit_name) VALUES (%s, %s)'
tuples = list(enumerate(log_unit.tolist(), start=1))

In [108]:
# Run the prepared statement once per (unit_id, unit_name) row, then commit
# the transaction on the connection.
cur.executemany(query, tuples)
con.commit() 

In [109]:
# Close the database connection; `cur` and `con` must be re-created before
# any further DB cell is run.
con.close()

## Appendix

In [25]:
# Reference snippet, disabled by wrapping it in a string literal: it rewrites the
# log file so that a line is appended to the previous line if it does not start
# in the standard format (i.e. does not begin with "202").
'''
with open('..\..\dataExport\LOGdata\log_2022_4_20.csv', 'r+', encoding="utf-8") as file:
    text = str();
    for line in file:
        if line[0:3] == "202":
            text = text + '\n';
        text = text + line.strip();
    file.seek(0);
    file.write(text);
    
'''


'\nwith open(\'..\\..\\dataExport\\LOGdata\\log_2022_4_20.csv\', \'r+\', encoding="utf-8") as file:\n    text = str();\n    for line in file:\n        if line[0:3] == "202":\n            text = text + \'\n\';\n        text = text + line.strip();\n    file.seek(0);\n    file.write(text);\n    \n'