In [22]:
import pandas as pd
from os import listdir
from os.path import isfile, join

class LogParser():
    def __init__(self,path,isFolder):
        self.path = path
        self.isFolder = isFolder
        columns = ['Timestamp', 'Log Message', 'Message', 'User Path', 'Customer Path','Lot ID', 'Wafer ID', 'Port ID', 'Date', 'Time']
        df = pd.DataFrame(columns=columns)
        self.parsedLog = df
        
        if isFolder == 'Y':
            files = [path + f for f in listdir(self.path) if isfile(join(self.path, f))]
            for file in files:
                self.ParseLogs(file)
        else:
            self.ParseLogs(self.path)
        
    def ParseLogs(self,file):
        #placeholder columns to get all data

        columnPlaceholder = [
            'Event Type',
            'Timestamp',
            'Log Message',
            'Category',
            'User',
            'Thread ID',
            'Error Code',
            'Application',
            'Machine'
        ]
        columnPlaceholder = columnPlaceholder + ['col' + str(i) for i in range(100)]
        print(file)
        df = pd.read_csv(file, sep='\t', lineterminator='\n',names=columnPlaceholder)
        
        #step 1
        #Table.RemoveColumns({"Error Code", "Application", "Machine", "Category", "Event Type", "Thread ID", "User", "Source.Name"})
        step1 = df.drop(["Error Code", "Application", "Machine", "Category", "Event Type", "Thread ID", "User"],axis=1)
        
        #step2 Table.SelectRows(each Text.StartsWith([Log Message], "Lot Run - Dumping special environment variables") or 
        #Text.StartsWith([Log Message], "Lot Run - Lot Inspection Done") or 
        #Text.StartsWith([Log Message], "Wafer Run - Time per wafer inspection ") and Text.EndsWith([Log Message], ">"))
        step2 = step1[
            (step1["Log Message"].str.startswith('Lot Run - Dumping special environment variables',na=False))
            | (step1["Log Message"].str.startswith('Lot Run - Lot Inspection Done',na=False))
            | (step1["Log Message"].str.startswith('Wafer Run - Time per wafer inspection ',na=False))
            & (step1["Log Message"].str.endswith('>',na=False))
        ]
        
        #step3 Table.ReplaceValue("Lot Run - Dumping special environment variables:<Lot Run -Lot name : ",
        #"Lot Start <",
        #Replacer.ReplaceText,{"Log Message"})
        step2["Log Message"] = step2["Log Message"].str.replace(
            "Lot Run - Dumping special environment variables:<Lot Run -Lot name : ",
            "Lot Start <"
        )
        
        #step4 Table.ReplaceValue(
        #"Wafer Run - Time per wafer inspection = ",
        #"Duration = ",Replacer.ReplaceText,{"Log Message"})

        step2["Log Message"] = step2["Log Message"].str.replace(
            "Wafer Run - Time per wafer inspection = ",
            "Duration = "
        )
        
        #step5 Table.ReplaceValue(
        #"Lot Run - Lot Inspection Done<Lot Run -Lot name : ",
        #"Lot End <",Replacer.ReplaceText,{"Log Message"})
        step2["Log Message"] = step2["Log Message"].str.replace(
            "Lot Run - Lot Inspection Done<Lot Run -Lot name : ",
            "Lot End <"
        )
        
        #step6 Table.ReplaceValue(
        #"<Wafer Run - Lot name: ",
        #"<Wafer Run - Lot name: ",Replacer.ReplaceText,{"Log Message"})

        step2["Log Message"] = step2["Log Message"].str.replace(
            "<Wafer Run - Lot name: ",
            "<Wafer Run - Lot name: "
        )
        
        logs = step2[["Timestamp","Log Message"]]
        
        #step7 Table.SplitColumn("Log Message", Splitter.SplitTextByEachDelimiter({"<"}, QuoteStyle.Csv, false), {"Log Message.1", "Log Message.2"})
        logs[["Log Message.1", "Log Message.2"]] = logs['Log Message'].str.split('<', expand=True)
        
        #step8 Table.ReplaceValue("Wafer Run - Lot name: ","<",Replacer.ReplaceText,{"Log Message.2"})
        logs["Log Message.2"] = logs["Log Message.2"].str.replace(
            "Wafer Run - Lot name: ",
            "<"
        )
        
        #step9 Table.ReplaceValue("<","",Replacer.ReplaceText,{"Log Message.2"})
        logs["Log Message.2"] = logs["Log Message.2"].str.replace(
            "<",
            ""
        )
        
        #step10 Table.ReplaceValue("Duration = ","",Replacer.ReplaceText,{"Log Message.1"})
        logs["Log Message.1"] = logs["Log Message.1"].str.replace(
            "Duration = ",
            ""
        )
        
        #step11 Table.ReplaceValue(">","",Replacer.ReplaceText,{"Log Message.2"})
        logs["Log Message.2"] = logs["Log Message.2"].str.replace(
            ">",
            ""
        )
        
        #Step12 Table.SplitColumn("Log Message.2", Splitter.SplitTextByDelimiter("\", QuoteStyle.Csv), {"Log Message.2.2", "Log Message.2.3"})
        logs[["Log Message.2.1","Log Message.2.2", "Log Message.2.3"]] = logs['Log Message.2'].str.split("\\", expand=True)
        
        #step13 Table.SplitColumn("Log Message.2.3", Splitter.SplitTextByEachDelimiter({"Wafer ID: "}, QuoteStyle.Csv, false), {"Log Message.2.3.1", "Log Message.2.3.2"})
        logs[["Log Message.2.3.1","Log Message.2.3.2"]] = logs['Log Message.2.3'].str.split("Wafer ID: ", expand=True)
        
        #step14 Table.SplitColumn("Log Message.2.3.2", Splitter.SplitTextByEachDelimiter({"Port No: "}, QuoteStyle.Csv, false), {"Log Message.2.3.2.1", "Log Message.2.3.2.2"})
        logs[["Log Message.2.3.2.1","Log Message.2.3.2.2"]] = logs['Log Message.2.3.2'].str.split("Port No: ", expand=True)
        
        #step 15 
        """
        Table.RenameColumns({
        #{"Log Message.2.3.2.1", "Wafer ID"}, 
        #{"Log Message.2.3.2.2", "Port ID"}, 
        #{"Log Message.2.3.1", "Lot ID"}, 
        #{"Log Message.2.2", "Customer Path"}, 
        #{"Log Message.2.1", "User Path"}, 
        #{"Log Message.1", "Message"}})
        """
        logs.rename(columns = {
            "Log Message.2.3.2.1": "Wafer ID", 
            "Log Message.2.3.2.2": "Port ID", 
            "Log Message.2.3.1": "Lot ID", 
            "Log Message.2.2": "Customer Path", 
            "Log Message.2.1": "User Path", 
            "Log Message.1": "Message"
        }, inplace = True)
        
        columns = [col for col in logs.columns if col not in ["Log Message.2","Log Message.2.3","Log Message.2.3.2"]]
        step16 = logs[columns]
        
        step16["Timestamp"] = pd.to_datetime(step16.Timestamp)
        step16["Date"] = step16['Timestamp'].dt.date
        step16["Date"] = pd.to_datetime(step16.Date)
        step16["Time"] = step16['Timestamp'].dt.time
        
        self.parsedLog = pd.concat([self.parsedLog,step16],ignore_index=True)

In [23]:
path = "C:/Users/Francis John Picaso/Repositories/Teo/logs/"
parser = LogParser(path=path,isFolder = 'Y')
parser.parsedLog

C:/Users/Francis John Picaso/Repositories/Teo/logs/GeneralLog1350185-220110.log


  self.ParseLogs(file)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  step2["Log Message"] = step2["Log Message"].str.replace(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  step2["Log Message"] = step2["Log Message"].str.replace(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  step2["Log Message"] = step2["Log Message"].str.replace(
A value is 

C:/Users/Francis John Picaso/Repositories/Teo/logs/GeneralLog1350185-220111.log


  self.ParseLogs(file)


C:/Users/Francis John Picaso/Repositories/Teo/logs/GeneralLog1350185-220112.log


  self.ParseLogs(file)


C:/Users/Francis John Picaso/Repositories/Teo/logs/GeneralLog1350185-220113.log
C:/Users/Francis John Picaso/Repositories/Teo/logs/GeneralLog1350185-220114.log


Unnamed: 0,Timestamp,Log Message,Message,User Path,Customer Path,Lot ID,Wafer ID,Port ID,Date,Time
0,2022-01-10 16:03:18.371,Lot Start <DSW\Matching_Completed\a09bfbb_PM>,Lot Start,DSW,Matching_Completed,a09bfbb_PM,,,2022-01-10,16:03:18.371000
1,2022-01-10 16:05:36.994,Duration = 00:01:48<Wafer Run - Lot name: DSW\...,00:01:48,DSW,Matching_Completed,a09bfbb_PM.,@10@001.,1,2022-01-10,16:05:36.994000
2,2022-01-10 16:06:57.678,Duration = 00:01:21<Wafer Run - Lot name: DSW\...,00:01:21,DSW,Matching_Completed,a09bfbb_PM.,@10@002.,1,2022-01-10,16:06:57.678000
3,2022-01-10 16:08:20.498,Duration = 00:01:22<Wafer Run - Lot name: DSW\...,00:01:22,DSW,Matching_Completed,a09bfbb_PM.,@10@003.,1,2022-01-10,16:08:20.498000
4,2022-01-10 16:08:31.005,Lot End <DSW\Matching_Completed\a09bfbb_PM>,Lot End,DSW,Matching_Completed,a09bfbb_PM,,,2022-01-10,16:08:31.005000
...,...,...,...,...,...,...,...,...,...,...
130,2022-01-14 05:47:37.213,Lot End <USERS\JSR\AR128824-7651-D01-D04>,Lot End,USERS,JSR,AR128824-7651-D01-D04,,,2022-01-14,05:47:37.213000
131,2022-01-14 05:58:33.997,Lot Start <USERS\SCREEN\D01>,Lot Start,USERS,SCREEN,D01,,,2022-01-14,05:58:33.997000
132,2022-01-14 06:07:05.895,Duration = 00:08:05<Wafer Run - Lot name: USER...,00:08:05,USERS,SCREEN,D01.,AR102208D01.,2,2022-01-14,06:07:05.895000
133,2022-01-14 06:07:11.758,Lot End <USERS\SCREEN\D01>,Lot End,USERS,SCREEN,D01,,,2022-01-14,06:07:11.758000


In [24]:
parser.parsedLog.shape

(135, 10)

In [25]:
pd.show_versions()


INSTALLED VERSIONS
------------------
commit           : 945c9ed766a61c7d2c0a7cbb251b6edebf9cb7d5
python           : 3.9.7.final.0
python-bits      : 64
OS               : Windows
OS-release       : 10
Version          : 10.0.22000
machine          : AMD64
processor        : AMD64 Family 25 Model 80 Stepping 0, AuthenticAMD
byteorder        : little
LC_ALL           : None
LANG             : None
LOCALE           : English_United States.1252

pandas           : 1.3.4
numpy            : 1.20.3
pytz             : 2021.3
dateutil         : 2.8.2
pip              : 21.2.4
setuptools       : 58.0.4
Cython           : 0.29.24
pytest           : 6.2.4
hypothesis       : None
sphinx           : 4.2.0
blosc            : None
feather          : None
xlsxwriter       : 3.0.1
lxml.etree       : 4.6.3
html5lib         : 1.1
pymysql          : None
psycopg2         : None
jinja2           : 2.11.3
IPython          : 7.29.0
pandas_datareader: None
bs4              : 4.10.0
bottleneck       : 1.3.2
f