# Attendance Tracking System

- Date: 23/01/2023
- Description: Attendance Tracking System


## CW_Preprocessing

This is a simple attendance tracking system that checks students' attendance across all module sessions of a department in the current semester. This program:
- reads the module attendance csv files from a folder
- parses the column headers
- cleans the original data
- writes the attendance dataframes into an SQLite3 database

## Import Modules

In [6]:
#required modules for CW_Preprocessing code
import pandas as pd
import numpy as np
import os
import sqlite3
import re

## Functions

#### DataFrame Preparation

In [7]:
class CW_Preprocessing:
    """Read module attendance CSV files and parse their session headers."""

    def get_attendance_details_df(self, clm):
        """
        Extract the details of each attendance session from the column names.

        Every column name is stripped of leading/trailing newlines and split
        on newlines into, in order: week, date, time, type, room and, when a
        sixth field is present, is_excluded. The week field is then split on
        "." into semester and week (the "W" is removed and the rest cast to
        int), and the time field is split on "-" into start_time and end_time.

        input:
            clm: the column names, list type
        return:
            dfSessions: the attendance details, dataframe type, indexed by
                sessions_id (the position of the column name in clm)
        """
        dfSessions = pd.DataFrame(clm, columns=["details"])
        dfSessions = (
            dfSessions["details"].str.strip("\n").str.split("\n", expand=True)
        )
        columns = ["week", "date", "time", "type", "room"]

        # A sixth header field, when present, is stored as "is_excluded".
        if dfSessions.shape[1] == 6:
            columns.append("is_excluded")

        dfSessions.columns = columns
        # The raw week field holds "<semester>.<week>"; separate the two parts.
        dfSessions[["semester", "week"]] = dfSessions["week"].str.split(
            ".", expand=True
        )
        # regex=False: "W" is a literal character, independent of the pandas
        # version's default for the regex argument.
        dfSessions["week"] = (
            dfSessions["week"].str.replace("W", "", regex=False).astype(int)
        )
        dfSessions[["start_time", "end_time"]] = dfSessions["time"].str.split(
            "-", expand=True
        )
        dfSessions = dfSessions.drop(columns="time")
        dfSessions.index.names = ["sessions_id"]

        return dfSessions

    def get_attendance(self, file_name, data_dir=None):
        """
        Read one module attendance record from a CSV file.

        input:
            file_name: name of the CSV file inside the data folder
            data_dir: folder holding the CSV files; when omitted, the "data"
                folder under the current working directory is used
        return:
            two dataframes: the attendance record (first CSV column as index,
            remaining columns renumbered 0..n-1) and the session details
            parsed from the original column names
        """
        if data_dir is None:
            # Built with os.path.join so the default works on every OS,
            # not only with Windows path separators.
            data_dir = os.path.join(".", "data")

        dfAttendance = pd.read_csv(os.path.join(data_dir, file_name), index_col=0)
        clm = dfAttendance.columns.to_list()

        dfSessionDel = self.get_attendance_details_df(clm)
        # Replace the verbose headers by their position so that the column
        # labels line up with the sessions_id index of dfSessionDel.
        dfAttendance.columns = np.arange(len(dfAttendance.columns))

        return dfAttendance, dfSessionDel

#### Data Cleaning

In [8]:
def clean_df(dfData):
    """
    Return a cleaned copy of a module attendance dataframe.

    The input is left untouched. On the copy, the columns are renumbered
    0..n-1, rows and columns that contain only missing values are removed,
    'Ex' is turned into a missing value, 'GPS' becomes True and 'X' becomes
    False. The index is named 'student_id'.

    input:
        dfData: the module attendance dataframe
    output:
        the cleaned copy of the dataframe
    """
    cleaned = dfData.copy()
    cleaned.columns = np.arange(cleaned.shape[1])

    # Remove fully empty rows first, then fully empty columns.
    cleaned = cleaned.dropna(axis=0, how='all')
    cleaned = cleaned.dropna(axis=1, how='all')

    # 'Ex' is blanked out before the remaining marks are turned into booleans.
    attendance_flags = {'GPS': True, 'X': False}
    cleaned = cleaned.replace('Ex', np.nan).replace(attendance_flags)

    cleaned.index.names = ['student_id']

    return cleaned

#### Database Operation

In [9]:
def write_to_db(dfData, TableName, db_path='CWDatabase.db'):
    """
    Write a dataframe to a table of the SQLite database.

    An existing table with the same name is replaced, and the dataframe
    index is written as a column.

    input:
        dfData: dataframe
        TableName: name of the target table, string type
        db_path: path of the SQLite database file; defaults to
            'CWDatabase.db' in the current working directory
    """
    conn = sqlite3.connect(db_path)
    try:
        dfData.to_sql(TableName, if_exists='replace',
                      index=True,
                      con=conn)
    finally:
        # Release the connection even when the write fails.
        conn.close()

## Main Code

In [10]:
def main():
    """
    Main function of the preprocessing code.

    For every CSV file in the data folder: read the attendance record,
    clean it, transpose it so that each row is one session, and write it to
    the database in a table named after the module code found in the file
    name. The session details are written to "<module code>_Sessions".

    Directory entries that do not end in ".csv" are skipped. CSV files
    whose name holds no module code (letters followed by digits) are
    reported and skipped.
    """
    data_dir = os.path.join(".", "data")  # portable form of the data folder path
    # Raw string so that \d reaches the regex engine unchanged.
    module_pattern = re.compile(r"([A-Za-z]+\d+)")
    attendance_object = CW_Preprocessing()

    for file in os.listdir(data_dir):
        # Ignore sub-folders and other non-CSV entries.
        if not file.lower().endswith(".csv"):
            continue

        # Look for the module code before doing any work on the file.
        match_obj = module_pattern.search(file)
        if match_obj is None:
            print(f"Skipping '{file}': no module code found in the file name")
            continue
        module_code = match_obj.group()

        # read attendance from the CSV file
        dfAttendance, dfSessions = attendance_object.get_attendance(file)
        dfClean = clean_df(dfAttendance)  # clean dataframe
        dfCleanT = dfClean.transpose()  # sessions become the rows
        dfCleanT.index.names = ["sessions_id"]

        # writing data into the DB
        write_to_db(dfCleanT, module_code)
        write_to_db(dfSessions, module_code + "_Sessions")

# Call the main function only when this code is executed directly (script or
# notebook), not when it is imported from another module.
if __name__ == "__main__":
    main()