In [4]:
import os
import regex as re
import datetime 
import shutil
import pandas as pd

In [5]:
PARENT_PATH = './' # Must contain every csv file
ARCHIVE_PATH = './historique' # Folder for archive

In [7]:
dir_ls = os.listdir(PARENT_PATH)
dir_ls

['.ipynb_checkpoints',
 'CREATE TABLE.ipynb',
 'file_update.ipynb',
 'historique',
 'IT_Equipment.10.03.2022.csv',
 'IT_Equipment.11.05.22.csv',
 'IT_Equipment.4.02.2022.csv',
 'TRIGGER.ipynb',
 'UPDATE.ipynb',
 'UPDATE_JOIN_OLIVIER_IT.ipynb',
 'VIEW_SELECT.ipynb',
 'VIEW_SELECT_JOIN_OLIVIER_IT.ipynb']

In [9]:
def get_datetime(file : str) -> tuple :
    """
    Extract the date embedded in a file name.
    Example : 'IT_Equipment.10.03.2022.csv' has been edited the 10-03-2022.

    The name must split on '.' into exactly five parts :
    ["filename", day, month, year, "csv"]. A two-digit year is read as 20YY.

    Parameters
    ----------
    file : str
        Name of the file to inspect.

    Output :
    ---
        tuple
            (datetime.datetime, file) when the name carries a valid date,
            (None, None) for every other name.
    """
    parts = file.split('.')
    if len(parts) != 5 :    # ["filename", day, month, year, "csv"]
        return (None, None)

    day, month, year = parts[1:-1]
    if len(year) == 2 :    # year = 22 in place of 2022
        year = "20" + year

    # Any non-numeric component means this is not a dated export.
    if not all(part.isdigit() for part in (day, month, year)) :
        return (None, None)

    try :
        time = datetime.datetime(day = int(day), month = int(month), year = int(year))
    except ValueError :    # numeric but impossible date, e.g. 31.02.2022
        return (None, None)

    return (time, file)
    

In [10]:
# Keep only the directory entries whose name carries a parsable date.
datetime_ls = [dated for dated in map(get_datetime, dir_ls) if dated[0] is not None]
# Tuples compare on their first element first, so max() returns the most recent date.
last_file = max(datetime_ls)
last_file

(datetime.datetime(2022, 5, 11, 0, 0), 'IT_Equipment.11.05.22.csv')

In [150]:
datetime_ls

[(datetime.datetime(2022, 5, 11, 0, 0), 'IT_Equipment.11.05.22.csv')]

# QUERY SQL

# Archive

Quand un fichier a été utilisé, on le déplace dans le dossier d'archive (`ARCHIVE_PATH`) ; seul le fichier le plus récent reste dans `PARENT_PATH`.

In [98]:
def check_create_directory(path: str) :
    """
    Make sure the directory `path` exists, creating every missing
    intermediate folder.

    Parameters
    ----------
    path : str
        Directory to create if it is absent.

    Returns
    -------
    bool
        Always True once the directory exists.

    Raises
    ------
    FileExistsError
        If `path` already exists but is not a directory.
    """
    # exist_ok avoids the check-then-create race of testing os.path.exists first.
    os.makedirs(path, exist_ok=True)
    return True

In [140]:
def move_to_folder(file: str, current_folder: str, new_folder: str) :
    """
    Relocate `file` from `current_folder` into `new_folder`, keeping its name.

    Returns True once shutil.move has completed.
    """
    source, destination = (
        os.path.join(folder, file) for folder in (current_folder, new_folder)
    )
    shutil.move(source, destination)
    return True

In [154]:
# Re-scan the working folder, archive every dated file except the most recent
# one, and keep the path of the most recent one in `last_file`.
dir_ls = os.listdir(PARENT_PATH)
datetime_ls = [dated for dated in map(get_datetime, dir_ls) if dated[0] is not None]
check_create_directory(ARCHIVE_PATH)
# Computed once, outside the loop; default=None keeps an empty folder a no-op.
latest = max(datetime_ls, default=None)
for dated_file in datetime_ls :
    if dated_file != latest :
        move_to_folder(dated_file[1], PARENT_PATH, ARCHIVE_PATH)
    else :
        last_file = os.path.join(PARENT_PATH, dated_file[1])

## Populate table

In [158]:
df = pd.read_csv(last_file, header = None, delimiter = ',')

In [159]:
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,67,68,69,70,71,72,73,74,75,76
0,Equipements IT,,,SU919,2885508,Sun,SunFire V890,PC,J03,1,...,16.0,0.48,0.836,130.6,,0,798936-126608,798936-126594,,OK-CMDB-ASSET-ID
1,Equipements IT,,,DEMOCRITE,2656355,Sun,SunFire V100,PC,I03,38,...,1.0,0.437,0.483,6.0,,0,798937-126811,798936-126723,,OK-CMDB-ASSET-ID
2,Equipements IT,,,LEUCIPPE,2656358,Sun,SunFire V100,PC,H03,38,...,1.0,0.437,0.483,6.0,,0,798937-126945,798937-126870,,OK-CMDB-ASSET-ID
3,Equipements IT,,,SU888,2885019,Sun,SunFire V210,PC,F03,20,...,1.0,0.425,0.635,12.0,,0,798937-127056,798937-127004,,OK-CMDB-ASSET-ID
4,Equipements IT,,,SU846,2656367,Sun Microsystems,SunFire V440,PC,G03,5,...,4.0,0.44,0.635,37.0,,0,798937-127169,798937-127151,,OK-CMDB-ASSET-ID


In [166]:
# Grab only the first (index, row) pair of df: the loop exits on its first
# iteration, leaving `i` and `rows` bound to that pair (unbound if df is empty).
for i, rows in df.iterrows() :
    break

In [170]:
len(rows)


77

'INSERT INTO employee.employee_data VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,s)'

In [11]:
datetime_ls

[(datetime.datetime(2022, 3, 10, 0, 0), 'IT_Equipment.10.03.2022.csv'),
 (datetime.datetime(2022, 5, 11, 0, 0), 'IT_Equipment.11.05.22.csv'),
 (datetime.datetime(2022, 2, 4, 0, 0), 'IT_Equipment.4.02.2022.csv')]

In [16]:
file = datetime_ls[-1][1]
file

'IT_Equipment.4.02.2022.csv'

In [17]:
filepath = os.path.join(PARENT_PATH, file)
filepath

'./IT_Equipment.4.02.2022.csv'

In [26]:
filepath

'./IT_Equipment.4.02.2022.csv'

In [27]:
datetime_ls[i][1]

'IT_Equipment.4.02.2022.csv'

In [134]:
PARENT_PATH = './'

In [138]:
def get_csv_to_df(filepath : str, filename : str, datetime) :
    """
    Load an equipment CSV export into a DataFrame and tag every row with
    its source file and date.

    The file is read as latin-1, trying ',' and then ';' as delimiter.
    A delimiter is rejected when pandas raises a ParserError or when it
    yields fewer than two columns (the usual symptom of a wrong delimiter).
    If the cell in the first row, second column contains
    'Is Loading Bay Item ', the first row is a header and the file is
    re-read with header=0; otherwise the columns keep integer labels.

    Parameters
    ----------
    filepath : str
        Path of the CSV file to read.
    filename : str
        Value stored in the added "SOURCES" column.
    datetime : object exposing strftime
        Date of the export (note: inside this function the name hides the
        `datetime` module); stored in the added 'timestamp' column
        formatted as '%d-%m-%y'.

    Returns
    -------
    pandas.DataFrame
        The parsed data plus the "SOURCES" and 'timestamp' columns.

    Raises
    ------
    pandas.errors.ParserError
        If a delimiter failed to parse and none produced a usable frame.
    ValueError
        If no delimiter produced at least two columns.
    """
    parse_error = None
    for delimiter in (',', ';') :
        try :
            df = pd.read_csv(filepath, header = None, delimiter = delimiter, encoding='latin-1')
            if df.shape[1] < 2 :    # single column: wrong delimiter, try the next one
                continue
            if 'Is Loading Bay Item ' in str(df.iloc[0, 1]) :    # if csv have header
                df = pd.read_csv(filepath, header = 0, delimiter = delimiter, encoding='latin-1')
        except pd.errors.ParserError as error :
            parse_error = error
            continue

        df["SOURCES"] = filename
        df['timestamp'] = datetime.strftime('%d-%m-%y')
        return df

    if parse_error is not None :
        raise parse_error
    raise ValueError("Could not split %r into columns with ',' or ';'" % filepath)

In [141]:
# Print, for every dated file found, how many columns the CSV itself holds
# (the two columns added by get_csv_to_df are subtracted).
for i, (csv_date, csv_name) in enumerate(datetime_ls) :
    print('-'*10, i, csv_name, '-'*10)
    filepath = os.path.join(PARENT_PATH, csv_name)
    df = get_csv_to_df(filepath, csv_name, csv_date)
    print(len(df.columns)-2)

---------- 0 IT_Equipment.10.03.2022.csv ----------
62
---------- 1 IT_Equipment.11.05.22.csv ----------
77
---------- 2 IT_Equipment.4.02.2022.csv ----------
76


In [21]:
for file in datetime_ls :
    print(file)

(datetime.datetime(2022, 3, 10, 0, 0), 'IT_Equipment.10.03.2022.csv')
(datetime.datetime(2022, 5, 11, 0, 0), 'IT_Equipment.11.05.22.csv')
(datetime.datetime(2022, 2, 4, 0, 0), 'IT_Equipment.4.02.2022.csv')


In [32]:
PARENT_PATH = './modifie' # Must contain every csv file
ARCHIVE_PATH = './historique' # Folder for archive

In [33]:
dir_ls = os.listdir(PARENT_PATH)
dir_ls

['IT_Equipment.10.03.2022.csv',
 'IT_Equipment.11.05.22.csv',
 'IT_Equipment.4.02.2022.csv']

In [34]:
# Keep only the directory entries whose name carries a parsable date.
datetime_ls = [dated for dated in map(get_datetime, dir_ls) if dated[0] is not None]
# Tuples compare on their first element first, so max() returns the most recent date.
last_file = max(datetime_ls)
last_file

(datetime.datetime(2022, 5, 11, 0, 0), 'IT_Equipment.11.05.22.csv')

In [124]:
def get_csv_to_df(filepath : str, filename : str, datetime = None) :
    """
    Load an equipment CSV export into a DataFrame and tag every row with
    its source file (and, optionally, its date).

    The file is read as latin-1, trying ',' and then ';' as delimiter.
    A delimiter is rejected when pandas raises a ParserError or when it
    yields fewer than two columns (the usual symptom of a wrong delimiter).
    If the cell in the first row, second column contains
    'Is Loading Bay Item ', the first row is a header and the file is
    re-read with header=0; otherwise the columns keep integer labels.

    Parameters
    ----------
    filepath : str
        Path of the CSV file to read.
    filename : str
        Value stored in the added "SOURCES" column.
    datetime : object exposing strftime, optional
        Date of the export (note: inside this function the name hides the
        `datetime` module). When given, a 'timestamp' column formatted as
        '%d-%m-%y' is added; when omitted, no 'timestamp' column is added.

    Returns
    -------
    pandas.DataFrame
        The parsed data plus the "SOURCES" (and optional 'timestamp') column.

    Raises
    ------
    pandas.errors.ParserError
        If a delimiter failed to parse and none produced a usable frame.
    ValueError
        If no delimiter produced at least two columns.
    """
    parse_error = None
    for delimiter in (',', ';') :
        try :
            df = pd.read_csv(filepath, header = None, delimiter = delimiter, encoding='latin-1')
            if df.shape[1] < 2 :    # single column: wrong delimiter, try the next one
                continue
            if 'Is Loading Bay Item ' in str(df.iloc[0, 1]) :    # if csv have header
                df = pd.read_csv(filepath, header = 0, delimiter = delimiter, encoding='latin-1')
        except pd.errors.ParserError as error :
            parse_error = error
            continue

        df["SOURCES"] = filename
        if datetime is not None :
            df['timestamp'] = datetime.strftime('%d-%m-%y')
        return df

    if parse_error is not None :
        raise parse_error
    raise ValueError("Could not split %r into columns with ',' or ';'" % filepath)

In [106]:
filepath = os.path.join(PARENT_PATH, datetime_ls[0][1])
filename = datetime_ls[0][1]
filename

'IT_Equipment.10.03.2022.csv'

In [107]:
filepath = "./IT_Equipment.10.03.2022.csv"

In [116]:
filename

'IT_Equipment.11.05.22.csv'

In [123]:
aa = pd.read_csv(filepath, header = None, delimiter = ';', encoding='latin-1') 
"dsqds" in str(aa.iloc[0][1])

False

In [144]:
# NOTE(review): the path is built from entry [2] while the name and date come
# from entry [1], so the rows are labelled with a different file than the one
# actually read — confirm which entry is intended.
filepath = os.path.join(PARENT_PATH, datetime_ls[2][1])
filename = datetime_ls[1][1]
# Not named `datetime`: rebinding that name would replace the imported module
# and break every later call to get_datetime.
file_date = datetime_ls[1][0]
df = get_csv_to_df(filepath, filename, file_date)
df.head()

Unnamed: 0,ï»¿Type d'Ã©quipement,Is Loading Bay Item,Zone spÃ©cifique du quai de chargement,Nom,NumÃ©ro d'asset,Fabricant,ModÃ¨le,Nom de la salle,Nom de baie,Slot U,...,Largeur (m),Profondeur (m),Poids (kg),SynchronisÃ© avec Gateway,ID SystÃ¨me externe,ID SystÃ¨me Asset,ID SystÃ¨me Baie,ID SystÃ¨me ChÃ¢ssis,SOURCES,timestamp
0,Equipements IT,,,SU919,2885508,Sun,SunFire V890,PC,J03,1,...,0.48,0.836,130.6,,0,798936-126608,798936-126594,,IT_Equipment.11.05.22.csv,11-05-22
1,Equipements IT,,,DEMOCRITE,2656355,Sun,SunFire V100,PC,I03,38,...,0.437,0.483,6.0,,0,798937-126811,798936-126723,,IT_Equipment.11.05.22.csv,11-05-22
2,Equipements IT,,,LEUCIPPE,2656358,Sun,SunFire V100,PC,H03,38,...,0.437,0.483,6.0,,0,798937-126945,798937-126870,,IT_Equipment.11.05.22.csv,11-05-22
3,Equipements IT,,,SU888,2885019,Sun,SunFire V210,PC,F03,20,...,0.425,0.635,12.0,,0,798937-127056,798937-127004,,IT_Equipment.11.05.22.csv,11-05-22
4,Equipements IT,,,SU846,2656367,Sun Microsystems,SunFire V440,PC,G03,5,...,0.44,0.635,37.0,,0,798937-127169,798937-127151,,IT_Equipment.11.05.22.csv,11-05-22


In [None]:
CREATE TRIGGER move_equipment AFTER
INSERT ON
 IT_Equipment_records FOR EACH ROW
BEGIN

# datetime_ls[0][0].strftime("%d-%m-%y")