# In [None]:
import datetime
import json
import os
import shutil
import subprocess
import sys

import mysql.connector
import numpy as np
import pandas as pd
import regex as re
from mysql.connector import Error
from tqdm import tqdm

from Kernel_lib import *

DEBUG = True

## CONSTANTS
# Directory that must contain every CSV file to import.
PARENT_PATH = './historique'
# Notebook holding all the queries to launch in order to get a clean CSV file.
QUERY_ENGINEERING_PATH = '/home/alexandre/Downloads/query_folder/Equipment_move_workflow.ipynb'

database_name = {
    "IT_Equipment": "IT_Equipment",
    "assets_new": "Assets",
    "olivier_it": "Olivier_it",
}

## Establish connection with 6Sigma
# Every setting can be overridden through the environment so credentials do
# not have to live in the source file; the defaults are the historical values,
# which keeps existing setups working unchanged.
host_name = os.environ.get("SIGMA_DB_HOST", "localhost")
user_name = os.environ.get("SIGMA_DB_USER", "alexandre")
user_password = os.environ.get("SIGMA_DB_PASSWORD", "Sigma64ever")
db = os.environ.get("SIGMA_DB_NAME", 'base6sigma')

# NOTE(review): the server-level connection is overwritten right away by the
# database-level one and is never closed here. The call is kept because
# create_server_connection() may have side effects (e.g. a connectivity
# message) -- confirm against Kernel_lib before removing it.
connection = create_server_connection(host_name, user_name, user_password)
connection = create_db_connection(host_name, user_name, user_password, db)

def _detect_csv_encoding(filepath):
    """Return 'utf-8' if every line of the file decodes as UTF-8, else 'latin-1'."""
    with open(filepath, 'rb') as raw_file:
        try:
            # Splitting on b'\n' is safe: UTF-8 multi-byte sequences never
            # contain that byte, so no character is cut in half.
            for raw_line in raw_file:
                raw_line.decode('utf-8')
        except UnicodeDecodeError:
            return 'latin-1'
    return 'utf-8'


def _read_csv_with_optional_header(filepath, delimiter, encoding):
    """Read the CSV, using its first row as header when that row mentions 'asset_id'."""
    df = pd.read_csv(filepath, header=None, delimiter=delimiter, encoding=encoding)
    # str() makes the check safe for numeric cells and NaN.
    has_header = not df.empty and any(
        "asset_id" in str(cell).lower() for cell in df.iloc[0]
    )
    if has_header:
        df = pd.read_csv(filepath, header=0, delimiter=delimiter, encoding=encoding)
    return df


def get_csv_to_df(filepath: str, db_name, filename=None, datetime=None):
    """
    Transform a csv file into a DataFrame.

    The file is read as UTF-8 when its whole content decodes as UTF-8 and as
    latin-1 otherwise. It is parsed with ',' as delimiter first; if pandas
    raises a ParserError it is parsed again with ';'. When the first row
    contains "asset_id" (case-insensitive) that row is used as header,
    otherwise the columns are numbered.

    Input :
        filepath : str
            filepath of csv file
        db_name : str
            table name (not used by this function, kept for callers)
        filename : str
            None by default, not specific to all table
        datetime : datetime.datetime
            None by default, not specific to all table. Date value
    Output :
        df : DataFrame
            DataFrame to insert into table. When filename or datetime is
            given, two columns are appended: "SOURCES" (filename) and
            "timestamp" (datetime formatted as '%y-%m-%d', or None when no
            datetime was given).
    """
    encoding = _detect_csv_encoding(filepath)

    try:
        df = _read_csv_with_optional_header(filepath, ',', encoding)
    except pd.errors.ParserError:
        df = _read_csv_with_optional_header(filepath, ';', encoding)

    # Metadata is appended after header detection so that a file name
    # containing "asset_id" cannot be mistaken for a header row.
    if filename is not None or datetime is not None:
        df["SOURCES"] = filename
        # Both columns are always added together so the column layout stays
        # the same whichever of the two values is provided.
        df["timestamp"] = (
            datetime.strftime('%y-%m-%d') if datetime is not None else None
        )

    return df

def populating_table_query(connection, df, file):
    """
    Insert every row of a DataFrame into a table, one INSERT per row.

    Input :
        connection :
            open database connection providing cursor() and commit()
        df : DataFrame
            rows to insert; the columns must be in the table's column order
            because the INSERT statement does not name columns
        file : str
            name of the target table (also shown in the progress bar)
    Output :
        str
            "None" when no row failed (including an empty DataFrame),
            otherwise "Error: '<message>'" for the last row that failed.
            A failed row does not stop the insertion of the following rows.
    """
    # The statement is identical for every row, so it is built once.
    # NOTE: an identifier cannot be passed as a query parameter; `file` must
    # come from trusted code, never from user input.
    placeholders = ",".join(["%s"] * df.shape[1])
    query = f"INSERT `{file}` VALUES ({placeholders})"

    # Stays None unless a row fails, so the caller's "'Error' in result"
    # test also detects failures that are followed by successful rows.
    response = None
    cursor = connection.cursor()
    try:
        rows = df.itertuples(index=False, name=None)
        for row in tqdm(rows, total=df.shape[0], desc=f"uploading file {str(file)} .."):
            try:
                cursor.execute(query, row)
                connection.commit()  # persist each row as soon as it is inserted
            except Error as err:
                response = f"Error: '{err}'"
    finally:
        cursor.close()
    return str(response)

keyword = 'IT_Equipment'
dir_ls = os.listdir(PARENT_PATH)   # list every entry of the csv directory

# Keep only the entries for which get_datetime() returns a date as first item
# (get_datetime is called once per entry).
# NOTE(review): entries are presumably (date, filename) pairs -- confirm
# against Kernel_lib.get_datetime.
datetime_ls = [
    dated_file
    for dated_file in (get_datetime(name, keyword) for name in dir_ls)
    if dated_file[0] is not None
]
datetime_ls.sort(key = take_first_item) # sort date from min to max

## Dates that already have records inserted
# Defined up front so the insertion loop can always test membership, even
# when the records cannot be read (every file is then treated as new).
datetime_in_records = set()

query = """SELECT * FROM `date_insertion_records`"""
ex = read_query(connection, query)
print(ex)
if 'Error' in ex :
    print('date_insertion_records views not founded.. Creating a new one')
    try :
        execute_query(connection, """drop view if EXISTS date_insertion_records ; """)  # drop the stale view
        execute_query(connection, """create view date_insertion_records as select DISTINCT(INSERTED) source_insert from `IT_Equipment_records` ORDER by INSERTED desc ; """)  # recreate it
        print('Done !')
        # Read the freshly created view so dates already present in
        # IT_Equipment_records are not inserted a second time.
        ex = read_query(connection, query)
    except Error as err:
        print(f"Error: '{err}'")

if 'Error' not in ex :
    # Each row holds one date string in two-digit-year 'yy-mm-dd' format;
    # parse it so it can be compared with the dates of the csv files.
    datetime_in_records = {datetime.datetime.strptime(date[0], '%y-%m-%d') for date in ex}

## Loop for each csv file
i = 0 # number of files processed (counted even when an insertion error was skipped)
for file in datetime_ls :
    if file[0] in datetime_in_records : # date already has records: nothing to do
        continue

    ### CSV TO DF
    print("-"*10, file[1],"-"*10)
    filepath = os.path.join(PARENT_PATH,file[1])
    df = get_csv_to_df(filepath, keyword,file[1], file[0])

    ### SQL QUERY
    ex = populating_table_query(connection, df, 'IT_Equipment_records')
    if 'Error' in ex :
        # Let the operator decide whether to go on with the next file.
        response = input(f"{ex}, press 1 to skip, 0 to exit \t")
        if response == '0' :
            sys.exit()

    i += 1

if i == 0 :
    input("No data have been updated, please check your file, press enter to continue")

## Import notebook as JSON file
with open(QUERY_ENGINEERING_PATH ,'r',  encoding='utf-8') as f :
    data = json.load(f)

# One row per notebook cell.
df = pd.DataFrame.from_dict(data['cells'])
# errors="ignore": not every notebook carries all of these keys (older
# notebook formats have no per-cell "id", for instance), and a missing
# column must not abort the run.
df = df.drop(["id", "metadata", "execution_count", "outputs"], axis = 1, errors = "ignore") # Clean notebook
df = df.drop(df[df.cell_type== "markdown"].index ) # markdown cells hold no query

## Clean query
df = df.apply(lambda cell : clean_query(cell.source), axis = 1)
raw_list = df.values
query_list = split_multiple_query(raw_list)

## Query launcher
for query in tqdm(query_list) :
    print("-"*10)
    print(query)
    ex = execute_query(connection, query)
    print("-"*10)

## EXPORT DATA
table_name = 'IT_Equipment_unique_records'
today = datetime.date.today()  # read once so day/month/year always belong to the same date
dd = today.day
mm = today.month
yy = today.year

savepath = f'/home/alexandre/Downloads/resultat/{table_name}_{dd}_{mm}_{yy}_resultat.csv'

# The mysql client is run directly (no shell pipeline) so that its own exit
# status is checked: with `mysql | tr > file` the status was the one of `tr`
# and a failed export was reported as saved.
# --batch forces the tab-separated output used for the conversion below.
cmd = [
    "mysql", db,
    f"--user={user_name}",
    f"--password={user_password}",
    "--batch",
    "-e", f"select * from {table_name}",
]
try :
    completed = subprocess.run(cmd, stdout = subprocess.PIPE)
    ex = completed.returncode
    if ex == 0 :
        # Same conversion as `tr "\t" ";"`; the file is only written when
        # the export succeeded.
        with open(savepath, 'wb') as out_file :
            out_file.write(completed.stdout.replace(b"\t", b";"))
except OSError as err : # mysql client missing, or savepath not writable
    print(f"Error: '{err}'")
    ex = 1

if ex == 0 :
    print(f'file saved to {savepath}')
else :
    print(f'Something happen, file cannot be saved to {savepath}')

input("Press Enter to finish")