In [None]:
import pandas as pd
import numpy as np
import mysql.connector
from mysql.connector import Error
import os
import sys
import regex as re
import datetime 
import shutil
from tqdm import tqdm
import json

## DB Connection 
def create_server_connection(host_name, user_name, user_password):
	'''
	Open a connection to a MySQL server without selecting a database.

	Parameters
	----------
	host_name : str
		host of the MySQL server
	user_name : str
		account used to log in
	user_password : str
		password of that account

	Returns
	-------
	connection : obj or None
		connection object, or None when the attempt raised a
		mysql.connector Error (the error is printed)
	'''
	credentials = {"host": host_name, "user": user_name, "passwd": user_password}
	try:
		connection = mysql.connector.connect(**credentials)
	except Error as err:
		print(f"Error: '{err}'")
		return None

	print("MySQL Database connection successful")
	return connection

def create_db_connection(host_name, user_name, user_password, db_name):
	'''
	Open a connection to a MySQL server with a given database selected.

	Parameters
	----------
	host_name : str
		host of the MySQL server
	user_name : str
		account used to log in
	user_password : str
		password of that account
	db_name : str
		database to select on the connection

	Returns
	-------
	connection : obj or None
		connection object, or None when the attempt raised a
		mysql.connector Error (the error is printed)
	'''
	connection = None
	try:
		connection = mysql.connector.connect(
			host=host_name, user=user_name, passwd=user_password, database=db_name
		)
	except Error as err:
		print(f"Error: '{err}'")
	else:
		print("MySQL Database connection successful")

	return connection

def execute_query(connection, query):
	'''
	Execute a write-type SQL statement (CREATE, ALTER, UPDATE, ..) and commit it.

	Parameters
	----------
	connection : obj
		open connection object, must expose cursor() and commit()
	query : str
		SQL query

	Returns
	-------
	True or str
		True when the statement was executed and committed, otherwise the
		formatted error message ("Error: '...'"), which is also printed
	'''
	cursor = connection.cursor()
	try:
		cursor.execute(query)
		connection.commit() # implement our query
		print("Query successful")
	except Error as err:
		response = f"Error: '{err}'"
		print(response)
		return response
	finally:
		# release the cursor whatever the outcome
		try:
			cursor.close()
		except Error:
			# closing can itself fail (e.g. a statement that left unread rows);
			# the outcome above is already decided, so do not mask it
			pass

	return True

def read_query(connection, query):
	'''
	Run a read-type SQL statement (SELECT, SHOW, ..) and fetch all of its rows.

	Parameters
	----------
	connection : obj
		open connection object, must expose cursor()
	query : str
		SQL query

	Returns
	-------
	list or str
		rows returned by cursor.fetchall() on success, otherwise the
		formatted error message ("Error: '...'"), which is also printed
	'''
	cursor = connection.cursor()
	try:
		cursor.execute(query)
		return cursor.fetchall()
	except Error as err:
		response = f"Error: '{err}'"
		print(response)
		return response
	finally:
		# release the cursor on both the success and the error path
		cursor.close()

def populating_table_query(connection, df, file) :
	"""
	Insert the rows of a DataFrame into the table named `file`.

	A first query (SHOW COLUMNS) gives the number of columns of the table, then
	each row of the DataFrame is inserted and committed one by one, using only
	its first values (as many as the table has columns).

	Parameters
	----------
	connection : obj
		open connection object
	df : pandas.DataFrame
		rows to insert
	file : str
		name of the target table

	Returns
	-------
	True or str
		True when every row was inserted, otherwise the formatted error
		message of the first failure (rows committed before it stay in the table)
	"""
	LOG_EVERY_N = 2000
	ex = read_query(connection, f"SHOW COLUMNS from `{file}`") # one row per column
	if isinstance(ex, str) :
		# read_query hands back its error message as a string: stop here instead
		# of counting the characters of that message as columns
		return ex

	n_cols = len(ex)
	# the statement is the same for every row, build it once
	query = f"INSERT INTO `{file}` VALUES (" + ",".join(["%s"] * n_cols) + ")"

	cursor = connection.cursor()
	try :
		for i, row in tqdm(df.iterrows(), total = df.shape[0], desc=f"uploading file {str(file)} ..")  :
			try :
				cursor.execute(query, tuple(row[0:n_cols]))
				connection.commit() # implement our query
			except Error as err :
				response = f"Error: '{err}'"
				# the error is always returned, but only echoed for row labels that are multiples of LOG_EVERY_N
				if (i % LOG_EVERY_N) == 0 :
					print(response)
				return response
	finally :
		cursor.close()
	return True

## CSV engineering 

def get_datetime(file : str, keyword : str) -> tuple :
    """
    Get the date written in a csv file name of the form '<name>.<day>.<month>.<year>.csv'.
    Example : 'IT_Equipment.10.03.2022.csv' has been edited the 10-03-2022.

    Parameters
    ----------
    file : str
        file name to parse
    keyword : str
        text that must appear in the file name for it to be considered

    Output :
    ---
        tuple
            (datetime.datetime, file) when the name matches and holds a valid date,
            (None, None) otherwise
    """
    if ".csv" not in file or keyword not in file :
        return (None, None)

    parts = file.split('.')
    if len(parts) != 5 :    # ["filename", day, month, year, "csv"]
        return (None, None)

    day, month, year = parts[1:-1]
    if len(year) == 2 :    # year = 22 in place of 2022
        year = "20" + year

    try :
        time = datetime.datetime(day = int(day), month = int(month), year = int(year))
    except ValueError :
        # non numeric or out-of-range date parts mean wrong file
        return (None, None)

    return (time, file)
    
def take_first_item(elem):
	"""
	Return the item stored at index 0 of `elem`.
	"""
	first = elem[0]
	return first

def _read_csv_with_header_detection(filepath, delimiter) :
	"""
	Read a csv with the given delimiter; its first line is used as header only when it contains 'asset_id'.
	"""
	df = pd.read_csv(filepath, header = None, delimiter = delimiter)
	# astype(str) keeps the check working when the first row only holds numbers
	first_row = ' '.join(df.iloc[0].astype(str)).lower()
	if "asset_id" in first_row :	# if csv have header
		df = pd.read_csv(filepath, header = 0, delimiter = delimiter)
	return df

def get_csv_to_df(filepath : str, db_name , filename = None , datetime = None, check_header = True) :
	"""
	Load a csv file into a DataFrame, guessing its delimiter and whether it has a header line.

	The file is first read with ',' as delimiter. When that raises a ParserError or gives a
	single column, it is read again with ';'. 'Unnamed' columns are dropped, then the
	SOURCES / timestamp columns are added when `filename` or `datetime` is given.

	Parameters
	----------
	filepath : str
		path of the csv file
	db_name : str
		key passed to check_headers to find the expected header line
	filename : str, optional
		value stored in the "SOURCES" column
	datetime : datetime, optional
		date stored, formatted as '%d-%m-%y', in the "timestamp" column (None is stored when missing)
	check_header : bool
		when True, the header line is validated with check_headers; on mismatch the user
		is prompted and the program exits

	Returns
	-------
	df : pandas.DataFrame
	"""
	try :
		df = _read_csv_with_header_detection(filepath, ',')
	except pd.errors.ParserError :
		df = None
	# sometimes pandas mess with ',' delimiter, so we force to ';' delimiter.
	# The column count is checked before any extra column is added.
	if df is None or len(df.columns) == 1 :
		df = _read_csv_with_header_detection(filepath, ';')

	# Unnamed mean columns with no data, they exist because of a forgotten seperator at the end of the header's line.
	# Column labels are integers when the file has no header, hence str().
	unnamed = [name for name in df.columns if 'Unnamed' in str(name)]
	if unnamed :
		df = df.drop(unnamed, axis = 1)

	if filename is not None or datetime is not None :
		df["SOURCES"] = filename
		df['timestamp'] = datetime.strftime('%d-%m-%y') if datetime is not None else None

	# check consistancy of headers
	if check_header and check_headers(db_name, filepath) != True :
		input(f'Unconsistant header for file {filepath}, press Enter to exit')
		sys.exit()

	return df

def check_headers(db_name, filepath, header_path = None) :
	"""
	Check that the first line of a file is the header line expected for `db_name`.

	The expected header lines are read from a JSON file mapping a name to its header line.
	The comparison is exact: the line read from `filepath` keeps its trailing newline.

	Parameters
	----------
	db_name : str
		key of the expected header line in the JSON file
	filepath : str
		file whose first line is checked
	header_path : str, optional
		path of the JSON file, defaults to FILE_HEADER_PATH

	Returns
	-------
	bool
		True when the first line equals the expected one; False when it differs, when
		the JSON file does not exist or when it has no entry for `db_name`
	"""
	FILE_HEADER_PATH = "/home/alexandre/Downloads/query_folder/file_headers.txt"
	if header_path is None :
		header_path = FILE_HEADER_PATH

	if not os.path.exists(header_path) :
		# nothing to compare against: the header cannot be confirmed
		return False

	with open(header_path ,'r',  encoding='utf-8') as f :
		data = json.load(f)

	with open(filepath ,'r',  encoding='utf-8') as f2 :
		head = f2.readline()

	# head is always a str, so a missing entry (None) compares as different
	return head == data.get(db_name)

## File / OS helpers

def check_create_directory(path: str) :
	"""
	Make sure `path` exists: when it does not, create it together with every missing parent folder.
	Always returns True.
	"""
	if os.path.exists(path) :
		return True

	os.makedirs(path)
	return True

def move_to_folder(file: str, current_folder: str, new_folder: str) :
	"""
	Move `file` from `current_folder` to `new_folder`, keeping its name.
	Both full paths are printed before the move. Always returns True.
	"""
	source_path, target_path = (
		os.path.join(folder, file) for folder in (current_folder, new_folder)
	)

	print("current_file", source_path)
	print("new_file", target_path)
	shutil.move(source_path, target_path)

	return True

## SQL Engineering

def clean_query(x : list) :
	"""
	Join the pieces of a query with single spaces, then strip every newline, tab and backslash from it.
	"""
	joined = ' '.join(x)
	unwanted = str.maketrans("", "", "\n\t\\")
	return joined.translate(unwanted)

def split_multiple_query(raw_list) :
	"""
	Split every entry of `raw_list` on ';' and return all the pieces in one flat list.
	Entries without ';' are kept as they are; empty pieces (e.g. after a trailing ';') are kept too.
	"""
	return [piece for raw in raw_list for piece in raw.split(";")]
    

def dummy_query_to_csv() :
	"""
	Read the whole `olivier_it` table and return it as a DataFrame.

	Relies on a `connection` name that is not defined in this function (expected in the enclosing scope).
	The column names come from a second query (show columns).
	"""
	select_all = "SELECT * FROM `olivier_it` "
	show_columns = "show columns from `olivier_it`"

	rows = read_query(connection, select_all)
	columns_info = read_query(connection, show_columns)

	headers = [info[0] for info in columns_info]
	# each row is turned into a list before building the DataFrame
	records = [list(row) for row in rows]

	return pd.DataFrame(records, columns=headers)

def export_table_csv(TABLE_NAME) :
	"""
	Dump every row of a table of the `base6sigma` database into a ';' separated file.

	The query is sent to the `mysql` command line client on its standard input and every
	tab of its output is replaced by ';'. The file is only written, and reported as saved,
	when the client exits with status 0. Otherwise the user is prompted.

	Parameters
	----------
	TABLE_NAME : str
		name of the table to export, also used in the name of the saved file
	"""
	import subprocess

	today = datetime.date.today()	# read once so day / month / year cannot straddle midnight
	savepath = f'/home/alexandre/Downloads/resultat/{TABLE_NAME}_{today.day}_{today.month}_{today.year}_resultat.csv'

	# NOTE(review): credentials are hard-coded and visible on the command line; consider an option file.
	# The arguments are passed as a list (no shell), so TABLE_NAME cannot alter the command itself.
	cmd = ["mysql", "base6sigma", "-ualexandre", "-pSigma64ever"]
	query = f"select * from {TABLE_NAME}\n".encode()

	saved = False
	try :
		completed = subprocess.run(cmd, input = query, stdout = subprocess.PIPE)
		# check the status of mysql itself, not of a trailing filter
		if completed.returncode == 0 :
			with open(savepath, 'wb') as f :
				f.write(completed.stdout.replace(b"\t", b";"))
			saved = True
	except OSError :
		# mysql client not found, or savepath cannot be written
		saved = False

	if saved :
		print(f'file saved to {savepath}')
	else :
		input(f'Something happen, file cannot be saved to {savepath}. Press Enter to exit')







