# In [0]:
from log import syslog, sysError
import boto3


# Write data to Hive table
def write_output_to_hive_table(df_output, target_dir, table_name, mode):
    """Save ``df_output`` as the table ``table_name`` using a single partition.

    The storage path is ``target_dir`` concatenated (no separator is inserted)
    with the second dot-separated component of ``table_name``. Every exception
    raised while doing so is caught and handed to ``sysError`` together with a
    failure message.

    Args:
        df_output: Object exposing ``repartition(...).write`` (presumably a
            Spark DataFrame -- confirm against callers).
        target_dir: Path prefix placed in front of the table directory.
        table_name: Dotted table name; the part after the first dot is used
            as the directory name.
        mode: Value forwarded to ``write.mode``.
    """
    try:
        table_dir = table_name.split(".")[1]
        # One partition before writing.
        writer = df_output.repartition(1).write.mode(mode)
        location = target_dir + table_dir
        writer.option("path", location).saveAsTable(table_name)
        success_msg = 'Successfully written data to hive table : {}'.format(table_name)
        syslog(success_msg)
    except Exception as ex:
        failure_msg = 'Failed to write data to hive table : {}'.format(table_name)
        sysError(failure_msg, ex)


# Write data to partitioned Hive table
def write_output_to_partioned_hive_table(df_output, target_dir, table_name, mode, partition):
    """Save ``df_output`` as the table ``table_name``, partitioned on disk.

    The data is first repartitioned to one partition, then written with
    ``partitionBy(partition)`` to ``target_dir`` + the second dot-separated
    component of ``table_name``. Every exception is caught and passed to
    ``sysError`` with a failure message.

    Args:
        df_output: Object exposing ``repartition(...).write`` (presumably a
            Spark DataFrame -- confirm against callers).
        target_dir: Path prefix; joined to the table directory by plain
            string concatenation.
        table_name: Dotted table name; the part after the first dot is used
            as the directory name.
        mode: Value forwarded to ``write.mode``.
        partition: Value forwarded to ``partitionBy``.
    """
    try:
        table_dir = table_name.split(".")[1]
        writer = df_output.repartition(1).write.mode(mode).partitionBy(partition)
        location = target_dir + table_dir
        writer.option("path", location).saveAsTable(table_name)
        success_msg = 'Successfully written data to hive table : {}'.format(table_name)
        syslog(success_msg)
    except Exception as ex:
        failure_msg = 'Failed to write data to hive table : {}'.format(table_name)
        sysError(failure_msg, ex)


# write data to hive table with partition given as user input
def write_output_to_hive_table_input_partition(df_output, target_dir, table_name, mode, partition):
    """Save ``df_output`` as the table ``table_name`` after ``repartition(partition)``.

    Unlike the single-partition variant, the caller chooses the argument given
    to ``repartition``. The storage path is ``target_dir`` + the second
    dot-separated component of ``table_name``. Every exception is caught and
    passed to ``sysError`` with a failure message.

    Args:
        df_output: Object exposing ``repartition(...).write`` (presumably a
            Spark DataFrame -- confirm against callers).
        target_dir: Path prefix; joined to the table directory by plain
            string concatenation.
        table_name: Dotted table name; the part after the first dot is used
            as the directory name.
        mode: Value forwarded to ``write.mode``.
        partition: Value forwarded to ``repartition``.
    """
    try:
        table_dir = table_name.split(".")[1]
        writer = df_output.repartition(partition).write.mode(mode)
        location = target_dir + table_dir
        writer.option("path", location).saveAsTable(table_name)
        success_msg = 'Successfully written data to hive table : {}'.format(table_name)
        syslog(success_msg)
    except Exception as ex:
        failure_msg = 'Failed to write data to hive table : {}'.format(table_name)
        sysError(failure_msg, ex)


# write data to parquet file
def write_output_to_parquet(df_output, target_dir, directory_name, mode):
    """Write ``df_output`` as parquet to ``target_dir + directory_name``.

    The data is repartitioned to one partition before writing. Every
    exception is caught and passed to ``sysError`` with a failure message
    that includes the full destination path.

    Args:
        df_output: Object exposing ``repartition(...).write`` (presumably a
            Spark DataFrame -- confirm against callers).
        target_dir: Path prefix; joined to ``directory_name`` by plain string
            concatenation.
        directory_name: Directory appended to ``target_dir``.
        mode: Value forwarded to ``write.mode``.
    """
    try:
        writer = df_output.repartition(1).write.mode(mode)
        destination = target_dir + directory_name
        writer.parquet(destination)
        success_msg = 'Successfully written data to target directory as parquet : {}'.format(destination)
        syslog(success_msg)
    except Exception as ex:
        # The path is rebuilt here because the failure may have happened
        # before it was computed inside the try block.
        failure_msg = 'Failed to write data to target directory : {}'.format(target_dir + directory_name)
        sysError(failure_msg, ex)


# write data to parquet file with partition given as user input
def write_output_to_parquet_input_partition(df_output, target_dir, directory_name, mode, partition):
    """Write ``df_output`` as parquet after ``repartition(partition)``.

    The destination is ``target_dir + directory_name``; the log messages
    mention only ``directory_name``. Every exception is caught and passed to
    ``sysError`` with a failure message.

    Args:
        df_output: Object exposing ``repartition(...).write`` (presumably a
            Spark DataFrame -- confirm against callers).
        target_dir: Path prefix; joined to ``directory_name`` by plain string
            concatenation.
        directory_name: Directory appended to ``target_dir``.
        mode: Value forwarded to ``write.mode``.
        partition: Value forwarded to ``repartition``.
    """
    try:
        writer = df_output.repartition(partition).write.mode(mode)
        destination = target_dir + directory_name
        writer.parquet(destination)
        success_msg = 'Successfully written data to target directory as parquet : {}'.format(directory_name)
        syslog(success_msg)
    except Exception as ex:
        failure_msg = 'Failed to write data to target directory : {}'.format(directory_name)
        sysError(failure_msg, ex)


# write data to parquet file with partition given as user input and partition by value
def write_output_to_parquet_partition_partitionBy(df_output, target_dir, directory_name, mode, partition, partBy):
    """Write ``df_output`` as parquet with both ``repartition`` and ``partitionBy``.

    ``partition`` is given to ``repartition`` and ``partBy`` to
    ``write.partitionBy``. The destination is ``target_dir + directory_name``;
    the log messages mention only ``directory_name``. Every exception is
    caught and passed to ``sysError`` with a failure message.

    Args:
        df_output: Object exposing ``repartition(...).write`` (presumably a
            Spark DataFrame -- confirm against callers).
        target_dir: Path prefix; joined to ``directory_name`` by plain string
            concatenation.
        directory_name: Directory appended to ``target_dir``.
        mode: Value forwarded to ``mode`` on the writer.
        partition: Value forwarded to ``repartition``.
        partBy: Value forwarded to ``partitionBy``.
    """
    try:
        writer = df_output.repartition(partition).write.partitionBy(partBy).mode(mode)
        destination = target_dir + directory_name
        writer.parquet(destination)
        success_msg = 'Successfully written data to target directory as parquet : {}'.format(directory_name)
        syslog(success_msg)
    except Exception as ex:
        failure_msg = 'Failed to write data to target directory : {}'.format(directory_name)
        sysError(failure_msg, ex)


# copy objects matching a suffix under an S3 prefix to a target key in the same bucket (e.g. a plain csv file)
def write_output_to_plain_file_in_s3(pathbucket, input_filepath, ends_with, output_filepath, filename,
                                     kms_key_id='alias/VGI-KMS-S3'):
    """Copy objects with a given suffix to ``output_filepath + filename`` in the same bucket.

    Objects directly under ``input_filepath`` (the listing uses ``Delimiter='/'``)
    whose key ends with ``ends_with`` are copied, KMS-encrypted, to the single
    destination key. Because the destination key is the same for every match,
    each copy overwrites the previous one when several objects match.

    Every exception is caught and passed to ``sysError`` with a failure
    message. The success message is logged whenever the loop finishes without
    raising, even if no object matched.

    Args:
        pathbucket: Name of the bucket used as both source and destination.
        input_filepath: Key prefix to list.
        ends_with: Suffix (or tuple of suffixes) a key must end with to be copied.
        output_filepath: Destination key prefix; joined to ``filename`` by
            plain string concatenation.
        filename: Final part of the destination key.
        kms_key_id: KMS key id/alias passed as ``SSEKMSKeyId``; defaults to
            the alias that was previously hard-coded.
    """
    # Bound before the try block so the except handler can always format its
    # message; previously a failure while building the key left ``output``
    # unbound and the handler itself raised, hiding the real error.
    output = None
    try:
        output = output_filepath + filename
        client = boto3.client('s3')
        bucket = boto3.resource('s3').Bucket(pathbucket)

        for obj in bucket.objects.filter(Delimiter='/', Prefix=input_filepath):
            key = obj.key
            if not key.endswith(ends_with):
                continue
            print(key)
            client.copy_object(Bucket=pathbucket,
                               CopySource=pathbucket + '/' + key,
                               Key=output, ServerSideEncryption='aws:kms',
                               SSEKMSKeyId=kms_key_id)
        syslog('Successfully written file to target directory as plain csv file : {}'.format(output))
    except Exception as ex:
        sysError('Failed to write plain file to target directory : {}'.format(output), ex)
# write to csv file
# write to csv hive table file


#### Use this function only when you need Splunk and Email alerts
def write_output_to_partioned_hive_table_wtalrt(df_output, target_dir, table_name, mode, partition, alert_var):
    """Save ``df_output`` as a partitioned table, reporting failures via ``sysError_alert``.

    The data is repartitioned to one partition and written with
    ``partitionBy(partition)`` to ``target_dir`` + the second dot-separated
    component of ``table_name``. Every exception is caught and passed to
    ``sysError_alert`` together with ``alert_var``.

    Args:
        df_output: Object exposing ``repartition(...).write`` (presumably a
            Spark DataFrame -- confirm against callers).
        target_dir: Path prefix; joined to the table directory by plain
            string concatenation.
        table_name: Dotted table name; the part after the first dot is used
            as the directory name.
        mode: Value forwarded to ``write.mode``.
        partition: Value forwarded to ``partitionBy``.
        alert_var: Passed through unchanged to ``sysError_alert``.
    """
    try:
        file_path = table_name.split(".")[1]
        writer = df_output.repartition(1).write.mode(mode).partitionBy(partition)
        writer.option("path", target_dir + file_path).saveAsTable(table_name)
        syslog('Successfully written data to hive table : {}'.format(table_name))
    except Exception as ex:
        # ``sysError_alert`` is not among the names imported at the top of the
        # file, so the failure path used to die with NameError. Import it here,
        # only when needed, so a successful write never depends on it.
        # NOTE(review): assumes ``log`` exports ``sysError_alert`` -- confirm.
        from log import sysError_alert
        sysError_alert('Failed to write data to hive table : {}'.format(table_name), ex, alert_var)

#### Use this function only when you need Splunk and Email alerts
def write_output_to_hive_table_with_alert(df_output, target_dir, table_name, mode, alert_var):
    """Save ``df_output`` as the table ``table_name``, forwarding ``alert_var`` on failure.

    The data is repartitioned to one partition and written to ``target_dir``
    + the second dot-separated component of ``table_name``. Every exception is
    caught and passed to ``sysError`` along with ``alert_var``.

    Args:
        df_output: Object exposing ``repartition(...).write`` (presumably a
            Spark DataFrame -- confirm against callers).
        target_dir: Path prefix; joined to the table directory by plain
            string concatenation.
        table_name: Dotted table name; the part after the first dot is used
            as the directory name.
        mode: Value forwarded to ``write.mode``.
        alert_var: Passed through unchanged as the third argument of
            ``sysError``.
    """
    try:
        table_dir = table_name.split(".")[1]
        writer = df_output.repartition(1).write.mode(mode)
        location = target_dir + table_dir
        writer.option("path", location).saveAsTable(table_name)
        success_msg = 'Successfully written data to hive table : {}'.format(table_name)
        syslog(success_msg)
    except Exception as ex:
        failure_msg = 'Failed to write data to hive table : {}'.format(table_name)
        sysError(failure_msg, ex, alert_var)