# Main_DDL
---
## This notebook reads the raw data files, cleans them, and converts the cleaned data into SQL scripts to be run on the server that hosts the database

### Import Statements

In [1]:
import pandas as pd
import os
import openpyxl
from myMethods import iter_file_paths, generate_ddl_from_file, process_txt_file, parse_FC_data

### Reading and Cleaning Data

In [None]:
## Convert every raw .txt export in the data directory into a cleaned .xlsx
## workbook written next to its source file.

## Directory containing the data files
DATA_dir = './Data_Selected/'
## Kept for parity with the original cell; workbooks are written beside
## their source file, which lives under DATA_dir.
output_dir = DATA_dir

## Loop through every file in the data directory
for filepath in iter_file_paths(DATA_dir):
    filename = os.path.basename(filepath)
    stem, ext = os.path.splitext(filename)

    ## Only raw text exports are inputs. Skipping everything else keeps a
    ## re-run from feeding the previously generated .xlsx workbooks back
    ## through the text parser and overwriting them.
    if ext.lower() != '.txt':
        continue

    ## Lowercase only the file name, not the directory part, so the target
    ## path stays valid on case-sensitive filesystems.
    out_name = os.path.join(os.path.dirname(filepath), stem.lower() + '.xlsx')

    ## Process the text file into a DataFrame, then clean it
    try:
        df = process_txt_file(filepath, _encoding='cp1252')
        print(f"Successfully processed: {filename}")
        ## Parse the DataFrame to clean up the data
        df = parse_FC_data(df)
        print(f"Successfully cleaned: {filename}")
    except Exception as e:
        ## Report the failure and move on; without `continue` the export
        ## below would run on an undefined or stale DataFrame.
        print(f"Error processing {filename}: {e}")
        continue

    ## Export the DataFrame to an Excel file
    with pd.ExcelWriter(out_name, engine='openpyxl', mode='w') as writer:
        df.to_excel(writer, header=False, index=False)
    print(f"Successfully exported: {filename} to {out_name}")



### Creating SQL DDL/DML scripts that load the cleaned Excel data into SQL tables

In [2]:
# Generate one .sql script per data file and save it under output_dir.

# Directory containing the data files
data_dir = './Data_Selected'
output_dir = './sql_scripts/ddl-dml/'

# Create the output directory if it doesn't exist (no separate exists() check,
# so there is no window between the check and the creation)
os.makedirs(output_dir, exist_ok=True)

# Loop through every file in the data directory using the iter_file_paths function
for file_path in iter_file_paths(data_dir):
    file_name = os.path.basename(file_path)

    # Process the file using the SQL DDL function. Only the second element of
    # the returned pair is used; it is indexed rather than unpacked into `_`
    # because assigning to `_` in a notebook shadows IPython's last-output variable.
    try:
        sql_script = generate_ddl_from_file(file_path)[1]
    except Exception as e:
        # Files the generator rejects are reported and skipped
        print(f"Error processing {file_name}: {e}")
        continue

    # Build the output file name based on the original file's name
    base_name = os.path.splitext(file_name)[0]
    output_file = os.path.join(output_dir, base_name + '.sql')

    # Write the generated SQL script to the output file. The encoding is
    # explicit so the bytes do not depend on the platform default.
    # NOTE(review): confirm the database import expects UTF-8.
    with open(output_file, 'w', encoding='utf-8') as f:
        f.write(sql_script)

    print(f"SQL script for {file_name} saved to {output_file}")


Error processing Ar76c.txt: Unsupported file format. Please use .csv, .xls, or .xlsx files
SQL script for ar76c.xlsx saved to ./sql_scripts/ddl-dml/ar76c.sql
Error processing Ar76f.txt: Unsupported file format. Please use .csv, .xls, or .xlsx files
SQL script for ar76f.xlsx saved to ./sql_scripts/ddl-dml/ar76f.sql
Error processing AR79C.TXT: Unsupported file format. Please use .csv, .xls, or .xlsx files
SQL script for ar79c.xlsx saved to ./sql_scripts/ddl-dml/ar79c.sql
Error processing AR79F.TXT: Unsupported file format. Please use .csv, .xls, or .xlsx files
SQL script for ar79f.xlsx saved to ./sql_scripts/ddl-dml/ar79f.sql
Error processing AR80C.TXT: Unsupported file format. Please use .csv, .xls, or .xlsx files
SQL script for ar80c.xlsx saved to ./sql_scripts/ddl-dml/ar80c.sql
Error processing AR80F.TXT: Unsupported file format. Please use .csv, .xls, or .xlsx files
SQL script for ar80f.xlsx saved to ./sql_scripts/ddl-dml/ar80f.sql
Error processing AR83C.TXT: Unsupported file format.

### The cells below are for testing / debugging purposes only

In [None]:
## Debug helper: run the load -> clean -> export pipeline on a single file
## and write the result to a scratch directory.

## Directory containing the data files
DATA_dir = './Data_Selected/'
_file = 'AR79C.txt'
output_dir = '../testing_directory/'
file_path = os.path.join(DATA_dir, _file)
out_name = os.path.join(output_dir, _file.lower().replace('.txt', '.xlsx'))
filename = os.path.basename(file_path)

## Make sure the scratch directory exists so the export cannot fail on a
## missing folder
os.makedirs(output_dir, exist_ok=True)

## testing the process_txt_file function
try:
    df = process_txt_file(file_path, _encoding='cp1252')
## If the file cannot be read as text, report it and stop here
except Exception as e:
    print(f"Error processing {filename}: {e}")
else:
    ## Clean and export only when loading succeeded; otherwise `df` would be
    ## undefined or left over from an earlier cell.
    print(f"Successfully processed: {filename}")
    ## Parse the DataFrame to clean up the data
    df = parse_FC_data(df)
    ## Export the DataFrame to an Excel file
    df.to_excel(out_name, header=False, index=False)


Successfully processed: AR79C.txt
