# Main_DDL
---
## This file is responsible for reading the raw data, cleaning it, and transforming the data into SQL files for implementation in the server containing the database

### Import Statements

In [1]:
import pandas as pd
import os
from myMethods import iter_file_paths, generate_ddl_from_file, process_txt_file, parse_FC_data

### Reading and Cleaning Data

In [5]:
## Directory containing the data files
DATA_dir = './Data_Selected/'

## Loop through every file in the data directory
for file_path in iter_file_paths(DATA_dir):
    ## Create a df from the file
    try:
        df = process_txt_file(file_path, DATA_dir, _encoding= 'cp1252', _write=True)
        filename = os.path.basename(file_path).lower()
        print(f"Successfully processed: {filename}")
    ## If the file is not a valid text file, skip it
    except Exception as e:
        print(f"Error processing {os.path.basename(file_path)}: {e}")
    # parse_FC_data(df).to_excel(file_path.replace('.txt', '.xlsx'), header=False, index=False)
    

Error processing Ar76c.txt: Error saving Excel file: [Errno 13] Permission denied: './Data_Selected/Ar76c.xlsx'
Error processing Ar76c.xlsx: Unsupported file format. This function only processes .txt files.
Successfully processed: ar76f.txt
Error processing Ar76f.xlsx: Unsupported file format. This function only processes .txt files.
Successfully processed: ar79c.txt
Successfully processed: ar79f.txt
Successfully processed: ar80c.txt
Successfully processed: ar80f.txt
Successfully processed: ar83c.txt
Successfully processed: ar83f.txt
Successfully processed: ar85c.txt


KeyboardInterrupt: 

### Creating SQL DDL/DML scripts to transform the raw excel data into SQL tables

In [2]:
# Directory containing the data files
data_dir = './Data_Selected'
output_dir = './sql_scripts/ddl-dml/'

# Create the output directory if it doesn't exist
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

# Loop through every file in the data directory using the iter_file_paths function
for file_path in iter_file_paths(data_dir):
    file_name = os.path.basename(file_path)
    
    # Process the file using your SQL DDL function
    try:
        _, sql_script = generate_ddl_from_file(file_path)
    except Exception as e:
        print(f"Error processing {file_name}: {e}")
        continue
    
    # Build the output file name based on the original file's name
    base_name, _ = os.path.splitext(file_name)
    output_file = os.path.join(output_dir, base_name + '.sql')
    
    # Write the generated SQL script to the output file
    with open(output_file, 'w') as f:
        f.write(sql_script)
    
    print(f"SQL script for {file_name} saved to {output_file}")


Error processing Ar76c.txt: Unsupported file format. Please use .csv, .xls, or .xlsx files
Error processing Ar76f.txt: Unsupported file format. Please use .csv, .xls, or .xlsx files
Error processing AR79C.TXT: Unsupported file format. Please use .csv, .xls, or .xlsx files
Error processing AR79F.TXT: Unsupported file format. Please use .csv, .xls, or .xlsx files
Error processing AR80C.TXT: Unsupported file format. Please use .csv, .xls, or .xlsx files
Error processing AR80F.TXT: Unsupported file format. Please use .csv, .xls, or .xlsx files
Error processing AR83C.TXT: Unsupported file format. Please use .csv, .xls, or .xlsx files
Error processing AR83F.TXT: Unsupported file format. Please use .csv, .xls, or .xlsx files
Error processing AR85C.TXT: Unsupported file format. Please use .csv, .xls, or .xlsx files
Error processing AR85F.TXT: Unsupported file format. Please use .csv, .xls, or .xlsx files
Error processing AR87C.TXT: Unsupported file format. Please use .csv, .xls, or .xlsx files

### cells below are explicitly for testing / debugging purposes

In [None]:
## Directory containing the data files
DATA_dir = './Data_Selected/'
output_dir= './testing_directory/'

try:
    df = process_txt_file(file_path, output_dir, _encoding= 'cp1252', _write=True)
    filename = os.path.basename(file_path).lower()
    print(f"Successfully processed: {filename}")
## If the file is not a valid text file, skip it
except Exception as e:
    print(f"Error processing {os.path.basename(file_path)}: {e}")

CREATE TABLE IF NOT EXISTS ar_1999 (
    "CODIGO" INTEGER,
    "DIST_CONC_FREG" TEXT,
    "T_ELEI" TEXT,
    "D_ELEI" TEXT,
    "SIGLA" TEXT,
    "T_DADOS" TEXT,
    "VOTOS" INTEGER,
    "P_VOTOS" NUMERIC,
    "MANDATOS" INTEGER
);

INSERT INTO ar_1999 ("CODIGO", "DIST_CONC_FREG", "T_ELEI", "D_ELEI", "SIGLA", "T_DADOS", "VOTOS", "P_VOTOS", "MANDATOS") VALUES (10000, 'AVEIRO', 'ar', '10/10/1999', 'CDS/PP', 'O', 49196, 13.579999923706055, 2);
INSERT INTO ar_1999 ("CODIGO", "DIST_CONC_FREG", "T_ELEI", "D_ELEI", "SIGLA", "T_DADOS", "VOTOS", "P_VOTOS", "MANDATOS") VALUES (10000, 'AVEIRO', 'ar', '10/10/1999', 'B.E.', 'O', 4677, 1.2899999618530273, 0);
INSERT INTO ar_1999 ("CODIGO", "DIST_CONC_FREG", "T_ELEI", "D_ELEI", "SIGLA", "T_DADOS", "VOTOS", "P_VOTOS", "MANDATOS") VALUES (10000, 'AVEIRO', 'ar', '10/10/1999', 'MPT', 'O', 842, 0.23000000417232513, 0);
INSERT INTO ar_1999 ("CODIGO", "DIST_CONC_FREG", "T_ELEI", "D_ELEI", "SIGLA", "T_DADOS", "VOTOS", "P_VOTOS", "MANDATOS") VALUES (10000, 'A