In [1]:
import os
import psycopg2
import pandas as pd
import numpy as np

In [2]:
def loadDataAndCreateColumns(file, STUDYNAME):
    '''
    This function loads a csv file and parses the column types to generate a dictionary of PostgreSQL data types.
    It returns a dictionary with the columns data types.
    
    Parameters
    ---------- 
    file : str, path to the csv file. 
    '''
    
    df = pd.read_csv(file, sep=None, engine = 'python')
    iterator = pd.read_csv(file, sep=None, engine = 'python', iterator=True)
    inferredDelimiter = iterator._engine.data.dialect.delimiter
    print(f'File delimiter is {inferredDelimiter}\n')
    df.rename(columns={list(df)[0]: df.columns[0].replace("\ufeff", "")}, inplace = True)
    df.columns = df.columns.str.replace("&", "and").str.replace("-", "_")
    
    df['uniqueIdentifier'] = df.id.apply(lambda r: f'{STUDYNAME}_{r}')
    
    dtypeDict = dict()
    for c in df.columns:
        msk = df[c].notna()
        df.loc[msk, c] = pd.to_numeric(df.loc[msk, c].astype(str).str.replace(',', '.'), errors="ignore")
        if df[c].dtype in (np.int64, np.int32, int, float, bool):
            if df[c].dtype in (np.int32, np.int64) or df[msk][c].apply(float.is_integer).all():
                df.loc[msk, c] = df.loc[msk, c].astype(int)
                if set(df[c].dropna().unique()) == {0, 1}:
                    df[c] = df[c].astype(bool)
                    dtypeDict[c] = 'bool'
                else:
                    dtypeDict[c] = 'int'
            else:
                dtypeDict[c] = 'float'
        else:
            dtypeDict[c] = "text" # object
    for k, v in dtypeDict.items():
        if v == 'int':
            df[k] = df[k].astype(str).str.replace('\.0$', '')
            df = df.replace('nan', np.nan)
             
    return df, dtypeDict



def createTable(columnDict, TABLENAME):
    
    columns = "(" + ",\n".join([f"{k} {v}" for k,v in columnDict.items()]) + ")" # Creating the columns for the table
    
    conn = psycopg2.connect("host=localhost dbname=test user=postgres password=farol12g")
    cur = conn.cursor()
    cur.execute(f"CREATE TABLE {TABLENAME} \n {columns}")
    conn.commit()
    cur.close()
    conn.close()

    
def addData(file, TABLENAME):
    conn = psycopg2.connect("host=localhost dbname=test user=postgres password=farol12g")
    cur = conn.cursor()
    with open(file, 'r') as f:
        next(f) # Skip the header row.
        cur.copy_from(f, TABLENAME.lower(), sep=';', null ="")
    conn.commit()
    cur.close()
    conn.close()
    

def dropTable(TABLENAME):
    conn = psycopg2.connect("host=localhost dbname=test user=postgres password=farol12g")
    cur = conn.cursor()
    cur.execute(f'DROP TABLE {TABLENAME.lower()};')
    conn.commit()
    cur.close()
    conn.close()

In [4]:
dataPath = '../csvTest'
file = [os.path.join(f'{dataPath}/{d}/{f}') for d in os.listdir(f'{dataPath}') if os.path.isdir(f'{dataPath}/{d}') for f in os.listdir(f'{dataPath}/{d}') if '.csv' in f]

for f in file:
    STUDYNAME = f.split('/')[2]
    TABLENAME = f.split('/')[3].removesuffix('.csv')

    conn = psycopg2.connect("host=localhost dbname=test user=postgres password=farol12g")
    cur = conn.cursor()
    cur.execute(f"""SELECT to_regclass('public.{TABLENAME.lower()}');""")
    tableCheck = cur.fetchone()[0]
    cur.close()
    conn.close()
    
    if TABLENAME.lower() == tableCheck:
        dropTable(TABLENAME)
         
        df, dtDict = loadDataAndCreateColumns(f, STUDYNAME) 
        endFile = f'{f[:-4]}_autogenerated.csv' 
        df.to_csv(endFile, index = False, sep = ';')

        createTable(dtDict, TABLENAME)
        addData(endFile, TABLENAME)
        os.remove(endFile)
    else:
        try:
            df, dtDict = loadDataAndCreateColumns(f, STUDYNAME)
        except:
            print(f'The formatting for table {f} is not right. Ignoring it and continuing.')
            continue
        endFile = f'{f[:-4]}_autogenerated.csv' 
        df.to_csv(endFile, index = False, sep = ';')

        createTable(dtDict, TABLENAME)
        addData(endFile, TABLENAME)
        
        os.remove(endFile)

File delimiter is ,



  df[k] = df[k].astype(str).str.replace('\.0$', '')


File delimiter is ,

File delimiter is ;

The formatting for table ../csvTest/testStudy/gripTest.csv is not right. Ignoring it and continuing.
File delimiter is ;

File delimiter is ;

File delimiter is ;

