# Installation

In [1]:
#!pip install sqlalchemy psycopg2-binary

# Import

In [2]:
import os
from urllib.parse import quote_plus

import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy import text

# Render floats with thousands separators and two decimals in every DataFrame display.
pd.options.display.float_format = "{:,.2f}".format

# Connection Parameters

In [9]:
# Connection settings are taken from the standard PostgreSQL (libpq) environment
# variables when they are set, so real credentials never have to be saved in the
# notebook. The fallbacks are the previous hard-coded local-development values.
user = os.environ.get("PGUSER", "postgres")
password = os.environ.get("PGPASSWORD", "admin")
host = os.environ.get("PGHOST", "localhost")
port = os.environ.get("PGPORT", "5432")
database = os.environ.get("PGDATABASE", "hdb")

# Class: PSQL

In [4]:
class PSQL:
    """Small convenience wrapper around a SQLAlchemy engine for PostgreSQL.

    The connection settings are read from the module-level ``user``,
    ``password``, ``host``, ``port`` and ``database`` variables at the moment
    the instance is created.

    Attributes:
        engine: the SQLAlchemy engine (created with AUTOCOMMIT isolation).
        connection_url: the URL string the engine was created from.
    """

    #---Initialization---------------------------------------------------------------------
    def __init__(self):
        self.engine = None
        self.connection_url = None
        self.get_connection()

    #---Get Database Connection------------------------------------------------------------
    def get_connection(self):
        """Create the engine, probe it once, and store it on the instance.

        A failed probe is reported with a printed message instead of being
        raised; the engine and URL are stored either way, so later calls will
        surface the underlying error themselves.
        """
        # Percent-encode the credentials so characters such as "@", ":" or "/"
        # in a user name or password cannot corrupt the URL.
        connection_url = (
            f"postgresql+psycopg2://{quote_plus(user)}:{quote_plus(password)}"
            f"@{host}:{port}/{database}"
        )

        # AUTOCOMMIT: every statement is committed immediately (needed for
        # statements such as CREATE DATABASE that cannot run in a transaction).
        engine = create_engine(connection_url, isolation_level="AUTOCOMMIT")

        # Test connection
        try:
            with engine.connect():
                print("Connected successfully!")
        except Exception as e:
            print("Connection failed:", e)

        self.engine = engine
        self.connection_url = connection_url

    #---Normalize SQL Input----------------------------------------------------------------
    @staticmethod
    def _as_statement(sql):
        """Return an executable statement, or None when ``sql`` is None/blank.

        Plain strings are wrapped in ``text()`` because SQLAlchemy 2.x no
        longer executes raw strings; any other object is passed through.
        """
        if sql is None or str(sql).strip() == '':
            return None
        return text(sql) if isinstance(sql, str) else sql

    #---Execute SQL Query------------------------------------------------------------------
    def query(self, sql, quiet=True):
        """Run a row-returning statement and return the rows as a DataFrame.

        Args:
            sql: a SQLAlchemy executable (e.g. ``text(...)``) or a plain string.
            quiet: when False, the first five rows are also printed.

        Returns:
            A DataFrame whose columns are named after the result columns and
            whose index starts at 1, or None when ``sql`` is None/blank.
            The total row count is always printed.
        """
        statement = self._as_statement(sql)
        if statement is None:
            return None

        with self.engine.connect() as conn:
            result = conn.execute(statement)
            # Capture the column names while the result is still open so the
            # frame keeps its schema even when no rows come back.
            columns = list(result.keys())
            rows = result.fetchall()

        if not quiet:
            for row in rows[:5]:
                print(row)

        df = pd.DataFrame(rows, columns=columns)
        # 1-based row labels for display.
        df.index = pd.RangeIndex(start=1, stop=len(df) + 1)
        print(f"Total Rows: {len(df)}")
        return df

    #---Execute SQL Statement (no rows fetched)--------------------------------------------
    def execute(self, sql, quiet=True):
        """Run a statement without fetching rows (DDL, INSERT, UPDATE, ...).

        Args:
            sql: a SQLAlchemy executable (e.g. ``text(...)``) or a plain string.
            quiet: accepted for signature compatibility with ``query``; unused.

        Returns:
            The result object from ``Connection.execute``, or None when ``sql``
            is None/blank. The connection is already closed when this returns,
            so use ``query`` for statements whose rows are needed.
        """
        statement = self._as_statement(sql)
        if statement is None:
            return None

        with self.engine.connect() as conn:
            result = conn.execute(statement)

        return result

# Usage

### Initialization

In [8]:
# Creating the wrapper builds the engine and prints the connection status.
psql = PSQL()

#sql=text("CREATE DATABASE hdb;")
#psql.execute(sql)

Connected successfully!


<sqlalchemy.engine.cursor.CursorResult at 0x14bac9550>

<hr style="border: 2px solid red;" />
<hr style="border: 2px solid blue;" />
<hr style="border: 2px solid green;" />

In [None]:
def populate_table(data_file, table_name):
    """Load a CSV file into a database table and print the resulting row count.

    The table is replaced if it already exists. The row count is read back
    from the database through the notebook-level ``psql`` wrapper.

    Args:
        data_file: path of the CSV file to import.
        table_name: name of the destination table.
    """
    df = pd.read_csv(data_file)

    # A separate engine is used for the bulk insert; dispose of it afterwards so
    # repeated calls do not accumulate idle connection pools.
    engine = create_engine(psql.connection_url)
    try:
        df.to_sql(table_name, engine, if_exists="replace", index=False)
        # Quote the identifier the way the dialect does when creating the table,
        # so mixed-case or otherwise special table names can be queried back.
        quoted_name = engine.dialect.identifier_preparer.quote(table_name)
    finally:
        engine.dispose()

    sql = text(f"SELECT count(*) FROM {quoted_name}")
    result = psql.query(sql)
    counts = result.iloc[0].values[0]

    print(f"CSV: {data_file} imported successfully. Numer of recrods : {counts}")


In [None]:
# Presumably CSV file name -> table name; not referenced by the visible cells (TODO confirm use).
datasets = dict([
    ("stat_monthly.csv", "stat_monthly"),
    ("stat_yearly.csv", "stat_yearly"),
    ("Main.csv", "main"),
])

In [None]:
# NOTE(review): this always raises ZeroDivisionError. Presumably a manual "stop here"
# guard so that Run All does not reach the table-loading cells below -- confirm, and
# consider replacing it with an explicit flag.
1/0

In [None]:
# Import the GDP CSV into the "gdp" table.
populate_table("datasets/GDP.csv", "gdp")

In [None]:
# Load into its own table: the target used to be "gdp", which replaced the GDP data
# loaded by the previous cell and left "hdb_resale_price" (queried below) unpopulated.
populate_table(data_file="datasets/HDB_Resale_Price.csv", table_name="hdb_resale_price")

In [None]:
# Read the whole hdb_resale_price table into a DataFrame and display it.
sql = text("SELECT * FROM hdb_resale_price")
df = psql.query(sql=sql)
df

<hr style="border: 2px solid red;" />
<hr style="border: 2px solid blue;" />
<hr style="border: 2px solid green;" />

In [None]:
# Summarise the dtypes and non-null counts of the current `df`.
df.info()

In [None]:
# Parse the "year_month" column as datetimes (values must look like YYYY-MM-DD).
df["year_month"] = pd.to_datetime(
    df["year_month"],
    format="%Y-%m-%d",
)

In [None]:
# Make "year_month" the index. Skipped when it already is the index, so re-running
# this cell no longer fails with a KeyError; a genuinely missing column still raises.
if df.index.name != "year_month":
    df = df.set_index("year_month")

In [None]:
# Display the current `df`.
df

In [None]:
# Read the births table: 9 leading lines are skipped and the second remaining line
# is used as the header; only the first 15 data rows are kept.
df = pd.read_csv("datasets/M810051_births.csv", skiprows=9, header=1, nrows=15)

# Transpose so that each original column becomes a row; the first transposed row
# supplies the new column labels and is then dropped.
df_t = df.T
df_t.columns = df_t.iloc[0]
df_t = df_t.iloc[1:].sort_index(ascending=True)
df_t.columns.name = None
df_t.index.name = "year_month"

df_t.to_csv("datasets/M810051_births_new.csv", index=True)
df_t

In [None]:
# Read the marriages table: 9 leading lines are skipped and the second remaining
# line is used as the header; only the first 39 data rows are kept.
df = pd.read_csv(
    "datasets/M830162_marriages.csv",
    skiprows=9,
    header=1,
    nrows=39,
)

# Transpose; the first transposed row supplies the column labels and is dropped.
df_t = df.T
df_t.columns = df_t.iloc[0]
df_t = df_t.iloc[1:]
df_t.index.name = "year"
df_t.columns.name = None

df_t.to_csv("datasets/M830162_marriages_new.csv", index=True)
df_t

In [None]:
def process_csv_file(filename, nrows, skiprows=9, index_name='year_month', data_dir='datasets'):
    """Transpose a wide CSV table and save the result next to it as ``<name>_new.csv``.

    The file is read with its first non-skipped line as the header. After
    transposing, the first row (the original first column) becomes the column
    labels, the remaining rows are sorted by index, and the frame is written
    to ``<data_dir>/<name>_new.csv``.

    Args:
        filename: CSV file name relative to ``data_dir``.
        nrows: number of data rows to read.
        skiprows: number of leading lines to skip before the header line.
        index_name: name given to the index of the transposed frame.
        data_dir: folder holding the input and receiving the output.

    Returns:
        A tuple ``(df_t, df_sample, df)``: the transposed frame, its first and
        last five rows concatenated (rows repeat when the frame has fewer than
        ten), and the frame exactly as read from the file.
    """
    input_file = f"{data_dir}/{filename}"

    # Strip only a trailing ".csv" (any case). The previous str.replace left the
    # name untouched when no lowercase ".csv" was present, which made the output
    # path equal to the input path and overwrote the source file.
    stem = filename[:-4] if filename.lower().endswith(".csv") else filename
    output_file = f"{data_dir}/{stem}_new.csv"

    df = pd.read_csv(input_file, skiprows=skiprows, header=0, nrows=nrows)

    df_t = df.transpose()
    df_t.columns = df_t.iloc[0]
    df_t = df_t.iloc[1:]
    df_t.columns.name = None
    df_t.index.name = index_name
    df_t = df_t.sort_index(ascending=True)
    df_t.to_csv(output_file, index=True)

    df_sample = pd.concat([df_t.head(5), df_t.tail(5)])

    return df_t, df_sample, df

In [None]:
# Transpose the unemployment table (no leading lines skipped) and preview it.
df, df_sample, dfx = process_csv_file(
    "unemployment.csv",
    nrows=15,
    skiprows=0,
    index_name="year_month",
)
df_sample

In [None]:
# Transpose the households table and inspect the resulting index.
df, df_sample, dfx = process_csv_file(
    "M810661_Households_Living.csv",
    nrows=15,
    skiprows=10,
    index_name="year",
)
df.index

In [None]:
# Transpose the births table and display the full result.
df, df_sample, df_raw = process_csv_file(
    "M810051_births.csv",
    nrows=15,
    skiprows=10,
    index_name="year_month",
)
df

In [None]:
# Transpose the divorces table and preview its first and last rows.
df, df_sample, df_raw = process_csv_file(
    "M830250_divorces.csv",
    nrows=39,
    skiprows=10,
    index_name="year",
)
df_sample

In [None]:
# Transpose the marriages table and preview its first and last rows.
df, df_sample, df_raw = process_csv_file(
    "M830162_marriages.csv",
    nrows=39,
    skiprows=10,
    index_name="year",
)
df_sample

In [None]:
# Re-save the file without the columns whose header starts with "Unnamed".
df = pd.read_csv(
    "datasets/Resale_HDB_MRT_Distance.csv",
    usecols=lambda name: not name.startswith("Unnamed"),
    low_memory=False,
    index_col=None,
)
df.to_csv("datasets/Resale_HDB_MRT_Distance_new.csv", index=None)
df.head()

In [None]:
# Re-save the file without the columns whose header starts with "Unnamed".
df = pd.read_csv(
    "datasets/TravellingDistance.csv",
    usecols=lambda name: not name.startswith("Unnamed"),
    low_memory=False,
    index_col=None,
)
df.to_csv("datasets/TravellingDistance_new.csv", index=None)
df.head()