In [3]:
from llama_index.readers.pdf_table import PDFTableReader
from pathlib import Path

reader = PDFTableReader()
pdf_path = Path("z1.pdf")
documents = reader.load_data(file=pdf_path, pages="11")

In [4]:
documents

[]

In [7]:
import pandas as pd

# Specify the path to your tab-delimited text file
file_path = '~/financial_system_graph/z1data/z1_csv_files/data_dictionary/b101.txt'

# Read the tab-delimited text file into a pandas DataFrame
df = pd.read_csv(file_path, delimiter='\t')

# Display the first few rows of the DataFrame
df.head()

Unnamed: 0,FL152000005.Q,Households and nonprofit organizations; total assets,Line 1,B.101 Balance Sheet of Households and Nonprofit Organizations,"Millions of dollars; amounts outstanding end of period, not seasonally adjusted"
0,LM152010005.Q,Households and nonprofit organizations; nonfin...,Line 2,B.101 Balance Sheet of Households and Nonprofi...,Millions of dollars; amounts outstanding end o...
1,LM155035005.Q,Households and nonprofit organizations; real e...,Line 3,B.101 Balance Sheet of Households and Nonprofi...,Millions of dollars; amounts outstanding end o...
2,LM155035015.Q,Households; owner-occupied real estate includi...,Line 4,B.101 Balance Sheet of Households and Nonprofi...,Millions of dollars; amounts outstanding end o...
3,LM165035005.Q,Nonprofit organizations; real estate at market...,Line 5,B.101 Balance Sheet of Households and Nonprofi...,Millions of dollars; amounts outstanding end o...
4,LM165015205.Q,"Nonprofit organizations; equipment, current co...",Line 6,B.101 Balance Sheet of Households and Nonprofi...,Millions of dollars; amounts outstanding end o...


In [1]:
import psycopg2

# Database connection parameters
db_name = "z1data"
db_user = "gma"
db_password = "gmapass"
db_host = "localhost"
db_port = "5432"

# Connect to the PostgreSQL database
conn = psycopg2.connect(
    dbname=db_name,
    user=db_user,
    password=db_password,
    host=db_host,
    port=db_port
)

# Create a cursor object
cur = conn.cursor()

In [2]:
cur

<cursor object at 0x78533dd5b2e0; closed: 0>

In [21]:
import psycopg2
from psycopg2 import sql

# Database connection parameters
db_params = {
    'dbname': 'z1data',
    'user': 'gma',
    'password': 'gmapass',
    'host': 'localhost',
    'port': '5432'
}

# SQL command to create the table
create_table_query = """
CREATE TABLE z1_meta_data (
    seriesid VARCHAR(100),
    long_names VARCHAR(255),
    table_position VARCHAR(50),
    table_name TEXT,
    annotations TEXT 
);
"""
#,
#    PRIMARY KEY (seriesid, table_name)

try:
    # Connect to the PostgreSQL database
    conn = psycopg2.connect(**db_params)
    cursor = conn.cursor()
    
    # Execute the SQL command to create the table
    cursor.execute(create_table_query)
    
    # Commit the changes
    conn.commit()
    
    print("Table z1_meta_data created successfully.")
    
except Exception as error:
    print(f"Error creating table: {error}")
    
finally:
    # Close the cursor and connection
    if cursor:
        cursor.close()
    if conn:
        conn.close()

Table z1_meta_data created successfully.


In [22]:
import os
import pandas as pd
import psycopg2
from sqlalchemy import create_engine
import glob

# Database connection parameters
db_name = "z1data"
db_user = "gma"
db_password = "gmapass"
db_host = "localhost"
db_port = "5432"

# Directory containing the text files
data_directory = os.path.expanduser("~/financial_system_graph/z1data/z1_csv_files/data_dictionary/")

# Create a connection to the PostgreSQL database
engine = create_engine(f'postgresql+psycopg2://{db_user}:{db_password}@{db_host}:{db_port}/{db_name}')

# Function to read and load data from text files into the table
def load_data_from_files(directory):
    # Get all text files in the directory except those containing 'transactions' in their name
    files = glob.glob(os.path.join(data_directory, '*.txt'))
    files_to_load = [f for f in files if 'transactions' not in os.path.basename(f)]
        
    for file_path in files_to_load:
        df = pd.read_csv(file_path, sep='\t', header=None)
        # Check if the DataFrame has five columns
        if df.shape[1] == 5:
            df.columns = ['seriesid', 'long_names', 'table_position', 'table_name', 'annotations']
            # Remove duplicate rows
            df.drop_duplicates(inplace=True)
            try:
                # Load the DataFrame into the PostgreSQL table
                df.to_sql('z1_meta_data', engine, if_exists='append', index=False)
            except Exception as e:
                print(f"Error writing to database for file {file_path}: {e}")
        else:
            print(f"File {file_path} does not have exactly five columns. Skipping.")



# Load data from text files into the table
load_data_from_files(data_directory)

print("Data loaded into z1_meta_data table successfully.")

Data loaded into z1_meta_data table successfully.
