In [2]:
import pickle as pkl
import time
import warnings
from pathlib import Path

import pandas as pd
import sqlalchemy as db
from sqlalchemy import create_engine, text
from sqlalchemy.orm import sessionmaker

def _fetch_page(query_string, port_num, db_name):
    """Run query_string on a fresh engine and return (rows, column_names)."""
    engine = create_engine(
        f"postgresql://teleport:@localhost:{port_num}/{db_name}")
    try:
        with engine.connect() as conn:
            result = conn.execute(text(query_string))
            # Capture the column names before the connection closes so an
            # empty result still produces a frame with the right header.
            columns = list(result.keys())
            rows = list(result)
        return rows, columns
    finally:
        # Each call builds its own engine; release its pooled connections so
        # repeated notebook calls do not accumulate open sockets.
        engine.dispose()


def pull_data(sql_query:str, file_out=None, port_num=2023, db_name='public'):
    """
    Run sql_query against the Postgres database reachable at
    localhost:port_num (the Teleport tunnel) and return the result as a
    Pandas DataFrame, optionally also writing it to a csv file.

    The query is suffixed with ``LIMIT 10000 OFFSET 0``, so at most 10,000
    rows are returned. A warning is emitted when that cap is reached because
    the result is then probably truncated.

    If the first attempt fails, the query is retried once on a new engine
    (e.g. after a connection timeout). Rows fetched by the failed attempt are
    discarded, so the retry never produces duplicates.

    Args:
        sql_query (str): query whose results you wish to download. Trailing
            whitespace and semicolons are ignored.
        file_out (str, optional): filepath for the output csv. If None, then
            no file is created. Defaults to None.
        port_num (int, optional): Port number for Teleport Connection. Defaults
            to 2023.
        db_name (str, optional): Name of the database to connect to. Defaults
            to 'public'.

    Returns:
        pd.DataFrame: DataFrame containing results of sql_query

    Raises:
        ValueError: if sql_query contains no SQL once whitespace and trailing
            semicolons are removed.
        Exception: whatever the retry raises if the second attempt also fails.
    """
    # A trailing ';' (possibly followed by a newline, as in a .sql file) must
    # go, otherwise the LIMIT clause below would land after the terminator.
    sql_query = (sql_query or '').strip().rstrip(';').rstrip()
    if not sql_query:
        raise ValueError('sql_query must contain a SQL statement')

    page_size = 10000
    current_row = 0
    query_string = f"""{sql_query}
        -- NOTE: DO NOT DELETE BELOW THIS
        LIMIT {page_size} OFFSET {current_row};
        """

    print(f'new offset: {current_row}')
    try:
        rows, columns = _fetch_page(query_string, port_num, db_name)
    except Exception as first_error:
        # `Exception` rather than a bare except, so Ctrl-C still interrupts.
        warnings.warn(
            f'query failed ({first_error!r}); retrying once on a new engine')
        rows, columns = _fetch_page(query_string, port_num, db_name)

    if len(rows) >= page_size:
        warnings.warn(
            f'query returned {page_size} rows, which is the LIMIT applied by '
            'pull_data; the result is probably truncated')

    pw_df = pd.DataFrame(rows, columns=columns)
    if file_out:
        pw_df.to_csv(file_out)
    return pw_df

In [8]:
# Read the Brooklyn and Queens closings query from the data-quality checkout.
# The path is built from the current user's home directory instead of a
# hardcoded /Users/<name> prefix; the checkout is still assumed to live under
# ~/Desktop/data-quality.
closings_sql_path = (
    Path.home() / 'Desktop' / 'data-quality' / 'reports' / 'daniel_gale'
    / 'monthly_report' / 'sql' / 'Brooklyn and Queens Closings.sql'
)
query = closings_sql_path.read_text()
# Run the report query, save it to csv, and display the resulting frame.
pull_data(query, file_out='daniel_gale_sales_jan.csv', db_name='perchwell')

new offset: 0


Unnamed: 0,mls_id,listing_id,address,unit,full_address,sold_rented_price,original_price,discount_on_ask,sale_or_rental,list_date,...,listing_url,agent1_name,agent1_email,agent1_phone,agent2_name,agent2_email,agent2_phone,agent3_name,agent3_email,agent3_phone
0,3505208,7775845,65-58 ALDERTON STREET,,"65-58 ALDERTON STREET,",950000,950000.0,0.00000,Sale,2023-09-20 04:00:00,...,http://www.elliman.com,Christina Riccobono,christina.riccobono@elliman.com,,,,,,,
1,3510294,7778229,176-17 129 AVENUE,,"176-17 129 AVENUE,",680000,688000.0,-1.16279,Sale,2023-10-27 04:00:00,...,,Rafael Ching,rafael@rafaelchingteam.com,,,,,,,
2,COMP-1283540300387147273,7743215,7905 Seaview Ave,3,"7905 Seaview Ave, 3",410000,425000.0,-3.52941,Sale,2023-04-06 04:00:00,...,https://www.compass.com/listing/12835403003871...,Hakim Edwards,hakim.edwards@compass.com,(347) 489-2247,,,,,,
3,3487127,7636452,204-15 26 AVENUE,,"204-15 26 AVENUE ,",948000,988000.0,0.00000,Sale,2023-06-23 04:00:00,...,,Po Wei Tair,davidptair@gmail.com,,,,,,,
4,COMP-1394443252756129777,7745389,1208 Pacific St,3H,"1208 Pacific St, 3H",642500,695000.0,-1.15385,Sale,2023-08-06 04:00:00,...,https://www.compass.com/listing/13944432527561...,Skye Whitman,sw@compass.com,(347) 406-0548,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
866,3497200,7772818,186-01 WEXFORD TERRACE,,"186-01 WEXFORD TERRACE,",860000,865000.0,-0.57803,Sale,2023-08-10 04:00:00,...,,Rafael Ching,rafael@rafaelchingteam.com,,,,,,,
867,OLRS-2064048,7735783,37-26 32nd Street,3-A,"37-26 32nd Street, 3-A",1310000,1310000.0,0.00000,Sale,2023-10-09 04:00:00,...,https://www.nestseekers.com/3132284/eden-condo...,Stelios Hiotis,steliosh@nestseekers.com,(212) 252-8772,,,,,,
868,3447930,4304662,32-25 88,607,"32-25 88, 607",235000,259000.0,-9.26641,Sale,2022-12-11 05:00:00,...,,Vilma Abreu,lissetteabreu2003@yahoo.com,,,,,,,
869,COMP-1314519968269846249,7744481,171 N 1st St,4A,"171 N 1st St, 4A",945000,945000.0,0.00000,Sale,2023-05-18 04:00:00,...,https://www.compass.com/listing/13145199682698...,Marshal Li,marshal.li@compass.com,(917) 216-0661,,,,,,


: 

In [4]:
# List the distinct values of `source` in the listings table.
query = "select distinct source from listings"
pull_data(sql_query=query, db_name="perchwell")

new offset: 0


In [2]:
# Highest price in listing_histories among listings whose source is 'crmls'.
query = (
    "select max(price) \n"
    "from listing_histories \n"
    "where listing_id in (select id from listings where source='crmls')"
)
pull_data(sql_query=query, db_name="perchwell")

new offset: 0


Unnamed: 0,max
0,2100000000


In [4]:
# Show every listing_histories row whose price equals 2100000000.
query = (
    "select *\n"
    "from listing_histories \n"
    "where price=2100000000"
)
pull_data(sql_query=query, db_name="perchwell")

new offset: 0


Unnamed: 0,id,price,status_code,listing_id,property_tax,maintenance,created_at,updated_at,update_transaction,source_id,updater,source,rental_availability
0,345788587,2100000000,100,22660832,,,2024-01-02 17:07:57.647377,2024-01-02 17:07:57.647377,2008-06-01 04:00:00,,crmls-rules,,
1,346307917,2100000000,100,22766570,,,2024-01-02 17:08:12.513870,2024-01-02 17:08:12.513870,2011-09-03 04:01:00,,crmls-rules,,
2,346307919,2100000000,620,22766570,,,2024-01-02 17:08:12.513870,2024-01-02 17:08:12.513870,2011-09-04 17:00:00,,crmls-rules,,


In [4]:
# For listings 22660832 and 22766570, join each listing to its most recent
# listing_histories event (rank 1 when ordered by update_transaction desc).
query = (
    "with status as (\n"
    "\tselect listing_id\n"
    "\t\t, status_code\n"
    "\t\t, price\n"
    "\t\t, update_transaction\n"
    "\t\t, row_number() over(partition by listing_id order by update_transaction desc) as rank\n"
    "\tfrom listing_histories \n"
    "    where listing_id in (22660832,22766570)\n"
    ")\n"
    "select id, address, zip, list_date, hidden, published, status_code as last_status, price as last_price, update_transaction as last_event\n"
    "from listings inner join status on status.listing_id = listings.id and rank=1\n"
    "where id in (22660832,22766570)"
)
pull_data(sql_query=query, db_name="perchwell")

new offset: 0


Unnamed: 0,id,address,zip,list_date,hidden,published,last_status,last_price,last_event
0,22660832,4824 West Point Loma Blvd,92107,2008-06-01 04:00:00,False,True,300,1580000,2008-12-05 05:00:00
1,22766570,1481 Knollpark Glen,92026,2011-09-03 04:00:00,False,True,620,2100000000,2011-09-04 17:00:00


: 