### Import Data Notebook

This notebook should give me an easily accessible way of importing data from the large dataset in Snowflake to my own custom tables

In [5]:
%load_ext autoreload

In [12]:
%autoreload

In [2]:
import sys, os
import pandas as pd
import numpy as np
from dotenv import load_dotenv

sys.path.append('/Users/katringrunert/Projects/Uni/thesis/neuro-symbolic-demand-forecasting')
# when running on JupyterHub the dot-env-path needs to be adapted
load_dotenv("../.env")

datetime_format = "%Y-%m-%d %H:%M%z"

from src.neuro_symbolic_demand_forecasting.database.weather_postgres import WeatherDao
from src.neuro_symbolic_demand_forecasting.database.snowflake import SnowflakeDao


sf_dao = SnowflakeDao(
    url=os.getenv("SNOWFLAKE_URL"),
    uid=os.getenv("SNOWFLAKE_UID"),
    pwd=os.getenv("SNOWFLAKE_PASSWORD"),
    database=os.getenv("SNOWFLAKE_DATABASE"),
    schema=os.getenv("SNOWFLAKE_SCHEMA"),
)
pg_dao = WeatherDao(
    url=os.getenv("POSTGRES_HOST_WEATHER_DATA"),
    uid=os.getenv("POSTGRES_UID_WEATHER_DATA"),
    pwd=os.getenv("POSTGRES_PASSWORD_WEATHER_DATA"),
    database=os.getenv("POSTGRES_DATABASE_WEATHER_DATA"),
    schema=os.getenv("POSTGRES_SCHEMA_WEATHER_DATA"),
    port=int(os.getenv("POSTGRES_PORT_WEATHER_DATA"))
)
sf_dao.connect()
pg_dao.connect()

Connected to snowflake instance!
Connected to postgres host!


## Subsampling from ACTIVE_CONTRACT_DATA (old and big) to CONTRACT_DATA (new and smaller)

In [3]:
contract_df = pd.read_csv('../unclean_data/active_contract_data.csv', parse_dates=['CONTRACT_START_DATE', 'CONTRACT_END_DATE'])
contract_df.columns = [ c.lower() for c in  contract_df.columns]
contract_df

Unnamed: 0,clusterreference,degreeofisolation,hassolarpanels,solarpanelswattpeak,surfacearea,typeofhouse,yearbuilt,numberofpeople,connection_id,ean,...,contract_end_date,contract_state,pricing_type,market_segment,connection_group_id,customer_size,zip_code,city,zip_code_suffixed,last_modified
0,1068WG15,Average,False,0,From50,CornerHouse,From1946,Three,fa3814dc-a4eb-4153-88ba-ae580114d07b,871685900008617662,...,2024-10-17,Active,Fixed,Electricity,70dacb49-3ae3-496f-8319-ae580114d068,Small,1068,AMSTERDAM,1068 WG,2024-03-19T10:42:02.000Z
1,8426AC19-A,Average,False,0,From15,SemiDetached,From1975,Two,eca5959e-c22e-4658-86bd-a72901420930,871687120052650035,...,2024-04-06,Active,Fixed,Electricity,577c4d5b-fd03-4d26-8ecf-a729014208ee,Small,8426,APPELSCHA,8426 AC,2024-03-19T10:42:02.000Z
2,6718XB5-Nieuw,Good,True,0,From200,Detached,From2015,Three,7c0fc9ba-7638-4da8-a691-adb700d14346,871687110004168671,...,2024-04-02,Active,Fixed,Electricity,08e48d02-88d5-4a36-a358-adb700d0ed9d,Small,6718,EDE GLD,6718 XB,2024-03-19T10:42:02.000Z
3,2678ZP127,Good,True,1320,From100,RowHouse,From2015,Two,d6b658c4-58ad-4364-8d20-afcf00aae00f,871687800001680992,...,2024-04-05,Active,Fixed,Electricity,7ad83cb4-2a48-44b7-8ba5-afcf00aae00f,Small,2678,DE LIER,2678 ZP,2024-03-19T10:42:02.000Z
4,1181VG56,Good,True,4,From150,RowHouse,From1946,Three,2eaf249f-0b7c-40c4-b3ce-ac4b00ddb45a,871685900009562541,...,2024-10-30,Active,Fixed,Electricity,382fe708-2ddf-4d28-b4f6-ac4b00ddb45a,Small,1181,AMSTELVEEN,1181 VG,2024-03-19T10:42:02.000Z
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22608,3039SR25-B,Average,False,0,From50,Apartment,Until1945,Two,718a6b95-b89e-4a82-bd80-b00a00a28e17,871689290101026123,...,2024-05-23,Active,Fixed,Electricity,6044d5f2-5002-42f0-ae19-b00a00a28e17,Small,3039,ROTTERDAM,3039 SR,2024-03-19T10:44:05.000Z
22609,4531HW6,Average,False,0,From50,Apartment,From1975,One,c2198c6e-e0c8-4666-a9b3-ab0f014edc80,871690200022422658,...,2024-05-15,Active,Fixed,Electricity,dc18e5c9-79cf-414e-81a6-ab0f014edc80,Small,4531,TERNEUZEN,4531 HW,2024-03-19T10:44:05.000Z
22610,8913GV19,Poor,False,0,From15,CornerHouse,Until1945,One,108fd278-86d6-47f0-8955-abf700f2d183,871689740000025616,...,2024-09-22,Active,Fixed,Electricity,4182b295-ba28-412b-ab14-abf700f2d183,Small,8913,LEEUWARDEN,8913 GV,2024-03-19T10:44:05.000Z
22611,6351JG33,Average,True,0,From150,SemiDetached,From1946,Four,6f60138a-033e-4894-80f0-acdb0104569b,871688540000224191,...,2024-05-24,Active,Fixed,Electricity,96797ac0-4da2-4b01-8492-acdb0104569b,Small,6351,BOCHOLTZ,6351 JG,2024-03-19T10:44:05.000Z


In [4]:
import datetime
contract_df['runtime_total'] = (contract_df['contract_end_date'] - contract_df['contract_start_date']).apply(lambda x: x.days)
contract_df['runtime_until_now'] = contract_df['contract_start_date'].apply(lambda x: (datetime.datetime(2024,1,1,0,0) - x).days)
contract_df[['runtime_total', 'runtime_until_now', 'contract_end_date', 'contract_start_date']].head()

Unnamed: 0,runtime_total,runtime_until_now,contract_end_date,contract_start_date
0,947,657,2024-10-17,2022-03-15
1,366,270,2024-04-06,2023-04-06
2,822,730,2024-04-02,2022-01-01
3,366,271,2024-04-05,2023-04-05
4,1096,793,2024-10-30,2021-10-30


In [5]:
filtered_df = contract_df[contract_df['runtime_until_now']>730]#[['runtime_total', 'runtime_until_now', 'contract_start_date', 'contract_end_date']]
print(filtered_df['contract_start_date'].max())
filtered_df[['runtime_total', 'runtime_until_now', 'contract_start_date', 'contract_end_date']]

2021-12-31 00:00:00


Unnamed: 0,runtime_total,runtime_until_now,contract_start_date,contract_end_date
4,1096,793,2021-10-30,2024-10-30
8,1096,754,2021-12-08,2024-12-08
9,1096,883,2021-08-01,2024-08-01
10,1035,914,2021-07-01,2024-05-01
15,1096,941,2021-06-04,2024-06-04
...,...,...,...,...
22595,1096,937,2021-06-08,2024-06-08
22596,1096,944,2021-06-01,2024-06-01
22597,1096,907,2021-07-08,2024-07-08
22601,1096,822,2021-10-01,2024-10-01


In [6]:
# Samples:  15205 	 PV:  39.97  	Non-PV: 60.03
# aim to upkeep this ratio
pv=39/100.0
non_pv=1-pv

len(filtered_df[filtered_df['hassolarpanels']])/len(filtered_df), len(filtered_df[~filtered_df['hassolarpanels']])/len(filtered_df)

(0.4044304351632957, 0.5955695648367043)

In [7]:
pv_total = len(filtered_df[filtered_df['hassolarpanels']])
nonpv_totla = len(filtered_df[~filtered_df['hassolarpanels']])
total = len(filtered_df)

# pv_total, int(total*pv)
print((total-int(total*pv))/total)
print(int(total*pv))
subsampled = pd.concat([filtered_df[filtered_df['hassolarpanels']].sample(n=int(total*pv)-100), filtered_df[~filtered_df['hassolarpanels']]])
subsampled

0.6100166360213641
4454


Unnamed: 0,clusterreference,degreeofisolation,hassolarpanels,solarpanelswattpeak,surfacearea,typeofhouse,yearbuilt,numberofpeople,connection_id,ean,...,pricing_type,market_segment,connection_group_id,customer_size,zip_code,city,zip_code_suffixed,last_modified,runtime_total,runtime_until_now
22015,8602AA24,Poor,True,270,From100,Detached,Until1945,Two,1008b493-2f0f-465c-8508-ac8d00b1c9c3,871687120052157480,...,Fixed,Electricity,4f6c1716-a130-4b45-9f6f-ac8d00b1c9c3,Small,8602,SNEEK,8602 AA,2024-03-19T10:44:05.000Z,1096,975
7564,6932DP11,Average,True,0,From100,SemiDetached,From1992,Two,b3687bca-7e5d-4bfd-8d9b-ad6300ad9e7d,871687120054410132,...,Fixed,Electricity,958d07e3-2396-4de2-a7ae-ad6300ad9e7d,Small,6932,WESTERVOORT,6932 DP,2024-03-19T10:44:05.000Z,1096,864
2065,1826HC86,Average,True,0,From50,Apartment,From1975,Two,eb0f50ac-33bd-4c7f-b486-aad900bcbe28,871685900012386172,...,Fixed,Electricity,32b70796-e8ff-4055-8425-aad900bcbe28,Small,1826,ALKMAAR,1826 HC,54241276-08-13T05:36:18.000Z,1096,822
11004,9945PD101,Average,True,305,From50,Detached,Until1945,Three,1c32f431-9a7f-44e4-9653-ac5900c28285,871694840002706567,...,Fixed,Electricity,9706843c-5903-4606-a2fd-ac5900c28285,Small,9945,WAGENBORGEN,9945 PD,2024-03-19T10:44:05.000Z,1096,761
2492,3544JH7,Good,True,2100,From100,RowHouse,From2015,Four,f729f51a-a0f9-4987-8508-ad96015ffddb,871687460011514757,...,Fixed,Electricity,bbb48dee-c028-4469-a8f7-ad96015ffddb,Small,3544,UTRECHT,3544 JH,54241276-08-13T05:36:18.000Z,1096,829
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
22593,6824BG47-402,Good,False,0,From50,Apartment,From2015,One,dc6a400a-bee8-46cb-9be8-ad3900ef53b9,871687110003927996,...,Fixed,Electricity,3268ee71-efd9-41c4-98c8-ad3900ef53b9,Small,6824,ARNHEM,6824 BG,2024-03-19T10:44:05.000Z,1096,938
22595,3448CA5,Poor,False,0,From15,Apartment,From1975,One,8f779647-e29a-49e8-a017-a8d501480eca,871687400005418999,...,Fixed,Electricity,8fb15b1c-c2de-459c-b773-a8d501480eca,Small,3448,WOERDEN,3448 CA,2024-03-19T10:44:05.000Z,1096,937
22596,6462GT37,Average,False,0,From50,Apartment,From1975,One,e5ca7e4a-4e56-41f9-8301-ad2800ee5ec2,871688540003383000,...,Fixed,Electricity,6bdba9cf-d4cc-410a-a7e5-ad2800ee5ec2,Small,6462,KERKRADE,6462 GT,2024-03-19T10:44:05.000Z,1096,944
22597,7161RB5,Average,False,0,From250,Detached,Until1945,Two,c06df8e5-fea2-4357-adf4-a99300a32722,871687120055934361,...,Fixed,Electricity,0f2593f2-0286-454b-8d06-a99300a32722,Small,7161,NEEDE,7161 RB,2024-03-19T10:44:05.000Z,1096,907


In [8]:
len(subsampled[subsampled['hassolarpanels']])/len(subsampled), len(subsampled[~subsampled['hassolarpanels']])/len(subsampled)

(0.39028325564718536, 0.6097167443528146)

In [9]:
df_for_insert = subsampled[['clusterreference',
 'degreeofisolation',
 'hassolarpanels',
 'solarpanelswattpeak',
 'surfacearea',
 'typeofhouse',
 'yearbuilt',
 'numberofpeople',
 'connection_id',
 'ean',
 'ean_sha256',
 'previous_contract_id',
 'contract_id',
 'initial_contract_id',
 'parent_contract_id',
 'contract_start_date',
 'contract_end_date',
 'contract_state',
 'pricing_type',
 'market_segment',
 'connection_group_id',
 'customer_size',
 'zip_code',
 'city',
 'zip_code_suffixed',
 'last_modified']]


df_for_insert['last_modified'] = datetime.datetime.now()
# df_for_insert.columns = [column.upper() for column in df_for_insert.columns]

df_for_insert.reset_index(inplace=True, drop=True)
df_for_insert['contract_start_date'] = df_for_insert['contract_start_date'].apply(lambda x: x.strftime('%Y-%m-%d'))
df_for_insert['contract_end_date'] = df_for_insert['contract_end_date'].apply(lambda x: x.strftime('%Y-%m-%d'))
df_for_insert['last_modified'] = df_for_insert['last_modified'].apply(lambda x: x.strftime('%Y-%m-%d %H:%M:%S'))
df_for_insert

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_for_insert['last_modified'] = datetime.datetime.now()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_for_insert['contract_start_date'] = df_for_insert['contract_start_date'].apply(lambda x: x.strftime('%Y-%m-%d'))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_for_insert['contract_end_date

Unnamed: 0,clusterreference,degreeofisolation,hassolarpanels,solarpanelswattpeak,surfacearea,typeofhouse,yearbuilt,numberofpeople,connection_id,ean,...,contract_end_date,contract_state,pricing_type,market_segment,connection_group_id,customer_size,zip_code,city,zip_code_suffixed,last_modified
0,8602AA24,Poor,True,270,From100,Detached,Until1945,Two,1008b493-2f0f-465c-8508-ac8d00b1c9c3,871687120052157480,...,2024-05-01,Active,Fixed,Electricity,4f6c1716-a130-4b45-9f6f-ac8d00b1c9c3,Small,8602,SNEEK,8602 AA,2024-05-23 11:47:37
1,6932DP11,Average,True,0,From100,SemiDetached,From1992,Two,b3687bca-7e5d-4bfd-8d9b-ad6300ad9e7d,871687120054410132,...,2024-08-20,Active,Fixed,Electricity,958d07e3-2396-4de2-a7ae-ad6300ad9e7d,Small,6932,WESTERVOORT,6932 DP,2024-05-23 11:47:37
2,1826HC86,Average,True,0,From50,Apartment,From1975,Two,eb0f50ac-33bd-4c7f-b486-aad900bcbe28,871685900012386172,...,2024-10-01,Active,Fixed,Electricity,32b70796-e8ff-4055-8425-aad900bcbe28,Small,1826,ALKMAAR,1826 HC,2024-05-23 11:47:37
3,9945PD101,Average,True,305,From50,Detached,Until1945,Three,1c32f431-9a7f-44e4-9653-ac5900c28285,871694840002706567,...,2024-12-01,Active,Fixed,Electricity,9706843c-5903-4606-a2fd-ac5900c28285,Small,9945,WAGENBORGEN,9945 PD,2024-05-23 11:47:37
4,3544JH7,Good,True,2100,From100,RowHouse,From2015,Four,f729f51a-a0f9-4987-8508-ad96015ffddb,871687460011514757,...,2024-09-24,Active,Fixed,Electricity,bbb48dee-c028-4469-a8f7-ad96015ffddb,Small,3544,UTRECHT,3544 JH,2024-05-23 11:47:37
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
11151,6824BG47-402,Good,False,0,From50,Apartment,From2015,One,dc6a400a-bee8-46cb-9be8-ad3900ef53b9,871687110003927996,...,2024-06-07,Active,Fixed,Electricity,3268ee71-efd9-41c4-98c8-ad3900ef53b9,Small,6824,ARNHEM,6824 BG,2024-05-23 11:47:37
11152,3448CA5,Poor,False,0,From15,Apartment,From1975,One,8f779647-e29a-49e8-a017-a8d501480eca,871687400005418999,...,2024-06-08,Active,Fixed,Electricity,8fb15b1c-c2de-459c-b773-a8d501480eca,Small,3448,WOERDEN,3448 CA,2024-05-23 11:47:37
11153,6462GT37,Average,False,0,From50,Apartment,From1975,One,e5ca7e4a-4e56-41f9-8301-ad2800ee5ec2,871688540003383000,...,2024-06-01,Active,Fixed,Electricity,6bdba9cf-d4cc-410a-a7e5-ad2800ee5ec2,Small,6462,KERKRADE,6462 GT,2024-05-23 11:47:37
11154,7161RB5,Average,False,0,From250,Detached,Until1945,Two,c06df8e5-fea2-4357-adf4-a99300a32722,871687120055934361,...,2024-07-08,Active,Fixed,Electricity,0f2593f2-0286-454b-8d06-a99300a32722,Small,7161,NEEDE,7161 RB,2024-05-23 11:47:37


## Import Timeseries

In [10]:
from src.neuro_symbolic_demand_forecasting.helpers import constants
from snowflake.connector.pandas_tools import write_pandas
import snowflake.connector as sc


In [18]:

snowflake_connection = sc.connect(
    user=os.getenv("SNOWFLAKE_UID"),
    password=os.getenv("SNOWFLAKE_PASSWORD"),
    account=os.getenv("SNOWFLAKE_URL"),
    warehouse='USER_WH',
    # region=self.snowflake_parameters.region,
    schema="KATRIN",
    database="SANDBOX",
    role="DATA_ENGINEER",
)
# write_pandas(
#     conn=snowflake_connection,
#     df=df_for_insert,
#     table_name="CONTRACT_DATA"
# )
# contract_df = sf_dao.fetch_list("SELECT * FROM SANDBOX.KATRIN.CONTRACT_DATA")
# contract_df

In [11]:
contract_df = df_for_insert
contract_df['ean_sha256']

0        d2df482e037a1129250b128d8b73fe11f5bc127f0850e0...
1        9c716b6d896129415d5955a25e907e0d7cfdaf8360e02a...
2        95284a16e3b726c8093d1b308e24837e2e4add5fa1280f...
3        865fefd8550d2d2185d422f15f0195c995a7a06961e2bd...
4        95f4d0ac666e4b0b55642dc5f78bc07d5228bceeb2aa05...
                               ...                        
11151    9a34265969c949f1e49555b0c21026a6d31cc07dee76bb...
11152    1803c7ac5248ba82bf0443c1e1dcf69c55066a79fcfebd...
11153    a6773471e60dabd9928dbcac4d69080cc364b474c93291...
11154    2adfb7bef04d922543069a74caefc3c5eae8a6291b7199...
11155    56166c7c52c5a2f1686c923294d7cdf7fd24688cc906fd...
Name: ean_sha256, Length: 11156, dtype: object

In [20]:
eans = [f"'{e.upper()}'" for e in contract_df['ean_sha256'].tolist()]
# month,year = 2, 2023
# table_name = "P4_METERREADING_WINTER"
# so far:
# 1, 2022, 2023
# 2, 2022, 2023
# 3, 2022
# 10, 2022
# 11, 2022
# 12, 2022

table_name = "P4_METERREADING_SUMMER"
# DONE
# month,year = 5, 2023
# month,year = 5, 2023
# month,year = 6, 2022
# month,year = 6, 2023
# month,year = 7, 2022
# month,year = 7, 2023
month,year = 8, 2023

# TBD 
# month,year = 8, 2022

In [21]:
len(eans)

11156

In [22]:
i, batch_size = 0, 2000

print("For", month, year)
acc = pd.DataFrame()
while i < len(eans):
    print(i, i+batch_size)
    if i+batch_size > len(eans):
        print("Last one!")
        sub_eans = eans[i:]
    else:
        sub_eans = eans[i:i+batch_size]
    _df = sf_dao.fetch_list(f"""SELECT EAN_SHA256, LDN, ODN, READINGDATE, MEASUREUNIT, METERNUMBER
                             FROM P4.RAW.INTERVAL WHERE 
                    EAN_SHA256 IN ({','.join(sub_eans)}) 
                    AND READINGDATE LIKE '{month}/%/{year}%'
                    """)
    print("Done")
    i += batch_size
    acc = pd.concat([acc, _df])

acc

For 8 2023
0 2000
Executing query
Fetching all
Writing data to df
Done
2000 4000
Executing query
Fetching all
Writing data to df
Done
4000 6000
Executing query
Fetching all
Writing data to df
Done
6000 8000
Executing query
Fetching all
Writing data to df
Done
8000 10000
Executing query
Fetching all
Writing data to df
Done
10000 12000
Last one!
Executing query
Fetching all
Writing data to df
Done


Unnamed: 0,ean_sha256,ldn,odn,readingdate,measureunit,meternumber
0,BD60BB6084C2C0E1E8F24BAE580D1C1326778CF156FC1C...,17482019.000,14969819.000,8/28/2023 9:00:00 PM +02:00,5,8207
1,BD60BB6084C2C0E1E8F24BAE580D1C1326778CF156FC1C...,17479503.000,14964487.000,8/28/2023 2:00:00 PM +02:00,5,8207
2,BD60BB6084C2C0E1E8F24BAE580D1C1326778CF156FC1C...,17480127.000,14969819.000,8/28/2023 7:30:00 PM +02:00,5,8207
3,BD60BB6084C2C0E1E8F24BAE580D1C1326778CF156FC1C...,17479503.000,14969592.000,8/28/2023 5:45:00 PM +02:00,5,8207
4,BD60BB6084C2C0E1E8F24BAE580D1C1326778CF156FC1C...,17483716.000,14969819.000,8/28/2023 10:30:00 PM +02:00,5,8207
...,...,...,...,...,...,...
3345561,F6BC2D065868501C41B309D198DCBB4C3C996AFFCDB310...,5695801.000,0.000,8/30/2023 7:30:00 AM +02:00,5,E0052005444515819
3345562,F6BC2D065868501C41B309D198DCBB4C3C996AFFCDB310...,5696681.000,0.000,8/30/2023 9:00:00 PM +02:00,5,E0052005444515819
3345563,F6BC2D065868501C41B309D198DCBB4C3C996AFFCDB310...,5696781.000,0.000,8/30/2023 10:15:00 PM +02:00,5,E0052005444515819
3345564,F6BC2D065868501C41B309D198DCBB4C3C996AFFCDB310...,5696301.000,0.000,8/30/2023 3:30:00 PM +02:00,5,E0052005444515819


In [23]:
import datetime
acc['readingdate'] = pd.to_datetime(acc['readingdate'], format="%m/%d/%Y %I:%M:%S %p %z")
acc['readingdate'] = acc['readingdate'].apply(lambda x: x.strftime('%Y-%m-%d %H:%M:%S%z'))
acc['last_modified'] = datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')

acc.sort_values(by='readingdate', inplace=True)
acc['ldn'] = acc['ldn'].astype(float) 
acc['odn'] = acc['odn'].astype(float)
acc['ldn_diff'] = acc.groupby('ean_sha256')['ldn'].diff()
acc['odn_diff'] = acc.groupby('ean_sha256')['odn'].diff()

In [24]:
acc.columns = [column.upper() for column in acc.columns]
write_pandas(
    conn=snowflake_connection,
    df=acc[['EAN_SHA256','LDN','ODN','READINGDATE','METERNUMBER','LDN_DIFF','ODN_DIFF', 'LAST_MODIFIED']],
    chunk_size=200000,
    table_name=table_name
)

  write_pandas(


(True,
 149,
 29690586,
 [('bzopvsfxyq/file103.txt',
   'LOADED',
   200000,
   200000,
   1,
   0,
   None,
   None,
   None,
   None),
  ('bzopvsfxyq/file109.txt',
   'LOADED',
   200000,
   200000,
   1,
   0,
   None,
   None,
   None,
   None),
  ('bzopvsfxyq/file112.txt',
   'LOADED',
   200000,
   200000,
   1,
   0,
   None,
   None,
   None,
   None),
  ('bzopvsfxyq/file12.txt',
   'LOADED',
   200000,
   200000,
   1,
   0,
   None,
   None,
   None,
   None),
  ('bzopvsfxyq/file130.txt',
   'LOADED',
   200000,
   200000,
   1,
   0,
   None,
   None,
   None,
   None),
  ('bzopvsfxyq/file135.txt',
   'LOADED',
   200000,
   200000,
   1,
   0,
   None,
   None,
   None,
   None),
  ('bzopvsfxyq/file141.txt',
   'LOADED',
   200000,
   200000,
   1,
   0,
   None,
   None,
   None,
   None),
  ('bzopvsfxyq/file18.txt',
   'LOADED',
   200000,
   200000,
   1,
   0,
   None,
   None,
   None,
   None),
  ('bzopvsfxyq/file28.txt',
   'LOADED',
   200000,
   200000,
   1,
   0,

: 

In [16]:

df = sf_dao.fetch_list(f"SELECT * FROM SANDBOX.KATRIN.P4_METERREADING_SUMMER WHERE READINGDATE>='2023-05-01 00:00+02:00' AND READINGDATE<='2023-05-15 00:00+02:00'")
df.head()

KeyboardInterrupt: 