In [None]:
import psycopg2
import pandas as pd

def _flag_day_trades(accounts, symbols, dates, sides):
    """Return a list of 0/1 flags, one per trade, marking counted day trades.

    The four arguments are parallel sequences (one entry per trade) that the
    caller has already ordered by account, symbol and timestamp.

    A trade is flagged 1 when all of the following hold:
      * it has the same account, symbol and date as the trade right before it;
      * the trade right before it was not itself flagged; and
      * its side differs from the side of the trade right before it.
    Every other trade, including the first one, is flagged 0.
    """
    flags = [0] * len(sides)
    for i in range(1, len(sides)):
        # Compare field by field (not as tuples) so that values which are
        # never equal to themselves, such as NaN/NaT, are treated as different.
        same_bucket = (
            accounts[i] == accounts[i - 1]
            and symbols[i] == symbols[i - 1]
            and dates[i] == dates[i - 1]
        )
        # A trade that follows a flagged trade is never flagged itself, so two
        # adjacent rows cannot both be counted.
        if same_bucket and flags[i - 1] == 0 and sides[i] != sides[i - 1]:
            flags[i] = 1
    return flags


# Create a connection
# NOTE(review): the credentials are hard-coded here; consider loading them
# from the environment or a secrets store instead.
conn = psycopg2.connect(
    host='db',  # Use the service name from Docker Compose as the hostname
    port=5432,
    dbname='tradedb',
    user='postgres',
    password='postgres'
)

# Pull everything we need from the database, then release the cursor and the
# connection immediately -- even if the query fails -- instead of holding them
# open for the rest of the script.
try:
    with conn.cursor() as cur:
        # Execute a query
        cur.execute("SELECT * FROM raw.trades")

        # Fetch the results
        rows = cur.fetchall()

        # Get the column names from the cursor description
        columns = [desc[0] for desc in cur.description]
finally:
    conn.close()

# Create a DataFrame
df = pd.DataFrame(rows, columns=columns)

# Convert the 'timestamp' column to date
df['date'] = pd.to_datetime(df['timestamp']).dt.date

# Sort by account_id, symbol and timestamp so that trades which may form a
# day trade end up on adjacent rows, then renumber the rows from 0.
df = df.sort_values(by=['account_id', 'symbol', 'timestamp'])
df.reset_index(drop=True, inplace=True)

# Flag day trades in one pass over plain Python lists; this avoids a scalar
# DataFrame lookup for every cell of every row.
df['day_trades'] = pd.Series(
    _flag_day_trades(
        df['account_id'].tolist(),
        df['symbol'].tolist(),
        df['date'].tolist(),
        df['side'].tolist(),
    ),
    index=df.index,
    dtype='int64',  # keep an integer column even when there are no rows
)

# Calculate the sum of day trades for each account_id
grouped_df = df.groupby('account_id')['day_trades'].sum().reset_index()

# Sort the results in descending order by day_trades
grouped_df = grouped_df.sort_values(by='day_trades', ascending=False)

# Write the sorted DataFrame to CSV
grouped_df.to_csv('pdt_count.csv', index=False)