In [1]:
import pandas as pd
import sqlite3
from datetime import timedelta
from json import dumps, loads
from src.helpers import DATABASE_LOCATION
# Open one SQLite connection to the project database; it is assigned to the
# notebook-level name `conn`, which the query cells below pass to pandas.
conn = sqlite3.connect(database=DATABASE_LOCATION)

# LMP table metadata

### Number of observations

In [29]:
# COUNT(*) comes back as a 1x1 frame; pull the single cell out as a scalar.
row_count_frame = pd.read_sql_query("""select COUNT(*) from lmp;""", conn)
count = row_count_frame.iloc[0, 0]
print(f"Total number of rows {count:,}")

Total number of rows 2,596,888


### Range of timestamps in database

In [30]:
# Earliest and latest values of the `time` column, fetched as two scalar queries.
oldest_query = """select min(time) from lmp;"""
newest_query = """select max(time) from lmp;"""

min_time = pd.read_sql_query(oldest_query, conn).iloc[0, 0]
max_time = pd.read_sql_query(newest_query, conn).iloc[0, 0]

print(f" oldest timestamp: {min_time} \n newest timestamp: {max_time}")

 oldest timestamp: 2019-10-01 00:00:00+00:00 
 newest timestamp: 2019-11-29 23:00:00+00:00


### Number of distinct nodes

In [31]:
# Number of distinct values in the `node` column; the bare last expression is displayed.
distinct_nodes_frame = pd.read_sql_query("""select COUNT(distinct node) from lmp;""", conn)
distinct_nodes_frame.iloc[0, 0]

1871

# Preliminary model (to be served through the API)

In [39]:
# Preliminary "model": the mean of the `mw` column for one node on one weekday.

# Load every row for a single node. The node name is passed as a bound parameter
# instead of a double-quoted token in the SQL text: SQLite parses double quotes
# as an identifier first and only falls back to a string literal as a legacy quirk.
df_afpr = pd.read_sql_query(
    "select * from lmp WHERE node = ?;",
    conn,
    params=("AFPR_1_TOT_GEN-APND",),
)

# Parse the `time` column into datetimes so the `.dt` accessor is available.
# Key-based assignment is used because attribute assignment can silently create
# a plain attribute instead of updating the column.
df_afpr['time'] = pd.to_datetime(df_afpr['time'])

# Filter on the weekday *name* so the printed label and the filter cannot drift apart.
day = "Monday"
is_selected_day = df_afpr['time'].dt.day_name() == day

# Average only the `mw` column. Calling DataFrame.mean() on the whole frame and
# taking element [0] depends on column order and fails on recent pandas versions,
# which refuse to average the string `node` column.
mean_lpm = df_afpr.loc[is_selected_day, 'mw'].mean()
print(f'Mean price of LMP on {day}: {mean_lpm:.2f} $/MW')

Mean price of LMP on Monday: 36.78 $/MW


In [None]:
# Weekday number (Monday=0) of the timestamp stored under index label 0.
df_afpr.loc[0, 'time'].weekday()

In [2]:
# Display every stored row for the AFPR_1_TOT_GEN-APND node.
afpr_rows_query = """select * from lmp
    WHERE node == "AFPR_1_TOT_GEN-APND";
    """
pd.read_sql_query(afpr_rows_query, conn)


Unnamed: 0,time,node,mw
0,2019-10-01 02:00:00+00:00,AFPR_1_TOT_GEN-APND,48.20406
1,2019-10-01 00:00:00+00:00,AFPR_1_TOT_GEN-APND,33.55219
2,2019-10-01 01:00:00+00:00,AFPR_1_TOT_GEN-APND,43.28855
3,2019-10-01 03:00:00+00:00,AFPR_1_TOT_GEN-APND,40.71762
4,2019-10-01 04:00:00+00:00,AFPR_1_TOT_GEN-APND,35.78075
...,...,...,...
1435,2019-11-29 16:00:00+00:00,AFPR_1_TOT_GEN-APND,35.18822
1436,2019-11-29 14:00:00+00:00,AFPR_1_TOT_GEN-APND,43.55469
1437,2019-11-29 13:00:00+00:00,AFPR_1_TOT_GEN-APND,41.19555
1438,2019-11-29 19:00:00+00:00,AFPR_1_TOT_GEN-APND,28.27466


## Convert a DataFrame to JSON 

In [64]:
# Take the first five rows for one node and serialise them as a JSON array
# holding one object per row (orient='records').
sample_query = """select * from lmp
    WHERE node == "AFPR_1_TOT_GEN-APND"
    LIMIT 5;
    """
sample_rows = pd.read_sql_query(sample_query, conn)
data = sample_rows.to_json(orient='records')

# Round-trip through the json module purely to pretty-print with sorted keys.
pretty_json = dumps(loads(data), indent=4, sort_keys=True)
print(pretty_json)

[
    {
        "mw": 48.20406,
        "node": "AFPR_1_TOT_GEN-APND",
        "time": "2019-10-01 02:00:00+00:00"
    },
    {
        "mw": 33.55219,
        "node": "AFPR_1_TOT_GEN-APND",
        "time": "2019-10-01 00:00:00+00:00"
    },
    {
        "mw": 43.28855,
        "node": "AFPR_1_TOT_GEN-APND",
        "time": "2019-10-01 01:00:00+00:00"
    },
    {
        "mw": 40.71762,
        "node": "AFPR_1_TOT_GEN-APND",
        "time": "2019-10-01 03:00:00+00:00"
    },
    {
        "mw": 35.78075,
        "node": "AFPR_1_TOT_GEN-APND",
        "time": "2019-10-01 04:00:00+00:00"
    }
]


'monday'