# Using the predictions

We now have a .parquet with predictions for every hour for a year for each LocationID

We will normalise these predictions and store these normalised predictions in a new parquet file

We may also join these with the coordinates for each LocationID, though this may not be strictly necessary at this stage

In [2]:
import pandas as pd

# Load the stored predictions into a DataFrame; the path is relative to the
# working directory the notebook is run from.
predictions = pd.read_parquet('predictions.parquet')

In [3]:
# Show the loaded DataFrame as the cell output to check its columns and row count
predictions

Unnamed: 0,location_id,hour,day,month,week_of_month,prediction
0,4,0,0,7,1,57.71
1,4,1,0,7,1,36.62
2,4,2,0,7,1,20.20
3,4,3,0,7,1,8.90
4,4,4,0,7,1,10.60
...,...,...,...,...,...,...
578221,263,20,0,7,1,744.25
578222,263,21,0,7,1,625.61
578223,263,22,0,7,1,575.76
578224,263,23,0,7,1,350.31


In [4]:
# Normalise the predictions with min-max scaling: (x - min) / (max - min).
# The minimum and maximum are taken over the whole 'prediction' column
# (not per location_id), and are computed once rather than on every use.
prediction_min = predictions['prediction'].min()
prediction_range = predictions['prediction'].max() - prediction_min

# If every prediction is identical the range is zero and the division would
# turn the whole column into NaN (0 / 0). Fall back to a unit scale so that a
# constant column normalises to 0.0 instead, while NaN inputs remain NaN.
scale = prediction_range if prediction_range != 0 else 1.0

predictions['normalised_prediction'] = (predictions['prediction'] - prediction_min) / scale
predictions

Unnamed: 0,location_id,hour,day,month,week_of_month,prediction,normalised_prediction
0,4,0,0,7,1,57.71,0.002113
1,4,1,0,7,1,36.62,0.001327
2,4,2,0,7,1,20.20,0.000716
3,4,3,0,7,1,8.90,0.000294
4,4,4,0,7,1,10.60,0.000358
...,...,...,...,...,...,...,...
578221,263,20,0,7,1,744.25,0.027700
578222,263,21,0,7,1,625.61,0.023278
578223,263,22,0,7,1,575.76,0.021420
578224,263,23,0,7,1,350.31,0.013018


In [5]:
# Remove the raw 'prediction' column so that only the key columns and the
# normalised values remain in the DataFrame
predictions = predictions.drop(columns=['prediction'])
predictions

Unnamed: 0,location_id,hour,day,month,week_of_month,normalised_prediction
0,4,0,0,7,1,0.002113
1,4,1,0,7,1,0.001327
2,4,2,0,7,1,0.000716
3,4,3,0,7,1,0.000294
4,4,4,0,7,1,0.000358
...,...,...,...,...,...,...
578221,263,20,0,7,1,0.027700
578222,263,21,0,7,1,0.023278
578223,263,22,0,7,1,0.021420
578224,263,23,0,7,1,0.013018


In [6]:
import pyarrow.parquet as pq
import pyarrow as pa

# Build an Arrow table from the normalised predictions DataFrame
table = pa.Table.from_pandas(df=predictions)

# Persist the Arrow table as a new Parquet file next to the notebook
pq.write_table(table, where='normalised_predictions.parquet')

## Connecting to database

In [8]:
import os
from getpass import getpass
from urllib.parse import quote

import pandas as pd
from sqlalchemy import create_engine

# Uploads the `predictions` DataFrame built in the cells above to PostgreSQL.
# This cell must be run after those cells so that `predictions` exists.

# PostgreSQL connection settings
database = 'cafes_manhattan'
user = 'maxgirt'
host = 'localhost'
port = '5432'

# Never keep the password in the notebook source: read it from the PGPASSWORD
# environment variable, and prompt for it (without echoing) when it is not set.
password = os.environ.get('PGPASSWORD')
if password is None:
    password = getpass(f'PostgreSQL password for {user}: ')

# Create the engine for connecting to the PostgreSQL server.
# The user and password are percent-encoded so that characters such as '@',
# ':' or '/' in them cannot be mistaken for URL delimiters.
engine = create_engine(
    f'postgresql://{quote(user, safe="")}:{quote(password, safe="")}@{host}:{port}/{database}'
)

# Store the DataFrame as a table in the PostgreSQL server.
# if_exists='replace' drops any existing table with this name first;
# index=False keeps the DataFrame index out of the table.
table_name = 'predictions'
try:
    predictions.to_sql(table_name, engine, if_exists='replace', index=False)
finally:
    # Release the engine's pooled connections even if the upload fails
    engine.dispose()
