-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat(analytics): python setup & read model sync & logger
- Loading branch information
Showing
6 changed files
with
288 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
import os | ||
import time | ||
from dotenv import load_dotenv | ||
|
||
from flask import Flask, request, abort | ||
|
||
from scripts.update_read_model import update_read_model | ||
from util.log_util import logger | ||
|
||
# Flask application object for the analytics trigger service.
app = Flask(__name__)

# Load variables from a local .env file so the secret is available in dev.
load_dotenv()
# Shared secret that callers must present in the Authorization header.
# NOTE(review): presumably set in the deployment environment — if unset,
# the route below rejects every request with 401.
ANALYTICS_SECRET_TOKEN = os.getenv("ANALYTICS_SECRET_TOKEN")
@app.route("/")
def hello_world():
    """Trigger a read-model sync over HTTP.

    Requires the shared secret in the ``Authorization`` header (401
    otherwise). Runs ``update_read_model()``, flushes buffered logs, and
    returns a JSON object with a status message and the elapsed time.
    """
    token = request.headers.get('Authorization')

    # Reject when no secret is configured at all, or the token mismatches.
    # NOTE(review): plain != is not a constant-time compare; consider
    # hmac.compare_digest if timing attacks are a concern here.
    if ANALYTICS_SECRET_TOKEN is None or token != ANALYTICS_SECRET_TOKEN:
        abort(401)

    start_time = time.time()

    succeeded = True
    try:
        update_read_model()
    except Exception as e:
        succeeded = False
        logger.error("Script update_read_model failed", {"error": e})

    # Persist buffered log entries before responding.
    logger.flush()

    # BUG FIX: previously the response always claimed success, even when
    # update_read_model() raised and the error was logged.
    return {
        "message": 'Script executed successfully!' if succeeded else 'Script failed!',
        "time_taken": f"{time.time() - start_time:.2f}s"
    }
Empty file.
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,101 @@ | ||
import os | ||
import MySQLdb | ||
import pandas as pd | ||
|
||
from util.log_util import logger | ||
|
||
# Module-level MySQL connection, opened at import time.
# autocommit=True avoids explicit COMMITs for the read-only/append flow below;
# ssl_mode VERIFY_IDENTITY enforces TLS with hostname verification against the
# system CA bundle (PlanetScale-style setup — TODO confirm provider).
# NOTE(review): connecting at import time means an unreachable DB fails the
# import itself; consider a lazy/retrying connection factory.
connection = MySQLdb.connect(
    host=os.getenv("DB_HOST"),
    user=os.getenv("DB_USERNAME"),
    passwd=os.getenv("DB_PASSWORD"),
    db=os.getenv("DB_NAME"),
    autocommit=True,
    ssl_mode="VERIFY_IDENTITY",
    ssl={
        "ca": "/etc/ssl/cert.pem"
    }
)
def upsert_table(cursor, table_name, dtypes_def):
    """Sync new rows from a MySQL table into its Parquet read-model file.

    For append-only tables, only rows with ``id`` greater than the largest
    id already present in the Parquet file are fetched and merged in. For
    ``fpp_users`` the whole table is re-read and the file replaced (see TODO).

    :param cursor: open MySQLdb cursor used for the SELECTs.
    :param table_name: MySQL table name. Trusted internal value — it is
        interpolated into SQL, so never pass user-controlled input here.
    :param dtypes_def: mapping of column name -> pandas dtype for astype().
    """
    parquet_file = f"./data/{table_name}.parquet"

    # Load the existing Parquet file (if any) to find the high-water mark.
    if os.path.isfile(parquet_file):
        df_parquet = pd.read_parquet(parquet_file)
        # The frame is indexed by 'id' (set below when writing), so the
        # index max is the last synced id.
        last_id = df_parquet.index.max() if not df_parquet.empty else 0
    else:
        df_parquet = pd.DataFrame()
        last_id = 0

    # TODO: improve user update logic
    # Read new data from MySQL. last_id is bound as a query parameter
    # rather than interpolated into the SQL string.
    if table_name != "fpp_users":
        cursor.execute(f"SELECT * FROM {table_name} WHERE id > %s", (last_id,))
    else:
        cursor.execute("SELECT * FROM fpp_users ORDER BY created_at DESC")

    missing_records = cursor.fetchall()
    missing_records_column_names = [i[0] for i in cursor.description]
    df_mysql = pd.DataFrame(missing_records, columns=missing_records_column_names)

    amount_of_new_records = len(df_mysql)

    if amount_of_new_records == 0:
        logger.debug("No new records for table", {"table_name": table_name})
        return

    if table_name != "fpp_users":
        df_mysql.set_index('id', inplace=True)

    df_mysql = df_mysql.astype(dtypes_def)

    if table_name == "fpp_votes":
        # MySQL stores the flag as a 0/1 tinyint; map back to real booleans.
        df_mysql['was_auto_flip'] = df_mysql['was_auto_flip'].map({0: False, 1: True})

    # Merge new MySQL data with existing Parquet data.
    # BUG FIX: previously the merged frame was written and then immediately
    # overwritten by an unconditional df_mysql.to_parquet(), discarding all
    # previously synced rows on every run.
    if table_name != "fpp_users":
        df = pd.concat([df_mysql, df_parquet])
        df.to_parquet(parquet_file)
    else:
        # fpp_users is fully re-read each run, so replacing the file is correct.
        df_mysql.to_parquet(parquet_file)

    logger.info("Upserted records for in read model", {
        "table_name": table_name,
        "amount_of_new_records": amount_of_new_records
    })
def update_read_model():
    """Refresh all Parquet read-model files from the MySQL database.

    Opens a cursor on the module-level connection, logs the server version
    as a connectivity check, then upserts each analytics table with its
    pandas dtype mapping.
    """
    logger.debug("update_read_model called!")

    # Create a cursor and use a cheap version query as a connectivity check.
    cursor = connection.cursor()
    cursor.execute("select @@version")
    version = cursor.fetchone()

    if version:
        # BUG FIX: was f"Running version: ${version}" — the stray '$' is a
        # leftover JS-template sigil that ended up verbatim in the log line.
        logger.debug(f"Running version: {version}")
    else:
        logger.debug('Not connected to db')

    upsert_table(cursor, "fpp_estimations",
                 {'user_id': 'str', 'room_id': 'int16', 'estimation': 'int16', 'spectator': 'int16'})
    upsert_table(cursor, "fpp_events", {'user_id': 'str', 'event': 'category'})
    upsert_table(cursor, "fpp_page_views", {'user_id': 'str', 'route': 'category', 'room_id': 'Int16'})
    upsert_table(cursor, "fpp_rooms", {'number': 'int16', 'name': 'str'})
    upsert_table(cursor, "fpp_votes", {'room_id': 'int16', 'min_estimation': 'int16', 'max_estimation': 'int16',
                                       'amount_of_estimations': 'int16', 'amount_of_spectators': 'int16',
                                       'duration': 'int16'})
    upsert_table(cursor, "fpp_users", {})

    # Release the cursor once all tables are synced (was leaked before).
    cursor.close()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
import glob | ||
import json | ||
import os | ||
import time | ||
import traceback | ||
|
||
# Minimum log level gate: only the value "DEBUG" enables LoggerClass.debug
# output; everything else (default "INFO") suppresses debug entries.
LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO")
def delete_old_logs(self=None):
    """Remove log files older than one week from ./logs.

    :param self: unused — this is a module-level function, not a method.
        Kept (with a default) for backward compatibility with the existing
        ``delete_old_logs(self)`` call in ``LoggerClass.flush``.
    """
    one_week_ago = time.time() - 7 * 24 * 60 * 60  # Timestamp one week ago
    # glob returns [] when ./logs does not exist, so no guard is needed.
    for log_file in glob.glob('./logs/logs_*.txt'):
        if os.path.getmtime(log_file) < one_week_ago:
            os.remove(log_file)
class LoggerClass:
    """Minimal in-memory buffering logger.

    Entries accumulate in ``self.logs`` and are only written to stdout and
    a dated file under ./logs when ``flush()`` is called; ``flush()`` also
    prunes week-old log files via ``delete_old_logs``.
    """

    def __init__(self):
        # Buffered entries: dicts of level / msg / properties / timestamp.
        self.logs = []

    def debug(self, msg, properties=None):
        # Debug output is gated on the LOG_LEVEL env var (see module top).
        if LOG_LEVEL == "DEBUG":
            self._log('DEBUG', msg, properties)

    def info(self, msg, properties=None):
        self._log('INFO', msg, properties)

    def warn(self, msg, properties=None):
        self._log('WARN', msg, properties)

    def error(self, msg, properties=None):
        # Replace a raised exception with its formatted traceback so it
        # survives json.dumps in flush(). BUG FIX: copy first — the
        # original wrote the serialized form back into the caller's dict.
        if properties and isinstance(properties.get("error"), BaseException):
            properties = dict(properties)
            properties["error"] = self._serialize_exception(properties["error"])
        self._log('ERROR', msg, properties)

    def _serialize_exception(self, e):
        # Full traceback text, same format the interpreter prints.
        return "".join(traceback.format_exception(None, e, e.__traceback__))

    def _log(self, level, msg, properties=None):
        log_entry = {"level": level, "msg": msg, "properties": properties,
                     "timestamp": time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime())}
        self.logs.append(log_entry)

    def flush(self):
        """Write buffered entries to today's log file and clear the buffer."""
        # BUG FIX: ensure the log directory exists before opening the file;
        # previously flush() crashed with FileNotFoundError on a fresh deploy.
        os.makedirs('./logs', exist_ok=True)
        log_filename = './logs/logs_' + time.strftime("%Y%m%d") + '.txt'  # e.g. 'logs_20220101.txt'
        with open(log_filename, 'a') as log_file:
            for log in self.logs:
                log_line = f"[{log['timestamp']}][{log['level']}]: {log['msg']}"
                if log['properties']:
                    log_line += f" - {json.dumps(log['properties'])}"
                print(log_line)  # Print to console
                log_file.write(log_line + '\n')  # Write to file
        self.logs = []  # Clear logs in memory
        delete_old_logs(self)
# Shared module-level logger instance imported by the rest of the app.
logger = LoggerClass()