In [1]:
import sys
import os

# Make the project's src/ directory importable from this notebook.
# os.path.abspath resolves "../src" against the kernel's current working
# directory. The membership check keeps sys.path free of duplicate entries
# when this cell is executed more than once in the same kernel session.
SRC_DIR = os.path.abspath("../src")
if SRC_DIR not in sys.path:
    sys.path.append(SRC_DIR)

In [2]:
# Import the feature engineering functions
from feature_engineering import (
    create_temporal_features,
    add_text_features,
    add_sentiment_features,
    encode_categorical_features,
    remove_duplicates,
    handle_missing_values
)
from db_operations import read_data_from_table, write_data_to_table
from logging_utils import setup_logger
# Build the notebook's logger via the project helper, then record the start
# of the run.
# NOTE(review): log_file is a relative path, so where it lands depends on the
# kernel's working directory — confirm the logs/ folder exists there.
logger = setup_logger(
    name='my_logger',
    log_file='../notebooks/logs/my_log.log',
)
logger.info("Starting feature engineering process...")

In [3]:
# Source table to read from.
table_name = 'telegram_messages'

# Load the table through the project's DB helper and log how many rows came back.
df = read_data_from_table(table_name)
logger.info(f"Loaded {len(df)} rows from {table_name}")

# Keep this as the cell's last expression so the preview is rendered as output.
df.head()

SQLAlchemy engine created successfully!
Data read from table 'telegram_messages' successfully!


Unnamed: 0,id,date,message,views,media
0,97,2023-02-10 12:23:06,"⚠️Notice!\nDear esteemed customers,\nDue to fo...",1047.0,True
1,96,2023-02-02 08:58:52,Mela-One በውስጡ ሆርሞን ያለው ድንገተኛ ወሊድ መቆጣጠርያ ሲሆን ያለ...,1013.0,True
2,95,2023-02-01 08:59:37,አዚትሮማይሲን በሃኪም መድሃኒት ማዘዣ ከሚታዘዙ አንቲባዮቲኮች አንዱ ሲሆን...,920.0,True
3,94,2023-01-31 09:19:53,Che-Med Trivia #3\n\nምግብና መጠጦች አንዳንድ መድሃኒቶች በደ...,712.0,True
4,93,2023-01-30 09:45:25,"Che-Med Trivia #2\n\nእንደ Ciprofloxacin, Doxycy...",620.0,True


In [4]:
# Run remove_duplicates, then handle_missing_values, rebinding df to each
# helper's return value and logging a confirmation after each call.
for cleaning_step, done_message in (
    (remove_duplicates, "Duplicates removed successfully."),
    (handle_missing_values, "Missing values handled successfully."),
):
    df = cleaning_step(df)
    logger.info(done_message)


In [5]:
# Feature-engineering pipeline: each entry pairs a project helper with the
# message logged once that helper has returned. Order matters — every step
# receives the frame produced by the previous one.
# NOTE(review): df is rebound at each step, so re-running this cell feeds the
# already-transformed frame back through every helper.
feature_steps = (
    (create_temporal_features, "Temporal features created successfully."),
    (add_text_features, "Text features added successfully."),
    (add_sentiment_features, "Sentiment features added successfully."),
    (encode_categorical_features, "Categorical features encoded successfully."),
)

for feature_step, done_message in feature_steps:
    df = feature_step(df)
    logger.info(done_message)


In [6]:
# Destination table for the processed frame.
cleaned_table_name = "cleaned_telegram_messages"

# Hand the frame to the project's DB helper, then log where it was written.
write_data_to_table(df, cleaned_table_name)
logger.info(f"Cleaned data saved successfully in table: {cleaned_table_name}")


SQLAlchemy engine created successfully!
Data written successfully to 'cleaned_telegram_messages'
