# Setup and Imports

In [1]:
# Cell 1: Install necessary libraries (run once per environment if not already installed)
# Use %pip rather than !pip so the packages are installed into the environment of the running kernel.
# %pip install google-play-scraper pandas scikit-learn transformers torch spacy matplotlib seaborn sqlalchemy cx_Oracle psycopg2-binary ipywidgets widgetsnbextension
# !python -m spacy download en_core_web_sm
# !jupyter nbextension enable --py widgetsnbextension # For interactive elements if you use them

import pandas as pd
import os
import logging
from datetime import datetime

# Configure logging for the notebook: every record goes both to a log file
# and to the cell output in Jupyter.
log_file_path = 'pipeline_jupyter.log'

# logging.basicConfig() silently does nothing when the root logger already has
# handlers, which is the case whenever this cell is re-run in a live kernel.
# Detach and close any existing root handlers first so the configuration below
# always takes effect and the previous run's file handle is released.
root_logger = logging.getLogger()
for stale_handler in list(root_logger.handlers):
    root_logger.removeHandler(stale_handler)
    stale_handler.close()

# mode='w' truncates the log when it is opened, giving a fresh log per run
# without deleting a file that a handler may still hold open.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s',
                    handlers=[logging.FileHandler(log_file_path, mode='w'), logging.StreamHandler()])

logging.info("Jupyter Notebook setup complete.")


2025-06-07 16:08:45,186 - INFO - Jupyter Notebook setup complete.


In [4]:
# Make the project's 'src' and 'config' packages importable from this notebook.
# The project root is taken to be the parent of the current working directory;
# it is put at the front of sys.path, and only if it is not already listed.
import sys

_parent_of_cwd = os.path.join(os.getcwd(), os.pardir)
project_root = os.path.abspath(_parent_of_cwd)
if project_root not in sys.path:
    sys.path.insert(0, project_root)

# Project-local pipeline modules. These imports resolve only when the directory
# that contains the 'src' and 'config' packages is on sys.path.
# NOTE(review): the saved output of this cell is
# "ModuleNotFoundError: No module named 'spacy'" -- presumably one of the
# modules below imports spaCy, so it must be installed in the kernel's
# environment before this cell can succeed; confirm which module needs it.
from src.data_collection import collect_all_bank_reviews
from src.preprocessing import preprocess_reviews, tokenize_and_lemmatize # tokenize_and_lemmatize is imported alongside preprocess_reviews for consistency
from src.sentiment_analysis import add_sentiment_scores
from src.thematic_analysis import extract_keywords_tfidf, assign_themes
from src.database_manager import DatabaseManager
# Reporting helpers: a sentiment summary, three plotting functions and a
# recommendations generator (as their names indicate).
from src.insights_generator import (
    generate_sentiment_summary,
    plot_sentiment_distribution,
    plot_sentiment_by_bank,
    plot_themes_by_bank,
    generate_recommendations
)
# Settings are read from the project's config package rather than hard-coded
# in the notebook.
from config.app_config import BANK_APPS, TARGET_REVIEWS_PER_BANK, DB_CONFIG

# Reached only if every import above succeeded.
logging.info("All custom modules and configurations imported.")

ModuleNotFoundError: No module named 'spacy'

# Configuration Review

In [3]:
pip install google_play_scraper

Collecting google_play_scraper
  Obtaining dependency information for google_play_scraper from https://files.pythonhosted.org/packages/33/f7/a23ef3cf8efc9ab3aee565971f59906811e6ce95475314ef7b18d02f30ba/google_play_scraper-1.2.7-py3-none-any.whl.metadata
  Downloading google_play_scraper-1.2.7-py3-none-any.whl.metadata (50 kB)
     ---------------------------------------- 0.0/50.2 kB ? eta -:--:--
     ------------------------------ ------- 41.0/50.2 kB 653.6 kB/s eta 0:00:01
     ------------------------------ ------- 41.0/50.2 kB 653.6 kB/s eta 0:00:01
     ------------------------------ ------- 41.0/50.2 kB 653.6 kB/s eta 0:00:01
     ------------------------------ ------- 41.0/50.2 kB 653.6 kB/s eta 0:00:01
     ------------------------------ ------- 41.0/50.2 kB 653.6 kB/s eta 0:00:01
     ------------------------------ ------- 41.0/50.2 kB 653.6 kB/s eta 0:00:01
     ------------------------------ ------- 41.0/50.2 kB 653.6 kB/s eta 0:00:01
     ------------------------------ ----