# Setup and Environment Configuration
This notebook sets up the environment, loads necessary modules, and establishes a database connection.


In [5]:
import sys
from pathlib import Path

# Print out the current sys.path
print("Current sys.path:")
for p in sys.path:
    print(p)

# Check if the modules path exists
repo_dir = Path.cwd().parents[0]
modules_path = repo_dir / 'modules'
print(f"\nModules path exists: {modules_path.exists()}")
print(f"Modules path: {modules_path}")

from pathlib import Path

# Check if the file exists
module_file = Path(repo_dir / 'modules' / 'loading_preprocessed_designs.py')
print(f"Module file exists: {module_file.exists()}")



Current sys.path:
/Users/jf/miniconda3/envs/data_challenge_git/lib/python38.zip
/Users/jf/miniconda3/envs/data_challenge_git/lib/python3.8
/Users/jf/miniconda3/envs/data_challenge_git/lib/python3.8/lib-dynload

/Users/jf/miniconda3/envs/data_challenge_git/lib/python3.8/site-packages
/Users/jf/miniconda3/envs/data_challenge_git/lib/python3.8/site-packages/setuptools/_vendor
/Users/jf/_lokal/DataChallenge_LLM_REPipeline/libs
/Users/jf/_lokal/DataChallenge_LLM_REPipeline/modules
/Users/jf/_lokal/DataChallenge_LLM_REPipeline/libs
/Users/jf/_lokal/DataChallenge_LLM_REPipeline/modules

Modules path exists: True
Modules path: /Users/jf/_lokal/DataChallenge_LLM_REPipeline/modules
Module file exists: True


In [2]:
# Importing Modules and Setting Up Environment Variables

import sys
import os
import pandas as pd
from pathlib import Path
from dotenv import load_dotenv, find_dotenv
from sqlalchemy import create_engine
import warnings

# Load Environment Variables
_ = load_dotenv(find_dotenv())

# Add Submodule Path and Verify Symlink
repo_dir = Path.cwd().parents[0]  # Move up to the parent directory
sys.path.append(str(repo_dir / 'libs'))
sys.path.append(str(repo_dir / 'modules'))

# Ensure the symlink exists (assuming setup_symlink.py has been executed)
symlink_path = repo_dir / 'libs'
if not symlink_path.exists():
    print(f"Error: Symlink {symlink_path} does not exist. Run setup_symlink.py first.")
    sys.exit(1)

# Import Custom Modules and Set Display Options
from NLP_on_multilingual_coin_datasets.cnt.io import Database_Connection
from modules.loading_preprocessed_designs import PreprocessingConfig, LoadingPreprocessedDesigns
from modules import scripts, prompts

# Set up pandas display options for better readability
pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.width', 1000)

# Suppress warnings
warnings.filterwarnings('ignore')

# Define Database Connection Parameters
db_user = os.getenv('DB_USER')
db_password = os.getenv('DB_PASSWORD')
db_host = os.getenv('DB_HOST')
db_port = os.getenv('DB_PORT')
database = "nlp_challenge"  # Ensure the correct database name is set

connection_string = f"mysql+mysqlconnector://{db_user}:{db_password}@{db_host}:{db_port}/{database}"
engine = create_engine(connection_string)


ModuleNotFoundError: No module named 'modules'

# Writing Unit Tests for `LoadingPreprocessedDesigns`
This notebook contains unit tests to verify the functionality of the `LoadingPreprocessedDesigns` class.


In [None]:
# Unit Test Class Definition

class TestLoadingPreprocessedDesigns(unittest.TestCase):
    
    @patch('pandas.read_sql_query')
    def test_load_entities_success(self, mock_read_sql):
        # Mock data returned by read_sql_query
        mock_data = pd.DataFrame({
            'id': [1, 2],
            'name_en': ['entity1', 'entity2'],
            'alternativenames_en': ['alt1, alt2', 'alt3']
        })
        mock_read_sql.return_value = mock_data

        result = lpd.load_entities()

        # Assert the processed data is as expected
        self.assertIn('PERSON', result)
        self.assertIn('entity1', result['PERSON'])
        self.assertIn('alt2', result['PERSON'])

    @patch('pandas.read_sql_query')
    def test_load_entities_no_data(self, mock_read_sql):
        # Mock an empty dataframe returned by read_sql_query
        mock_read_sql.return_value = pd.DataFrame()

        result = lpd.load_entities()

        # Expect empty result
        self.assertEqual(result, {})

    def test_load_entities_invalid_add_columns(self):
        # Test with invalid add_columns (not a list or string)
        lpd.prep_cfg.add_columns = 123  # Invalid type

        with self.assertRaises(TypeError):
            lpd.load_entities()

# Running Unit Tests
if __name__ == '__main__':
    unittest.main(argv=[''], exit=False)
