In [10]:
import sys
import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

# Helper function to handle try-except blocks
def try_except_block(task_description, task_function):
    try:
        task_function()
    except Exception as e:
        print(f"Error {task_description}: {e}")
        sys.exit(1)

# Add the parent directory to the system path and import modules
def import_modules():
    sys.path.append(os.path.abspath(os.path.join(os.getcwd(), '..')))
    global load_data, split_data, scale_data, train_logistic_regression, train_decision_tree, evaluate_model, plot_scaling_effects, plot_distributions
    from src.data_preprocessing import load_data, split_data, scale_data
    from src.model_training import train_logistic_regression, train_decision_tree
    from src.model_evaluation import evaluate_model, plot_scaling_effects, plot_distributions

try_except_block("importing modules", import_modules)

# Print the current working directory
try_except_block("getting current working directory", lambda: print("Current Working Directory:", os.getcwd()))

# Set project_path based on the directory structure
def set_project_path():
    global project_path
    project_path = os.path.abspath(os.path.join(os.getcwd(), '..'))
    print("Project Path:", project_path)

try_except_block("setting project path", set_project_path)

# Verify the updated Python path
try_except_block("printing Python path", lambda: print("Updated Python Path:", sys.path))

# Load the data
def load_data_task():
    global data_path, df
    data_path = os.path.join(project_path, 'data', 'raw', 'Social_Network_Ads.csv')
    if not os.path.exists(data_path):
        raise FileNotFoundError(f"Data file not found at {data_path}")
    df = load_data(data_path)

try_except_block("loading data", load_data_task)

# Preprocess data
def preprocess_data():
    global X_train, X_test, y_train, y_test, X_train_scaled, X_test_scaled, scaler_mean
    X_train, X_test, y_train, y_test = split_data(df)
    X_train_scaled, X_test_scaled, scaler_mean = scale_data(X_train, X_test)
    print(scaler_mean)

try_except_block("data loading and preprocessing", preprocess_data)

# Train and evaluate Logistic Regression before scaling
def logistic_regression_unscaled():
    lr_model_unscaled = LogisticRegression()
    lr_model_unscaled.fit(X_train, y_train)
    lr_accuracy_unscaled = evaluate_model(lr_model_unscaled, X_test, y_test)
    print("Logistic Regression Accuracy (Unscaled):", lr_accuracy_unscaled)

try_except_block("training Logistic Regression (Unscaled)", logistic_regression_unscaled)

# Train and evaluate Logistic Regression after scaling
def logistic_regression_scaled():
    lr_model_scaled = train_logistic_regression(X_train_scaled, y_train)
    lr_accuracy_scaled = evaluate_model(lr_model_scaled, X_test_scaled, y_test)
    print("Logistic Regression Accuracy (Scaled):", lr_accuracy_scaled)

try_except_block("training Logistic Regression (Scaled)", logistic_regression_scaled)

# Train and evaluate Decision Tree before scaling
def decision_tree_unscaled():
    dt_model_unscaled = DecisionTreeClassifier()
    dt_model_unscaled.fit(X_train, y_train)
    dt_accuracy_unscaled = evaluate_model(dt_model_unscaled, X_test, y_test)
    print("Decision Tree Accuracy (Unscaled):", dt_accuracy_unscaled)

try_except_block("training Decision Tree (Unscaled)", decision_tree_unscaled)

# Train and evaluate Decision Tree after scaling
def decision_tree_scaled():
    dt_model_scaled = train_decision_tree(X_train_scaled, y_train)
    dt_accuracy_scaled = evaluate_model(dt_model_scaled, X_test_scaled, y_test)
    print("Decision Tree Accuracy (Scaled):", dt_accuracy_scaled)

try_except_block("training Decision Tree (Scaled)", decision_tree_scaled)

# Convert numpy arrays to DataFrames for saving
def convert_to_dataframe():
    global X_train_scaled_df, X_test_scaled_df
    X_train_scaled_df = pd.DataFrame(X_train_scaled, columns=df.columns[:-1])
    X_test_scaled_df = pd.DataFrame(X_test_scaled, columns=df.columns[:-1])

try_except_block("converting numpy arrays to DataFrames", convert_to_dataframe)

# Plot effects of scaling
def plot_effects_of_scaling():
    plot_scaling_effects(X_train, X_train_scaled_df)
    plot_distributions(X_train, X_train_scaled_df)

try_except_block("plotting scaling effects", plot_effects_of_scaling)


Current Working Directory: /media/amit/Backup/Feature_scaling/notebooks
Project Path: /media/amit/Backup/Feature_scaling
Updated Python Path: ['/usr/lib/python310.zip', '/usr/lib/python3.10', '/usr/lib/python3.10/lib-dynload', '', '/media/amit/Backup/Feature_scaling/.venv/lib/python3.10/site-packages', '/media/amit/Backup/Feature_scaling', '/media/amit/Backup/Feature_scaling', '/media/amit/Backup/Feature_scaling', '/media/amit/Backup/Feature_scaling', '/media/amit/Backup/Feature_scaling', '/media/amit/Backup/Feature_scaling', '/media/amit/Backup/Feature_scaling']
[3.78642857e+01 6.98071429e+04]
Error training Logistic Regression (Unscaled): 'function' object has no attribute 'predict'


AttributeError: 'tuple' object has no attribute 'tb_frame'