In [None]:
# Import necessary modules
import sys

# The project imports in this cell use the `src.` package prefix, so the
# directory that CONTAINS `src/` (one level up from the notebook's working
# directory) has to be importable -- '../src' alone only exposes the modules
# inside it as top-level names. '../src' is kept for backward compatibility in
# case modules inside src/ import each other without the prefix.
# The membership test keeps sys.path from growing when the cell is re-run.
for _extra_path in ('..', '../src'):
    if _extra_path not in sys.path:
        sys.path.append(_extra_path)

from src.data_processing import load_data, clean_data, handle_missing_data, detect_outliers, save_cleaned_data
from src.feature_engineering import extract_date_features
from src.data_visualization import plot_sales_distribution, check_outlier_plot, plot_sales_behavior, plot_sales_vs_customers, plot_correlation_heatmap, plot_promo_effect, plot_assortment_effect
from src.logger import setup_logging

In [None]:
# Set up logging using the project's src.logger helper, before any data step runs
setup_logging()

In [None]:
# Load the raw training data (this cell only loads; cleaning is done in later cells)
# The path is relative, so it resolves against the notebook's working directory
df = load_data('../data/train.csv')

In [None]:
# Check for missing values: sum of the null flags, displayed as the cell output
# (presumably df is a pandas DataFrame, giving one count per column -- confirm against load_data)
df.isnull().sum()

In [None]:
# Handle missing data; the returned value replaces df
# NOTE(review): the strategy (drop vs. fill) lives in src.data_processing -- check there
df = handle_missing_data(df)

In [None]:
# Run the project's clean_data step on the missing-data-handled frame;
# the result is stored as df_clean and used by all following cells
df_clean = clean_data(df)

In [None]:
# Display the first few rows of the cleaned dataset
# (head() is called without a row count; with pandas that means 5 rows -- assumes df_clean is a DataFrame)
df_clean.head()

In [None]:
# Display summary statistics of the cleaned dataset
# NOTE(review): if df_clean is a pandas DataFrame with mixed dtypes, describe()
# reports numeric columns only by default -- confirm that is the intent
df_clean.describe()

In [None]:
# Display the data types of the cleaned dataset (one entry per column for a pandas DataFrame)
df_clean.dtypes

In [None]:
# Check for duplicates: total number of rows flagged by duplicated()
# NOTE(review): presumably pandas semantics (fully identical rows, first occurrence not flagged) -- confirm
df_clean.duplicated().sum()

In [None]:
# Check for outliers visually, before the outlier-handling cell that follows modifies df_clean
check_outlier_plot(df_clean)

In [None]:
# Detect and remove outliers, using 'Sales' as the target passed to the helper
# NOTE(review): df_clean is overwritten with the return value, so detect_outliers
# is expected to return the filtered frame rather than a mask or index -- confirm
df_clean = detect_outliers(df_clean, 'Sales')

In [None]:
# Feature engineering: derive date-based features from the cleaned data
# NOTE(review): which columns get added is defined in src.feature_engineering -- check there
df_features = extract_date_features(df_clean)

In [None]:
# Visualization: plot the sales distribution from the feature-enriched frame
plot_sales_distribution(df_features)

In [None]:
# Plot sales behavior around holidays
# The two holiday dates are hard-coded ISO-format strings; 'Date' and 'Sales'
# are presumably the column names the helper reads -- verify against src.data_visualization
plot_sales_behavior(df_features, 'Date', 'Sales', holidays=['2015-07-01', '2015-09-01'])

In [None]:
# Plot the relationship between sales and number of customers
# ('Sales' and 'Customers' are presumably column names of df_features -- confirm)
plot_sales_vs_customers(df_features, 'Sales', 'Customers')

In [None]:
# Plot correlation heatmap for the feature-enriched frame
# NOTE(review): which columns enter the correlation is decided inside the helper -- check there
plot_correlation_heatmap(df_features)

In [None]:
# Plot the effect of promotions on sales
# ('Promo' and 'Sales' are presumably column names of df_features -- confirm)
plot_promo_effect(df_features, 'Promo', 'Sales')

In [None]:
# Plot the effect of assortment type on sales
# ('Assortment' and 'Sales' are presumably column names of df_features -- confirm)
plot_assortment_effect(df_features, 'Assortment', 'Sales')

In [None]:
# Save the cleaned, feature-enriched data (df_features, not just df_clean)
from pathlib import Path

_output_path = '../data/processed/cleaned_data.csv'
# Make sure ../data/processed/ exists so the write cannot fail on a fresh
# checkout; harmless if save_cleaned_data already creates the directory itself.
Path(_output_path).parent.mkdir(parents=True, exist_ok=True)
save_cleaned_data(df_features, _output_path)