In [None]:
# Import necessary libraries
import pandas as pd
from scripts.data_processing import clean_data, feature_engineering
from scripts.visualizations import plot_sales_distribution, plot_promo_effect, plot_store_type_sales, plot_competition_effect


In [None]:
# Read the three Rossmann CSV files (paths are relative to the notebook's
# working directory). The reads run left to right: train, test, store.
train, test, store = (
    pd.read_csv('../data/rossmann-store-sales/train.csv'),
    pd.read_csv('../data/rossmann-store-sales/test.csv'),
    pd.read_csv('../data/rossmann-store-sales/store.csv'),
)



In [None]:
# First look at the raw frames: show the leading rows of each table.
# print() is used rather than IPython's display() so this cell also runs
# when the notebook is executed as a plain script. Each call emits the
# heading and the preview on separate lines via sep="\n".
print("Training data overview:", train.head(), sep="\n")
print("\nStore data overview:", store.head(), sep="\n")



In [None]:

# Report the per-column count of missing entries in the raw training and
# store frames, before any cleaning is applied.
for _heading, _frame in (
    ("\nMissing values in training data:", train),
    ("\nMissing values in store data:", store),
):
    print(_heading)
    print(_frame.isnull().sum())



In [None]:
# Run the project's cleaning step on the raw training and store frames.
# NOTE(review): per the original note this covers missing values and
# outliers; the details live in scripts.data_processing.clean_data.
train_cleaned = clean_data(train, store)

# Sanity-check the result: preview the leading rows, then list how many
# missing entries remain in each column.
print("\nCleaned training data (head):", train_cleaned.head(), sep="\n")
print(
    "\nRemaining missing values after cleaning:",
    train_cleaned.isnull().sum(),
    sep="\n",
)



In [None]:
# Derive model features from the cleaned training frame.
# NOTE(review): the original note mentions promo and holiday features;
# confirm the exact columns in scripts.data_processing.feature_engineering.
train_fe = feature_engineering(train_cleaned)

# Preview the leading rows to confirm the step produced a usable frame.
print(
    "\nTraining data after feature engineering (head):",
    train_fe.head(),
    sep="\n",
)



In [None]:
# Exploratory plots, run in a fixed order against the feature-engineered
# frame. Each entry pairs the progress message printed before the plot
# with the plotting helper that is then called on train_fe.
for _message, _plot in (
    # Sales distribution
    ("\nPlotting sales distribution...", plot_sales_distribution),
    # Promo effect on sales
    ("\nPlotting promo effect on sales...", plot_promo_effect),
    # Store type and sales
    ("\nPlotting store type and sales distribution...", plot_store_type_sales),
    # Competition effect on sales
    ("\nPlotting competition effect on sales...", plot_competition_effect),
):
    print(_message)
    _plot(train_fe)