In [2]:
# Import necessary libraries
import os
import pandas as pd

In [4]:
# Resolve the directory the notebook kernel is currently running in.
# The input and output CSV paths used in later cells are built from this value,
# so the notebook must be launched from the folder that holds the data file.
working_directory = os.getcwd()
print(f"Current working directory: {working_directory}")

Current working directory: /Users/buboyencarnacion


In [10]:
# Read the raw transactions CSV located in the working directory into a DataFrame.
transactions_data_path = os.path.join(working_directory, 'transactions_data.csv')
transactions_data = pd.read_csv(filepath_or_buffer=transactions_data_path)

In [12]:
# Exploratory Data Analysis (EDA): missing values and data types
# Per-column count of null entries, and the dtype of each column.
transactions_missing = transactions_data.isnull().sum()
transactions_data_types = transactions_data.dtypes

# Leave the combined summary as the cell's last expression so Jupyter renders
# it; without this the two results above are computed but never shown.
pd.DataFrame({
    'missing_values': transactions_missing,
    'dtype': transactions_data_types,
})

In [14]:
# Impute missing numeric values: every null in each listed column is replaced
# by that column's own median.
for numeric_column in ('Product_Price', 'Total_Cost'):
    column_median = transactions_data[numeric_column].median()
    transactions_data[numeric_column] = transactions_data[numeric_column].fillna(column_median)

In [16]:
# Convert 'Transaction_Date' to datetime format.
# errors='coerce' turns values that cannot be parsed into NaT instead of raising,
# so count how many non-null inputs were lost and report them rather than
# letting them disappear silently.
raw_transaction_dates = transactions_data['Transaction_Date']
parsed_transaction_dates = pd.to_datetime(raw_transaction_dates, errors='coerce')

unparseable_date_count = int((parsed_transaction_dates.isna() & raw_transaction_dates.notna()).sum())
if unparseable_date_count:
    print(f"Warning: {unparseable_date_count} 'Transaction_Date' value(s) could not be parsed and were set to NaT.")

transactions_data['Transaction_Date'] = parsed_transaction_dates

In [18]:
# Feature Engineering
# Recency (time since last transaction), computed per company so that it is
# consistent with the other per-company features (Purchase_Frequency,
# Total_Spending): the number of days between the most recent transaction in
# the whole dataset (the reference date) and that company's own most recent
# transaction. Every row of a company carries the same value.
# Rows without a Company_ID belong to no group and therefore get NaN.
reference_date = transactions_data['Transaction_Date'].max()
last_transaction_date = transactions_data.groupby('Company_ID')['Transaction_Date'].transform('max')
transactions_data['Recency'] = (reference_date - last_transaction_date).dt.days

In [20]:
# Purchase Frequency (number of transactions per customer)
# One row per Company_ID with the count of its non-null Transaction_ID values.
purchase_frequency = (
    transactions_data.groupby('Company_ID')['Transaction_ID']
    .count()
    .rename('Purchase_Frequency')
    .reset_index()
)

# Attach the per-company count to every transaction row.
# Dropping any existing 'Purchase_Frequency' column first keeps the cell
# idempotent: re-running it would otherwise make merge() emit suffixed
# duplicates ('Purchase_Frequency_x' / 'Purchase_Frequency_y').
# validate='many_to_one' asserts the lookup has a single row per Company_ID,
# so the merge can never multiply transaction rows.
transactions_data = (
    transactions_data
    .drop(columns='Purchase_Frequency', errors='ignore')
    .merge(purchase_frequency, on='Company_ID', how='left', validate='many_to_one')
)

In [22]:
# Total Spending per customer
# One row per Company_ID with the sum of its Total_Cost values.
total_spending = (
    transactions_data.groupby('Company_ID')['Total_Cost']
    .sum()
    .rename('Total_Spending')
    .reset_index()
)

# Attach the per-company total to every transaction row.
# Dropping any existing 'Total_Spending' column first keeps the cell
# idempotent: re-running it would otherwise make merge() emit suffixed
# duplicates ('Total_Spending_x' / 'Total_Spending_y').
# validate='many_to_one' asserts the lookup has a single row per Company_ID,
# so the merge can never multiply transaction rows.
transactions_data = (
    transactions_data
    .drop(columns='Total_Spending', errors='ignore')
    .merge(total_spending, on='Company_ID', how='left', validate='many_to_one')
)

In [24]:
# Persist the cleaned and feature-enriched frame as a new CSV in the working
# directory; the DataFrame index is not written to the file.
cleaned_transactions_data_path = os.path.join(working_directory, 'updated_transactions_data.csv')
transactions_data.to_csv(path_or_buf=cleaned_transactions_data_path, index=False)

In [26]:
# Final status message. This cell performs no checks of its own: the text is
# printed whenever the cell runs, regardless of what the earlier cells produced.
print("Transactions data preprocessing and feature engineering completed successfully.")

Transactions data preprocessing and feature engineering completed successfully.
