In [117]:
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import joblib

In [119]:
# 1. Set the working directory
working_directory = os.getcwd()
print(f"Current working directory: {working_directory}")

Current working directory: /Users/buboyencarnacion


In [121]:
# 2. Load the datasets
customers_df = pd.read_csv(os.path.join(working_directory, 'updated_customers_data.csv'))
products_df = pd.read_csv(os.path.join(working_directory, 'updated_products_data.csv'))
transactions_df = pd.read_csv(os.path.join(working_directory, 'updated_transactions_data.csv'))

# Display the first few rows of each dataframe to ensure they are loaded correctly
print("Customers DataFrame:")
print(customers_df.head())

print("\nProducts DataFrame:")
print(products_df.head())

print("\nTransactions DataFrame:")
print(transactions_df.head())

Customers DataFrame:
  Company_ID          Company_Name  Company_Profit  \
0        1.0  Tech  Enterprises  1         80701.0   
1        2.0   Global  Partners  2         80511.0   
2        3.0  Quantum Associates 3        110664.0   
3        4.0       Prime Network 4         75301.5   
4        5.0    Elite  Ventures  5         69427.0   

                                             Address Profitability_Category  \
0             EDSA, Barangay 606, Pasig, Philippines                 Medium   
1  Commonwealth Ave, Barangay 789, Taguig, Philip...                 Medium   
2       Roxas Blvd, Barangay 505, Pasig, Philippines                   High   
3  Alabang-Zapote Rd, Barangay 202, Taguig, Phili...                 Medium   
4    Ayala Avenue, Barangay 101, Makati, Philippines                 Medium   

          Region  
0   Barangay 606  
1   Barangay 789  
2   Barangay 505  
3   Barangay 202  
4   Barangay 101  

Products DataFrame:
  Product_ID            Product_Name  Produc

In [123]:
# 3. Convert 'Company_ID' and 'Product_ID' to string to ensure consistent data types for merging
customers_df['Company_ID'] = customers_df['Company_ID'].astype(str)
transactions_df['Company_ID'] = transactions_df['Company_ID'].astype(str)
products_df['Product_ID'] = products_df['Product_ID'].astype(str)
transactions_df['Product_ID'] = transactions_df['Product_ID'].astype(str)

In [125]:
# 4. Merge datasets to create a single dataframe for modeling
merged_df = transactions_df.merge(customers_df, on='Company_ID').merge(products_df, on='Product_ID')

# Display the columns of the merged dataframe to identify the correct column names
print("\nColumns in Merged DataFrame:")
print(merged_df.columns)


Columns in Merged DataFrame:
Index(['Unnamed: 0', 'Transaction_ID', 'Company_ID', 'Product_ID', 'Quantity',
       'Transaction_Date', 'Product_Price_x', 'Total_Cost', 'Recency',
       'Purchase_Frequency', 'Total_Spending', 'Company_Name',
       'Company_Profit', 'Address', 'Profitability_Category', 'Region',
       'Product_Name', 'Product_Price_y', 'Price_Range'],
      dtype='object')


In [127]:
# 5. Feature selection and preprocessing
# Using 'Product_ID' as the target variable and 'Company_ID', 'Transaction_Date', 'Product_Price_y', and 'Total_Cost' as features
X = merged_df[['Company_ID', 'Transaction_Date', 'Product_Price_y', 'Total_Cost']]
y = merged_df['Product_ID']

# Convert categorical features to numerical values
X = pd.get_dummies(X, columns=['Company_ID', 'Transaction_Date'], drop_first=True)

In [129]:
# 6. Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [131]:
# 7. Initialize and train the model
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

In [133]:
# 8. Make predictions
y_pred = model.predict(X_test)

In [135]:
# 9. Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy}")

Model Accuracy: 0.7617960426179604


In [137]:
# 10. Save the model (optional)
joblib.dump(model, os.path.join(working_directory, 'predictive_model.pkl'))

print("Model training complete and saved as 'predictive_model.pkl'.")

Model training complete and saved as 'predictive_model.pkl'.
