In [6]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import accuracy_score
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Load the dataset
data = pd.read_csv('your_dataset.csv')

# Convert target variable to categorical
data['Item Purchased'] = data['Item Purchased'].astype('category')

# Split features and target variable
X = data[['Age', 'Gender', 'Location', 'Season']]
y = data['Item Purchased']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define preprocessing steps for categorical variables
categorical_cols = ['Gender', 'Location', 'Season']
preprocessor = ColumnTransformer(
    transformers=[
        ('cat', OneHotEncoder(), categorical_cols)
    ])

# Define the model
model = RandomForestClassifier(n_estimators=1000, random_state=42)

# Create a pipeline
pipeline = Pipeline(steps=[('preprocessor', preprocessor),
                           ('model', model)])

# Train the model
pipeline.fit(X_train, y_train)

# Make predictions (probabilities)
y_probs = pipeline.predict_proba(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_probs.argmax(axis=1))
print("Accuracy:", accuracy)


# Import joblib
import joblib

# Train the model
pipeline.fit(X_train, y_train)

# Save the model to a file
joblib.dump(pipeline, 'model.pkl')

# Load the model from the file
loaded_model = joblib.load('model.pkl')

# Get top 10 recommended items for a new input
def get_top_10_recommendations(new_input):
    # Transform the new input using the preprocessor
    new_input_transformed = preprocessor.transform(new_input)
    
    # Check missing columns and add them if necessary
    missing_cols = set(pipeline.named_steps['preprocessor'].get_feature_names_out()) - set(new_input.columns)
    if missing_cols:
        for col in missing_cols:
            new_input[col] = 0
    
    # Reorder columns to match the order in the training data
    new_input_transformed = new_input[new_input.columns.tolist()]
    
    # Predict probabilities for the new input
    new_input_probs = pipeline.predict_proba(new_input_transformed)
    
    # Get the indices of top 10 probabilities
    top_10_indices = new_input_probs.argsort()[0][-10:][::-1]
    
    # Get the corresponding item labels
    top_10_items = pipeline.classes_[top_10_indices]
    
    return top_10_items



Accuracy: 0.0


In [7]:
# Example of getting recommendations for a new input
new_input = pd.DataFrame({'Age': [50],
                          'Gender': ['Female'],
                          'Location': ['California'],
                          'Season': ['Spring']})

recommendations = get_top_10_recommendations(new_input)
print("Top 10 Recommendations:", recommendations)


Top 10 Recommendations: ['Skirt' 'Hoodie' 'Sandals' 'Sneakers' 'Boots' 'Scarf' 'Shorts' 'Coat'
 'Pants' 'Shirt']
