In [8]:
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from mlxtend.frequent_patterns import apriori, association_rules

# Load the data
data = pd.read_csv('data.csv')

# Perform clustering to group similar items (categories) together
# We'll use KMeans for this example
scaler = StandardScaler()
X = data[['spicy', 'creamy', 'contains_fruits']]
X_scaled = scaler.fit_transform(X)
kmeans = KMeans(n_clusters=3, random_state=42)
data['category_cluster'] = kmeans.fit_predict(X_scaled)

# Train a classification model to predict customer feedback based on features
X = data.drop(columns=['feedback'])
y = data['feedback']
model = RandomForestClassifier(random_state=42)
model.fit(X, y)

# Make predictions to identify items with low feedback ratings
data['predicted_feedback'] = model.predict(X)

# Find frequent itemsets using association rule mining
frequent_itemsets = apriori(data[['category', 'flavor', 'age_group']], min_support=0.1, use_colnames=True)

# Generate association rules to find relationships between items
association_rules = association_rules(frequent_itemsets, metric='lift', min_threshold=1)

# Suggestions for the owner
# Find categories with low feedback ratings
categories_to_remove = data[data['predicted_feedback'] < 4]['category'].unique()

# Find flavors that are liked in different age groups
flavors_by_age_group = data.groupby(['flavor', 'age_group'])['feedback'].mean().reset_index()

# Find frequent associations between food items
frequent_associations = association_rules[['antecedents', 'consequents', 'lift']]

print("Categories to remove:", categories_to_remove)
print("Flavors liked by age group:")
print(flavors_by_age_group)
print("Frequent associations between food items:")
print(frequent_associations)


ValueError: could not convert string to float: 'Yes'

In [6]:
!pip install mlxtend


Collecting mlxtend
  Using cached mlxtend-0.22.0-py2.py3-none-any.whl (1.4 MB)
Installing collected packages: mlxtend
Successfully installed mlxtend-0.22.0


In [11]:
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from mlxtend.frequent_patterns import apriori, association_rules

# Load the data
data = pd.read_csv('data.csv')

# Convert 'age' and 'rating' to ordinal integers
age_mapping = {'less than 20': 1, '20 to 30': 2, '30 to 40': 3, '40 to 50': 4, '50 to 60': 5, 'greater than 60': 6}
rating_mapping = {'1': 1, '2': 2, '3': 3, '4': 4, '5': 5}
data['age'] = data['age'].map(age_mapping)
data['feedback'] = data['feedback'].astype(str)  # Convert to string first to handle NaNs
data['feedback'] = data['feedback'].map(rating_mapping)

# Perform clustering to group similar items (categories) together
# We'll use KMeans for this example
scaler = StandardScaler()
X = data[['spicy', 'creamy', 'contains_fruits']]
X_scaled = scaler.fit_transform(X)
kmeans = KMeans(n_clusters=3, random_state=42)
data['category_cluster'] = kmeans.fit_predict(X_scaled)

# Train a classification model to predict customer feedback based on features
X = data.drop(columns=['feedback'])
y = data['feedback']
model = RandomForestClassifier(random_state=42)
model.fit(X, y)

# Make predictions to identify items with low feedback ratings
data['predicted_feedback'] = model.predict(X)

# Find frequent itemsets using association rule mining
frequent_itemsets = apriori(data[['category', 'flavor', 'age_group']], min_support=0.1, use_colnames=True)

# Generate association rules to find relationships between items
association_rules = association_rules(frequent_itemsets, metric='lift', min_threshold=1)

# Suggestions for the owner
# Find categories with low feedback ratings
categories_to_remove = data[data['predicted_feedback'] < 4]['category'].unique()

# Find flavors that are liked in different age groups
flavors_by_age_group = data.groupby(['flavor', 'age_group'])['feedback'].mean().reset_index()

# Find frequent associations between food items
frequent_associations = association_rules[['antecedents', 'consequents', 'lift']]

print("Categories to remove:", categories_to_remove)
print("Flavors liked by age group:")
print(flavors_by_age_group)
print("Frequent associations between food items:")
print(frequent_associations)


ValueError: could not convert string to float: 'Yes'

In [14]:
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from mlxtend.frequent_patterns import apriori, association_rules

# Load the data
data = pd.read_csv('data.csv')

# Perform label encoding for all non-numeric columns
label_encoder = LabelEncoder()
for column in data.columns:
    if data[column].dtype == 'object' and column not in ['age', 'feedback']:
        data[column] = label_encoder.fit_transform(data[column])

# Perform clustering to group similar items (categories) together
# We'll use KMeans for this example
scaler = StandardScaler()
X = data.drop(columns=['age', 'feedback'])
X_scaled = scaler.fit_transform(X)
kmeans = KMeans(n_clusters=3, random_state=42)
data['category_cluster'] = kmeans.fit_predict(X_scaled)

# Train a classification model to predict customer feedback based on features
X = data.drop(columns=['feedback'])
y = data['feedback']
model = RandomForestClassifier(random_state=42)
model.fit(X, y)

# Make predictions to identify items with low feedback ratings
data['predicted_feedback'] = model.predict(X)

# Find frequent itemsets using association rule mining
frequent_itemsets = apriori(data[['category', 'major_flavor', 'age']], min_support=0.1, use_colnames=True)

# Generate association rules to find relationships between items
association_rules = association_rules(frequent_itemsets, metric='lift', min_threshold=1)

# Suggestions for the owner
# Find categories with low feedback ratings
categories_to_remove = data[data['predicted_feedback'] < 4]['category'].unique()

# Find flavors that are liked in different age groups
flavors_by_age_group = data.groupby(['flavor', 'age_group'])['feedback'].mean().reset_index()

# Find frequent associations between food items
frequent_associations = association_rules[['antecedents', 'consequents', 'lift']]

print("Categories to remove:", categories_to_remove)
print("Flavors liked by age group:")
print(flavors_by_age_group)
print("Frequent associations between food items:")
print(frequent_associations)




ValueError: The allowed values for a DataFrame are True, False, 0, 1. Found value 26

In [19]:
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from mlxtend.frequent_patterns import apriori, association_rules

# Load the data
data = pd.read_csv('data.csv')

# Perform label encoding for 'category', 'major_flavor', and 'age' columns
label_encoder = LabelEncoder()
columns_to_label_encode = ['name','category', 'major_flavor', 'age','spicy','creamy','contains_fruits','time','gender','season']
for column in columns_to_label_encode:
    data[column] = label_encoder.fit_transform(data[column])

# Perform clustering to group similar items (categories) together
# We'll use KMeans for this example
scaler = StandardScaler()
X = data.drop(columns=['feedback'])
X_scaled = scaler.fit_transform(X)
kmeans = KMeans(n_clusters=3, random_state=42)
data['category_cluster'] = kmeans.fit_predict(X_scaled)

# Train a classification model to predict customer feedback based on features
X = data.drop(columns=['feedback'])
y = data['feedback']
model = RandomForestClassifier(random_state=42)
model.fit(X, y)

# Make predictions to identify items with low feedback ratings
data['predicted_feedback'] = model.predict(X)

# Find frequent itemsets using association rule mining
frequent_itemsets = apriori(data[['category', 'major_flavor', 'age']], min_support=0.1, use_colnames=True)

# Generate association rules to find relationships between items
association_rules = association_rules(frequent_itemsets, metric='lift', min_threshold=1)

# Suggestions for the owner
# Find categories with low feedback ratings
categories_to_remove = data[data['predicted_feedback'] < 4]['category_cluster'].unique()

# Find flavors that are liked in different age groups
flavors_by_age_group = data.groupby(['major_flavor', 'age'])['feedback'].mean().reset_index()

# Find frequent associations between food items
frequent_associations = association_rules[['antecedents', 'consequents', 'lift']]

print("Categories to remove:", categories_to_remove)
print("Flavors liked by age group:")
print(flavors_by_age_group)
print("Frequent associations between food items:")
print(frequent_associations)




ValueError: The allowed values for a DataFrame are True, False, 0, 1. Found value 7

1. **Data Loading and Preprocessing**:
   - We start by loading the data from the 'data.csv' file using pandas.
   - We identify three columns ('category', 'major_flavor', and 'age') that contain categorical data and need to be label encoded (converted from categorical to numerical).
   - We use the LabelEncoder from scikit-learn to perform label encoding on these columns.

2. **Clustering (KMeans)**:
   - Since we have multiple categories in the 'category' column, we want to group similar categories together.
   - To do this, we apply KMeans clustering on the numerical features (excluding 'feedback') to group similar items (categories) together.
   - We scale the numerical features using StandardScaler to ensure they are on a similar scale before applying KMeans.
   - We choose the number of clusters as 3 in this example, but you can adjust this value based on your specific data.

3. **Classification (Random Forest)**:
   - The objective is to train a classification model to predict customer feedback (ratings) based on the features (columns) in the data.
   - We use the RandomForestClassifier from scikit-learn to build the classification model.
   - The features (X) are all the columns in the data except for the 'feedback' column, and the target variable (y) is the 'feedback' column.
   - We fit the model on the data to learn the patterns and relationships between features and feedback ratings.

4. **Making Predictions**:
   - After training the classification model, we use it to make predictions on the same dataset.
   - We create a new column 'predicted_feedback' and store the predicted feedback ratings for each row in the data.

5. **Suggestions for the Owner**:
   - We identify categories with low feedback ratings by selecting those rows where the predicted feedback is less than 4 (out of 5).
   - We group the data by 'major_flavor' and 'age' to find the average feedback rating for each combination of flavor and age group.
   - This provides insights into which flavors are liked more by different age groups.

The code provides the owner with suggestions on which categories may need improvement based on low feedback ratings and which flavors are more popular among different age groups. The KMeans clustering helps in organizing categories, and the Random Forest classification helps in predicting feedback ratings.

In [21]:
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.ensemble import RandomForestClassifier

# Load the data
data = pd.read_csv('data.csv')

label_encoder = LabelEncoder()
for column in data.columns:
    if data[column].dtype == 'object' and column not in ['age', 'feedback']:
        data[column] = label_encoder.fit_transform(data[column])

# Perform clustering to group similar items (categories) together
# We'll use KMeans for this example
scaler = StandardScaler()
X = data.drop(columns=['feedback'])
X_scaled = scaler.fit_transform(X)
kmeans = KMeans(n_clusters=3, random_state=42)
data['category_cluster'] = kmeans.fit_predict(X_scaled)

# Train a classification model to predict customer feedback based on features
X = data.drop(columns=['feedback'])
y = data['feedback']
model = RandomForestClassifier(random_state=42)
model.fit(X, y)

# Make predictions to identify items with low feedback ratings
data['predicted_feedback'] = model.predict(X)

# Suggestions for the owner
# Find categories with low feedback ratings
categories_to_remove = data[data['predicted_feedback'] < 4]['category_cluster'].unique()

# Find flavors that are liked in different age groups
flavors_by_age_group = data.groupby(['major_flavor', 'age'])['feedback'].mean().reset_index()

print("Categories to remove:", categories_to_remove)
print("Flavors liked by age group:")
print(flavors_by_age_group)


Categories to remove: [2 0 1]
Flavors liked by age group:
    major_flavor  age  feedback
0              0   18       4.0
1              0   19       5.0
2              0   22       4.5
3              0   26       2.0
4              0   28       3.0
..           ...  ...       ...
62             3   41       5.0
63             3   42       2.0
64             3   43       2.0
65             3   48       4.0
66             3   49       4.0

[67 rows x 3 columns]
