# In [None]:
# Import Libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, f1_score, classification_report
from sklearn.preprocessing import LabelEncoder

# Load Data
# Reads the full dataset from 'sales_data.csv' (path is relative to the
# current working directory).
data = pd.read_csv('sales_data.csv')

# Data Preprocessing
# Handle missing values by propagating the last valid value in each column
# forward. NOTE(review): forward-fill cannot fill NaNs that occur before the
# first valid value of a column, so such cells stay NaN after this step.
data.ffill(inplace=True)

# Print column names to ensure correct column reference
print(data.columns)

# Encoding categorical variables
# Assuming 'User ID' and 'Product ID' are the columns that need encoding.
# Each column gets its OWN LabelEncoder, stored in `label_encoders`, so the
# fitted class mapping of every column stays available (e.g. for
# `label_encoders['User ID'].inverse_transform(...)`). Refitting one shared
# encoder would discard the mapping of every column but the last.
id_columns = ['User ID', 'Product ID']
label_encoders = {}
for id_column in id_columns:
    label_encoder = LabelEncoder()
    data[id_column] = label_encoder.fit_transform(data[id_column])
    label_encoders[id_column] = label_encoder
# After the loop `label_encoder` is bound to the 'Product ID' encoder, so any
# later code that uses that name keeps working.

# Assuming 'Rating' is the target column
target_column = 'Rating'

# Split data into features and target: every column except the target is
# used as a feature.
X = data.drop(target_column, axis=1)
y = data[target_column]

# Split data into training and testing sets (80% train / 20% test); the fixed
# random_state makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Choose a Model
# The target is treated as a set of discrete classes (classification).
model = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the Model
model.fit(X_train, y_train)

# Evaluate Model on the held-out test set
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
# 'weighted' averages the per-class F1 scores weighted by class support.
f1 = f1_score(y_test, y_pred, average='weighted')

# Display results
print(f'Accuracy: {accuracy:.2f}')
print(f'F1 Score: {f1:.2f}')
print(classification_report(y_test, y_pred))

# Plot feature importance (at most the 10 most important features), with
# axis labels and a title so the chart is readable on its own.
feature_importances = pd.Series(model.feature_importances_, index=X.columns)
ax = feature_importances.nlargest(10).plot(kind='barh')
ax.set_xlabel('Importance')
ax.set_ylabel('Feature')
ax.set_title('Feature Importances (top 10)')
plt.tight_layout()
plt.show()