# Gradient Boost for Reviews Dataset

Step 1: Import Libraries

In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, precision_score, f1_score, classification_report


Step 2: Load the Dataset

In [7]:
# Read the cleaned reviews CSV (path is relative to the notebook's working
# directory) into the DataFrame used by every later cell.
df_reviews = pd.read_csv(filepath_or_buffer='amazon_review_cleaned.csv')


Step 3: Encode the Target Class Column

In [8]:
# Replace the values of the 'Class' column with integer codes.
# The fitted encoder is kept so that label_encoder.classes_ /
# label_encoder.inverse_transform can translate codes back to the original labels.
label_encoder = LabelEncoder()
label_encoder.fit(df_reviews['Class'])
df_reviews['Class'] = label_encoder.transform(df_reviews['Class'])


Step 4: Split Features (X) and Target (y)

In [9]:
# Target: the encoded 'Class' column.
y = df_reviews['Class']
# Features: every remaining column except 'ID', which is dropped because it is
# not a feature.
X = df_reviews.drop(['Class', 'ID'], axis=1)


Step 5: Stratified Train-Test Split

In [10]:
# Hold out 20% of the rows for testing. Stratifying on y keeps the class
# proportions the same in both parts; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=42,
    test_size=0.2,
)


Step 6: Initialize and Train Gradient Boosting Model

In [11]:
# Baseline: gradient boosting with scikit-learn's default hyperparameters.
# random_state is pinned (same seed as the train/test split above) so that a
# Restart & Run All reproduces the same fitted model and therefore the same
# metrics; without it the fit can differ from run to run.
model = GradientBoostingClassifier(random_state=42)
model.fit(X_train, y_train)

print("✅ Model trained successfully!")


✅ Model trained successfully!


Step 7: Make Predictions

In [12]:
# Predicted (encoded) class for every row of the held-out test features.
y_pred = model.predict(X=X_test)


Step 8: Evaluate the Model

In [13]:
# Score the baseline predictions against the held-out labels.
# 'weighted' averages the per-class scores using each class's support.
# zero_division=0 is passed explicitly wherever precision is computed: a class
# that receives no predictions has an undefined precision, and scikit-learn
# would otherwise emit an UndefinedMetricWarning while still reporting 0.
# The reported numbers are therefore unchanged; only the warnings go away.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted', zero_division=0)
f1 = f1_score(y_test, y_pred, average='weighted')

print("\n=== Evaluation Results on Reviews Dataset ===")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision (weighted): {precision:.4f}")
print(f"F1 Score (weighted): {f1:.4f}")

# Per-class precision / recall / F1 / support table.
print("\nClassification Report:")
print(classification_report(y_test, y_pred, zero_division=0))



=== Evaluation Results on Reviews Dataset ===
Accuracy: 0.4000
Precision (weighted): 0.4574
F1 Score (weighted): 0.4042

Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.33      0.50         3
           1       0.00      0.00      0.00         2
           2       0.00      0.00      0.00         2
           3       0.75      1.00      0.86         3
           4       0.20      0.33      0.25         3
           5       0.25      0.33      0.29         3
           6       0.00      0.00      0.00         3
           7       1.00      0.33      0.50         3
           8       0.00      0.00      0.00         3
           9       0.75      0.75      0.75         4
          10       1.00      0.67      0.80         3
          11       0.14      0.25      0.18         4
          12       0.50      0.33      0.40         3
          13       0.33      0.25      0.29         4
          14       0.00      0.00      0.00 

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [14]:
# Hyperparameter settings to compare; each entry is passed to
# GradientBoostingClassifier as keyword arguments.
param_grid = [
    dict(n_estimators=100, learning_rate=0.1, max_depth=3),
    dict(n_estimators=200, learning_rate=0.1, max_depth=3),
    dict(n_estimators=100, learning_rate=0.05, max_depth=3),
    dict(n_estimators=100, learning_rate=0.1, max_depth=5),
]

# One record per setting: the parameters plus the three scores.
results = []

# NOTE(review): every setting is fitted on the training split and scored on
# X_test / y_test, so these scores should not also be used to choose the final
# model. Confirm whether a separate validation split or CV is intended.
# `model` and `y_pred` are rebound on each pass, so after the loop they refer to
# the last setting in param_grid.
for params in param_grid:
    print(f"\nTraining with parameters: {params}")

    # Same seed for every setting so the fits are repeatable.
    model = GradientBoostingClassifier(random_state=42, **params)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(
        y_test, y_pred, average='weighted', zero_division=0
    )
    f1 = f1_score(y_test, y_pred, average='weighted')

    print(
        f"Accuracy: {accuracy:.4f}\n"
        f"Precision (weighted): {precision:.4f}\n"
        f"F1 Score (weighted): {f1:.4f}"
    )

    results.append(
        dict(params=params, accuracy=accuracy, precision=precision, f1_score=f1)
    )



Training with parameters: {'n_estimators': 100, 'learning_rate': 0.1, 'max_depth': 3}
Accuracy: 0.4000
Precision (weighted): 0.4925
F1 Score (weighted): 0.4201

Training with parameters: {'n_estimators': 200, 'learning_rate': 0.1, 'max_depth': 3}
Accuracy: 0.4067
Precision (weighted): 0.4960
F1 Score (weighted): 0.4224

Training with parameters: {'n_estimators': 100, 'learning_rate': 0.05, 'max_depth': 3}
Accuracy: 0.4000
Precision (weighted): 0.4819
F1 Score (weighted): 0.4113

Training with parameters: {'n_estimators': 100, 'learning_rate': 0.1, 'max_depth': 5}
Accuracy: 0.4000
Precision (weighted): 0.5050
F1 Score (weighted): 0.4153
