# Setting up

In [None]:
import pandas as pd
from textblob import TextBlob
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [None]:
# Read the cleaned complaints workbook into a DataFrame
df = pd.read_excel("cleaned_complaints.xlsx")

# Confirm the load, report the row count, and show the first rows
print(
    "Dataset loaded successfully!",
    f"Total records: {df.shape[0]:,}",
    sep="\n",
)
print(df.head())

# Define Feature Set for Modeling

In [None]:
# Sentiment columns always used as model inputs, plus one column that is
# added only when the loaded dataset actually contains it
optional_feature = 'sentiment_subjectivity'
required_features = ['sentiment_polarity', 'anger_score', 'trust_score']
required_features += [optional_feature] if optional_feature in df.columns else []

# Restrict to the inputs and the target, discarding rows with any missing value
model_df = df[[*required_features, 'dispute_binary']].dropna()
print(f"\nRecords available for modeling: {model_df.shape[0]:,}")

# Split Data into Train/Test Sets

In [None]:
# Hold out 30% of the rows for evaluation when there are more than 20 usable
# records; with fewer rows no split is attempted.
if len(model_df) > 20:
    X_train, X_test, y_train, y_test = train_test_split(
        model_df[required_features],
        model_df['dispute_binary'],
        test_size=0.3,
        random_state=42  # fixed seed so the split is reproducible across runs
    )
else:
    # Fallback for very small datasets: every row is used for both fitting
    # and scoring. Any accuracy computed from X_test/y_test is therefore a
    # training-set score, not a held-out estimate.
    X_train, X_test, y_train, y_test = (
        model_df[required_features],
        model_df[required_features],
        model_df['dispute_binary'],
        model_df['dispute_binary']
    )
    # Make the limitation visible in the notebook output instead of letting
    # the downstream accuracy figure pass as an out-of-sample result.
    print(
        f"\nWARNING: only {len(model_df):,} records available; "
        "the model will be trained and evaluated on the same rows, "
        "so the reported accuracy is not a held-out estimate."
    )

# Train Logistic Regression Model

In [None]:
# Fit a logistic regression on the training split (fit() returns the estimator)
model = LogisticRegression(max_iter=1000, random_state=42).fit(X_train, y_train)

# Score the fitted model on the test split
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

# Performance summary banner
print(
    "\n------------------------------------------",
    " MODEL PERFORMANCE",
    "------------------------------------------",
    sep="\n",
)
print(f"Model Accuracy: {accuracy * 100:.2f}%")
print(f"Training Samples: {X_train.shape[0]:,}")
print(f"Testing Samples:  {X_test.shape[0]:,}")

# Interpret Model Coefficients

In [None]:
# Pair each input column with the coefficient the fitted model assigned to it.
# model.coef_[0] is the single coefficient row of the fitted estimator, in the
# same order as the columns passed to fit() (required_features).
coef_df = pd.DataFrame({
    'Feature': required_features,
    'Coefficient': model.coef_[0]
})

# Rank features by absolute coefficient, largest first; the helper column is
# only needed for sorting and is dropped again afterwards.
coef_df['AbsValue'] = coef_df['Coefficient'].abs()
coef_df = coef_df.sort_values('AbsValue', ascending=False).drop(columns=['AbsValue'])

print("\n------------------------------------------")
print(" FEATURE IMPORTANCE & INTERPRETATION")
print("------------------------------------------")

for feature, coef in zip(coef_df['Feature'], coef_df['Coefficient']):
    # A positive coefficient raises the predicted dispute probability and a
    # negative one lowers it. A coefficient of exactly zero has no effect and
    # must not be reported as a decrease.
    if coef > 0:
        direction = "INCREASES"
    elif coef < 0:
        direction = "DECREASES"
    else:
        direction = "DOES NOT CHANGE"
    magnitude = abs(coef)

    # Star rating by coefficient magnitude only. This is NOT a statistical
    # significance test (no p-values are computed here).
    # NOTE(review): no feature scaling is visible in these cells, so
    # magnitudes are only comparable if the inputs share a scale - confirm.
    if magnitude > 0.5:
        stars = "***"
    elif magnitude > 0.2:
        stars = "**"
    else:
        stars = "*"

    print(f"• {feature:<22} {coef:+.3f} {stars}")
    print(f"   → {direction} dispute probability")