# Attrition Analysis for Green Destinations

This notebook computes the attrition rate and examines how **Age**, **Years at Company**, and **Monthly Income** relate to employee attrition at Green Destinations.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Read the Green Destinations CSV into a DataFrame and show its first rows
# as the cell output.
DATA_PATH = '/mnt/data/green_destination.csv'
df = pd.read_csv(DATA_PATH)
df.head()

In [None]:
# 1. Overall Attrition Rate
# value_counts(normalize=True) yields each label's share of the non-missing
# 'Attrition' values; the 'Yes' share is then scaled to a percentage.
attrition_shares = df['Attrition'].value_counts(normalize=True)
attr_rate = 100 * attrition_shares['Yes']
print(f"Overall Attrition Rate: {attr_rate:.2f}%")

In [None]:
# 2. Attrition by Age Groups
# Bucket employees into age bands and compute, within each band, the share of
# each 'Attrition' value.
bins = [18, 25, 35, 45, 55, 65]
# pd.cut builds right-closed intervals, so by default the first band is
# (18, 25] and anyone aged exactly 18 would get a NaN AgeGroup and silently
# drop out of the groupby below. include_lowest=True closes the left edge of
# the first band (pandas then labels it (17.999, 25.0]).
# Ages outside 18-65 still map to NaN and are excluded.
df['AgeGroup'] = pd.cut(df['Age'], bins, include_lowest=True)
age_attr = df.groupby('AgeGroup')['Attrition'].value_counts(normalize=True).unstack()
print(age_attr)
# Plot only the 'Yes' column: the proportion of leavers in each age band.
age_attr['Yes'].plot(kind='bar')
plt.title('Attrition Rate by Age Group')
plt.ylabel('Proportion')
plt.show()

In [None]:
# 3. Attrition by Years at Company
# Bucket employees by tenure and compute, within each bucket, the share of
# each 'Attrition' value.
bins = [0, 2, 5, 10, 20, 40]
# pd.cut builds right-closed intervals, so by default the first bucket is
# (0, 2] and employees with YearsAtCompany == 0 would get a NaN YACGroup and
# silently drop out of the groupby below. include_lowest=True closes the left
# edge of the first bucket (pandas then labels it (-0.001, 2.0]).
# Tenures above 40 still map to NaN and are excluded.
df['YACGroup'] = pd.cut(df['YearsAtCompany'], bins, include_lowest=True)
yac_attr = df.groupby('YACGroup')['Attrition'].value_counts(normalize=True).unstack()
print(yac_attr)
# Plot only the 'Yes' column: the proportion of leavers in each tenure bucket.
yac_attr['Yes'].plot(kind='bar')
plt.title('Attrition Rate by Years at Company')
plt.ylabel('Proportion')
plt.show()

In [None]:
# 4. Attrition by Monthly Income Quartiles
# qcut splits MonthlyIncome at its quartiles, giving four equal-frequency bins.
income_quartiles = pd.qcut(df['MonthlyIncome'], q=4)
df['IncomeQ'] = income_quartiles

# Share of each 'Attrition' value within every quartile; one column per value.
attrition_by_quartile = df.groupby('IncomeQ')['Attrition'].value_counts(normalize=True)
inc_attr = attrition_by_quartile.unstack()
print(inc_attr)

# Bar chart of the 'Yes' share per quartile.
ax = inc_attr['Yes'].plot(kind='bar')
ax.set_title('Attrition Rate by Income Quartile')
ax.set_ylabel('Proportion')
plt.show()

In [None]:
# 5. Logistic Regression Analysis
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder, StandardScaler

# Target: integer-coded Attrition. LabelEncoder numbers the sorted class
# labels, so with only 'No'/'Yes' present this gives No=0, Yes=1.
# NOTE(review): confirm the column holds no other values.
le = LabelEncoder()
df['Attrition_n'] = le.fit_transform(df['Attrition'])
y = df['Attrition_n']

# Predictors, standardised to zero mean and unit variance so the fitted
# coefficients are on a comparable scale.
X = df[['Age', 'YearsAtCompany', 'MonthlyIncome']]
scaler = StandardScaler()
X_scaled = scaler.fit(X).transform(X)

# Fit with scikit-learn's default settings; fit() returns the estimator itself.
model = LogisticRegression().fit(X_scaled, y)

# One coefficient per predictor (first row of coef_), labelled by column name.
coefs = pd.Series(data=model.coef_[0], index=X.columns)
print('Logistic Regression Coefficients:')
print(coefs)