In [None]:
# Task 1: Install & Load Dataset
# %pip install scikit-learn matplotlib pandas

import matplotlib.pyplot as plt
import pandas as pd
from sklearn.ensemble import IsolationForest

# `load_boston` was deprecated in scikit-learn 1.0 and removed in 1.2, so an
# unconditional import breaks this cell on current releases. Use it when it is
# still available (<= 1.1.x); otherwise fall back to the OpenML copy of the
# same dataset.
try:
    from sklearn.datasets import load_boston
except ImportError:
    load_boston = None

if load_boston is not None:
    boston = load_boston()
    df = pd.DataFrame(boston.data, columns=boston.feature_names)
else:
    from sklearn.datasets import fetch_openml

    # Requires network access on first use; scikit-learn caches the download.
    boston = fetch_openml(name="boston", version=1, as_frame=True)
    # OpenML stores some columns (CHAS, RAD) as categoricals; cast everything
    # to float so the frame is fully numeric, like the array load_boston returns.
    df = boston.data.astype(float)

# Task 2: Initialize & Fit Isolation Forest
# fit() returns the estimator itself, so construction and fitting are chained
# into a single expression. random_state is fixed for reproducible results.
iso_forest = IsolationForest(contamination=0.1, random_state=42).fit(df)

# Task 3: Predict Anomalies
# Store the model's label for every row (1 = normal, -1 = anomaly).
df['Anomaly'] = iso_forest.predict(df)
# sep="\n" puts the counts on their own line without the stray leading space
# that print's default " " separator would insert before the first row.
print("Anomaly counts (1 = normal, -1 = anomaly):", df['Anomaly'].value_counts(), sep="\n")

# Task 4: Visualize Anomalies using two features
# Colour each point by its label: red for -1 (anomaly), green otherwise.
colors = ['red' if x == -1 else 'green' for x in df['Anomaly']]

fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(df['RM'], df['LSTAT'], c=colors, alpha=0.6)

# Empty proxy series: they draw nothing and exist only so the legend has one
# entry per colour, making the red/green encoding readable on its own.
ax.scatter([], [], color='green', alpha=0.6, label='Normal (1)')
ax.scatter([], [], color='red', alpha=0.6, label='Anomaly (-1)')

ax.set_xlabel('Average number of rooms (RM)')
ax.set_ylabel('% lower status population (LSTAT)')
ax.set_title('Isolation Forest - Anomaly Detection')
ax.legend()
plt.show()

# Task 5: Test with Different Contamination Value
# Build the feature frame once, without the 'Anomaly' label column, so the
# second model is fitted and scored on the original features only.
features = df.drop(columns=['Anomaly'])
iso_forest_low = IsolationForest(contamination=0.05, random_state=42).fit(features)
df['Anomaly_0.05'] = iso_forest_low.predict(features)
print("\nAnomaly count with contamination=0.05:", df['Anomaly_0.05'].value_counts(), sep="\n")

In [None]:
# Mount Google Drive into this Colab runtime at /content/drive.
from google.colab import drive

drive.mount(mountpoint='/content/drive')