# In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from statsmodels.stats.outliers_influence import variance_inflation_factor

# Fetch the iris bunch and unpack the feature matrix, class indices, and class names.
iris = datasets.load_iris()
X, y = iris.data, iris.target
target_names = iris.target_names

# Rescale the feature matrix with StandardScaler; X is the scaled array from here on.
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Tabular view: one column per scaled feature, plus each row's class name
# (looked up by indexing the class-name array with the class indices).
df = pd.DataFrame(X, columns=iris.feature_names).assign(target=target_names[y])

# Overlay one count histogram per class for the sepal-length feature.
# NOTE: X goes through StandardScaler before df is built, so the plotted values
# are standardized scores, not centimetres. The column key keeps the dataset's
# original feature name; only the axis label reflects the rescaling.
plt.figure(figsize=(12, 8))
for target_name in target_names:
    # Rows belonging to this class, restricted to the sepal-length column.
    class_values = df.loc[df['target'] == target_name, 'sepal length (cm)']
    plt.hist(class_values, bins=10, alpha=0.6, label=target_name)

plt.xlabel('sepal length (standardized)')
plt.ylabel('Count')
plt.title('Histograms of sepal length by class')
plt.legend()
plt.show()

# Plot a single density-normalized histogram of sepal length over all samples,
# pooled across classes, as the title states.
# NOTE: X goes through StandardScaler before df is built, so the plotted values
# are standardized scores, not centimetres. The column key keeps the dataset's
# original feature name; only the axis label reflects the rescaling.
plt.figure(figsize=(12, 8))
plt.hist(df['sepal length (cm)'], bins=10, alpha=0.6, density=True, label='all classes')

plt.xlabel('sepal length (standardized)')
plt.ylabel('Density')
plt.title('Histogram of sepal length for the whole dataset')
plt.legend()
plt.show()

# Compute pairwise correlations between the numeric feature columns.
# The 'target' column holds class-name strings; on pandas >= 2.0 DataFrame.corr()
# no longer drops non-numeric columns silently and raises ValueError instead,
# so restrict the frame to numeric columns before correlating.
corr_matrix = df.select_dtypes(include='number').corr()

# Render the correlation matrix as an annotated heatmap.
plt.figure(figsize=(10, 8))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm')
plt.title('Correlation Matrix')
plt.show()

# Multicollinearity check: compute one variance inflation factor per column of X
# and tabulate it next to the matching feature name.
vif_scores = [variance_inflation_factor(X, col_idx) for col_idx in range(X.shape[1])]
vif_data = pd.DataFrame({"Feature": iris.feature_names, "VIF": vif_scores})

print("Variance Inflation Factor (VIF):")
print(vif_data)
