In [28]:
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler


In [47]:
# Step 1: Read the USArrests CSV (relative to the working directory) and preview it
csv_path = 'USArrests.csv'
data = pd.read_csv(csv_path)
data.head()


Unnamed: 0.1,Unnamed: 0,Murder,Assault,UrbanPop,Rape
0,Alabama,13.2,236,58,21.2
1,Alaska,10.0,263,48,44.5
2,Arizona,8.1,294,80,31.0
3,Arkansas,8.8,190,50,19.5
4,California,9.0,276,91,40.6


In [48]:
# Step 2: Data preprocessing
# Count the null entries in every column so that any gaps are visible
# before the features are scaled.
missing_values = data.isna().sum()
print("Missing Values:\n", missing_values)



Missing Values:
 Unnamed: 0    0
Murder        0
Assault       0
UrbanPop      0
Rape          0
dtype: int64


In [49]:
# Step 3: Feature Scaling
# Pick the numeric features by name instead of by position: a positional
# slice silently scales the wrong columns (or fails on the state-name
# strings) if the CSV layout ever changes, whereas a missing name raises
# a KeyError immediately.
feature_columns = ['Murder', 'Assault', 'UrbanPop', 'Rape']
scaler = StandardScaler()
# Standardize each feature so that no single column dominates the PCA
# purely because of its measurement scale.
scaled_data = scaler.fit_transform(data[feature_columns])

In [50]:
# Step 4: Fit PCA on the standardized features and project onto the leading axes
n_kept = 2  # number of principal components to retain
pca = PCA(n_components=n_kept)
principal_components = pca.fit_transform(scaled_data)



In [51]:
# Step 5: Attach the two PCA scores to the state-name column.
# principal_df is built on data's own index because pd.concat(axis=1)
# aligns rows by index label: with mismatched indexes it would quietly
# produce extra NaN-padded rows instead of raising an error.
principal_df = pd.DataFrame(
    principal_components,
    columns=['PC1', 'PC2'],
    index=data.index,
)
final_df = pd.concat([data.iloc[:, 0], principal_df], axis=1)

In [52]:
# Step 6: Report the explained-variance ratio of each retained component
variance_explained = pca.explained_variance_ratio_
variance_pc1, variance_pc2 = variance_explained[0], variance_explained[1]  # PC1, PC2

# Print each ratio as a percentage with two decimals.
for message, share in (
    ("Variance explained by PC1: {:.2f}%", variance_pc1),
    ("Variance explained by PC2: {:.2f}%", variance_pc2),
):
    print(message.format(share * 100))

Variance explained by PC1: 62.01%
Variance explained by PC2: 24.74%


In [53]:
# Step 7: Validate the PCA DataFrame built in Step 5.
# This cell used to rebuild principal_df / final_df with code identical to
# Step 5, which recomputed the same objects and added nothing. Checking the
# invariants instead catches a row misalignment between the state names and
# the PCA scores before the table is printed.
assert final_df.shape == (len(data), 3), "expected one row per state and 3 columns"
assert final_df[['PC1', 'PC2']].notna().all().all(), "PCA scores misaligned with state names"

In [54]:
# Show the first rows of the state / PC1 / PC2 table
preview = final_df.head()
print("\nFinal DataFrame with PCA results:\n", preview)


Final DataFrame with PCA results:
    Unnamed: 0       PC1       PC2
0     Alabama  0.985566  1.133392
1      Alaska  1.950138  1.073213
2     Arizona  1.763164 -0.745957
3    Arkansas -0.141420  1.119797
4  California  2.523980 -1.542934
