# **PCA**

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Read the apple-quality table from the working directory and preview
# its first five rows as the cell output.
df = pd.read_csv(filepath_or_buffer='apple_quality.csv')
df.head(n=5)

Unnamed: 0,A_id,Size,Weight,Sweetness,Crunchiness,Juiciness,Ripeness,Acidity,Quality
0,0,-3.970049,-2.512336,5.34633,-1.012009,1.8449,0.32984,-0.49159,good
1,1,-1.195217,-2.839257,3.664059,1.588232,0.853286,0.86753,-0.722809,good
2,2,-0.292024,-1.351282,-1.738429,-0.342616,2.838636,-0.038033,2.621636,bad
3,3,-0.657196,-2.271627,1.324874,-0.097875,3.63797,-3.413761,0.790723,good
4,4,1.364217,-1.296612,-0.384658,-0.553006,3.030874,-1.303849,0.501984,good


In [3]:
# Count missing values per column (`isna` is the same check as `isnull`).
df.isna().sum(axis=0)

A_id           0
Size           0
Weight         0
Sweetness      0
Crunchiness    0
Juiciness      0
Ripeness       0
Acidity        0
Quality        0
dtype: int64

In [4]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation. `Quality` is the label and `A_id`
# appears to be a row identifier, so both are dropped from the feature matrix.
# A fixed `random_state` makes the split -- and every accuracy computed from
# it -- reproducible across kernel restarts; `stratify` keeps the label
# proportions the same in the train and test partitions.
X_train, X_test, y_train, y_test = train_test_split(
    df.drop(columns=['A_id', 'Quality']),
    df['Quality'],
    test_size=0.2,
    random_state=42,
    stratify=df['Quality'],
)

In [5]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.decomposition import PCA

### Applying Model Without PCA

In [6]:
# Baseline: logistic regression trained on all original features, scored
# on the held-out split.
model = LogisticRegression(max_iter=200).fit(X_train, y_train)
y_pred = model.predict(X_test)
accuracy_no_pca = accuracy_score(y_true=y_test, y_pred=y_pred)

### Applying Model With PCA

In [7]:
# Reduce the features to two principal components. The projection is
# fitted on the training split only and then applied to the test split.
pca = PCA(n_components=2)
X_train_pca = pca.fit_transform(X=X_train)
X_test_pca = pca.transform(X=X_test)

# Same classifier configuration as the baseline, trained on the
# two-component representation instead of the original features.
model_pca = LogisticRegression(max_iter=200).fit(X_train_pca, y_train)
y_pred_pca = model_pca.predict(X_test_pca)
accuracy_with_pca = accuracy_score(y_true=y_test, y_pred=y_pred_pca)

#### Results

In [8]:
# Report both test accuracies, one per line.
for label, score in (
    ('Accuracy without PCA ', accuracy_no_pca),
    ('Accuracy with PCA ', accuracy_with_pca),
):
    print(label, score)

Accuracy without PCA  0.75375
Accuracy with PCA  0.61625


#### PCA does not always improve accuracy: in this case, keeping only 2 principal components lowered test accuracy from about 0.75 to about 0.62. The effect of PCA on accuracy depends on factors such as the dataset, the nature of the features, the complexity of the problem, and the number of components retained.