## **This Notebook is for generating a PCA Graph to find the optimal number of Principal Components**

### **Importing Libraries**

In [None]:
import pandas as pd
from IPython.display import display, clear_output
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb
from sklearn import metrics

### **Reading Data**

In [None]:
# Load the train and test splits from the working directory.
df, df_test = (pd.read_csv(csv_name) for csv_name in ('train.csv', 'test.csv'))

# Training feature matrix: every column except 'subject' and 'Activity'
# ('Activity' is used as the classification target further down).
df_trim = df.drop(columns=['subject', 'Activity'])

### **Read the pca_acc.txt file and find the last number of Principal Components that was evaluated**

In [None]:
def next_component_count(path='pca_acc.txt'):
    """Return the number of components the next run should start from.

    Each non-blank line of the results file begins with the component count
    it was computed for, so the resume point is the last such count plus one.
    A missing or empty file means nothing has been computed yet: start at 1.

    Args:
        path: Location of the space-separated results file.

    Returns:
        The first component count that has not been evaluated yet.

    Raises:
        ValueError: If the last non-blank line does not start with an integer.
    """
    try:
        # Read-only access is sufficient; the file is only inspected here.
        with open(path, 'r') as f:
            # Skip blank lines so a trailing newline cannot break int().
            lines = [line for line in f if line.strip()]
    except FileNotFoundError:
        # First run: the results file has not been created yet.
        return 1

    if not lines:
        return 1
    return int(lines[-1].split()[0]) + 1


n = next_component_count()

### **Run the loop from the first not-yet-evaluated number of components up to the required number**

In [None]:
# Exclusive upper bound of the sweep: the last evaluated count is 69.
MAX_COMPONENTS = 70

# Loop-invariant inputs are built once instead of on every iteration.
# The test features are selected by column name, exactly like df_trim, so the
# PCA fitted on the training columns always sees the same columns at
# transform time regardless of column order in test.csv.
test_features = df_test.drop(['subject', 'Activity'], axis=1)
y_train = df['Activity']
y_test = df_test['Activity']


def fit_and_score(model, X_train, y_train, X_test, y_test):
    """Fit `model` on the training split and return its test accuracy."""
    model.fit(X_train, y_train)
    return metrics.accuracy_score(y_test, model.predict(X_test))


for i in range(n, MAX_COMPONENTS):
    clear_output()
    display(f'Running Principal Analysis for {i} Components')

    # Fit PCA on the training features only, then project both splits.
    # random_state only matters for the randomized/arpack solvers; it is set
    # so that repeated runs log the same numbers.
    pca = PCA(n_components=i, random_state=0)
    pca.fit(df_trim)
    X_train = pd.DataFrame(pca.transform(df_trim))
    X_test = pd.DataFrame(pca.transform(test_features))

    # Order matters: it is the column order of the results file.
    # NOTE(review): scikit-learn 1.2 deprecated the string penalty='none' in
    # favour of penalty=None -- confirm against the installed version.
    models = (
        LogisticRegression(random_state=0, multi_class='multinomial', penalty='none', solver='newton-cg'),
        # Seeded like the logistic regression so the logged accuracy is reproducible.
        RandomForestClassifier(n_estimators=100, random_state=0),
        xgb.XGBClassifier(),
        SVC(),
    )
    acc_lr, acc_clf, acc_xgb, acc_svm = (
        fit_and_score(model, X_train, y_train, X_test, y_test)
        for model in models
    )

    # Append immediately so an interrupted run can resume from this point.
    with open('pca_acc.txt', 'a') as f:
        f.write(f'{i} {acc_lr} {acc_clf} {acc_xgb} {acc_svm}\n')