In [52]:
import numpy as np
import pandas as pd
from sklearn import metrics
from scipy.spatial.distance import cdist
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.decomposition import TruncatedSVD
from sklearn.decomposition import PCA
from sklearn.manifold import Isomap as ISO

import warnings
# Suppress every warning for the rest of the session: no category, module or
# message filter is given, so this applies to all warnings raised by any
# library used below.
# NOTE(review): this also hides any solver warnings emitted while fitting the
# models in later cells; consider narrowing the filter.
warnings.filterwarnings("ignore")

# Load the table from the working directory.
heart_df = pd.read_csv("df.csv")

# The 'class' column is the target; all remaining columns are the features.
y = heart_df['class'].values
X = heart_df.drop(columns='class').values

# Hold out 30% of the rows as a test set. The split is stratified on y and
# seeded so that it is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
    stratify=y,
)

##### 1. Take one of the supervised learning models you have built recently and apply at least three dimensionality reduction techniques to it (separately). Be sure to create a short summary of each technique you use. Indicate how each changed the model performance. 

##### Reference: https://machinelearningmastery.com/dimensionality-reduction-algorithms-with-python/


In [51]:
# Singular Value Decomposition
#
# Sweep the number of TruncatedSVD components from 1 to 135, fit a logistic
# regression on each reduced training set, and keep the component count with
# the highest test-set accuracy (the first one wins on ties).
#
# NOTE(review): num_comp is selected using test-set accuracy, so `best` is an
# optimistic estimate; consider selecting on a validation split or with
# cross-validation on the training data instead.
# NOTE(review): the upper bound of 135 is hard-coded; presumably it is tied to
# the number of columns in df.csv -- confirm.

best = None
num_comp = None

for i in range(1, 136):
    # TruncatedSVD's default solver is randomized, so seed it; otherwise the
    # selected component count can change from one run to the next.
    svd = TruncatedSVD(n_components=i, random_state=42)
    X_train_svd = svd.fit_transform(X_train)
    X_test_svd = svd.transform(X_test)
    model_1 = LogisticRegression(random_state=42).fit(X_train_svd, y_train)

    # Score once and reuse the value for both the comparison and the update.
    score = model_1.score(X_test_svd, y_test)
    if best is None or score > best:
        best = score
        num_comp = i

print('Optimal Number of Components: ', num_comp)
print('Accuracy Score: ', best)

Optimal Number of Components:  54
Accuracy Score:  0.7941176470588235


In [48]:
# Principal Component Analysis
#
# Try every component count from 1 to 135: reduce both splits with PCA fitted
# on the training data, fit a logistic regression, and remember the count that
# gives the highest test-set accuracy (earliest count wins on ties).

best, num_comp = None, None

for k in range(1, 136):
    pca = PCA(n_components=k)
    X_train_pca = pca.fit_transform(X_train)
    X_test_pca = pca.transform(X_test)

    model_2 = LogisticRegression(random_state=42).fit(X_train_pca, y_train)
    accuracy = model_2.score(X_test_pca, y_test)

    # Strict comparison: a later count must beat, not merely match, the best.
    if best is None or accuracy > best:
        best, num_comp = accuracy, k

print('Optimal Number of Components: ', num_comp)
print('Accuracy Score: ', best)

Optimal Number of Components:  51
Accuracy Score:  0.8235294117647058


In [53]:
# Isomap Embedding
#
# Try every embedding dimension from 1 to 135: embed both splits with an
# Isomap fitted on the training data, fit a logistic regression, and remember
# the dimension that gives the highest test-set accuracy (earliest dimension
# wins on ties).

best, num_comp = None, None

for k in range(1, 136):
    iso = ISO(n_components=k)
    X_train_iso = iso.fit_transform(X_train)
    X_test_iso = iso.transform(X_test)

    model_3 = LogisticRegression(random_state=42).fit(X_train_iso, y_train)
    accuracy = model_3.score(X_test_iso, y_test)

    # Strict comparison: a later dimension must beat, not merely match, the best.
    if best is None or accuracy > best:
        best, num_comp = accuracy, k

print('Optimal Number of Components: ', num_comp)
print('Accuracy Score: ', best)

Optimal Number of Components:  23
Accuracy Score:  0.7352941176470589


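### Summary

##### Techniques
- **Truncated SVD** factorizes the data matrix and keeps only the directions belonging to the largest singular values. It does not center the data first.
- **PCA** centers the data and projects it onto the orthogonal directions of greatest variance.
- **Isomap** is a non-linear method: it builds a nearest-neighbor graph, estimates geodesic distances along that graph, and finds a low-dimensional embedding that preserves those distances.

##### Results
Each technique was tried with 1 to 135 components, feeding the reduced data to a logistic regression. The best test-set accuracies recorded in the outputs above are:

| Technique | Components | Accuracy |
|---|---|---|
| PCA | 51 | 0.824 |
| Truncated SVD | 54 | 0.794 |
| Isomap | 23 | 0.735 |

PCA gave the highest accuracy, followed by truncated SVD; Isomap scored lowest but needed the fewest components.

##### Caveats
- The number of components was chosen using test-set accuracy, so these scores are optimistic estimates.
- TODO: add the accuracy of the same logistic regression without dimensionality reduction, so the change in performance caused by each technique can be stated.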

##### 2. Write a function that indicates whether an input IPv4 address is valid. An IP address is valid if it has 4 values between 0 and 255 (inclusive), separated by periods.

#### Input 1:
#### 2.33.245.5
#### Output 1:
#### True


#### Input 2:
#### 12.345.67.89
#### Output 2:
#### False

In [123]:
def IPA(address):
    """Return True if `address` is a valid dotted-decimal IPv4 address.

    An address is valid when it consists of exactly four fields separated by
    periods and every field is a non-empty run of ASCII digits whose value is
    between 0 and 255 (inclusive).

    Parameters
    ----------
    address : str
        The candidate address. Any non-string input is treated as invalid.

    Returns
    -------
    bool
        True when the address is valid, False otherwise. The function never
        raises for bad input.

    Notes
    -----
    Leading zeros (e.g. '01.2.3.4') are accepted, because the numeric value of
    each field is what is checked.

    Examples
    --------
    >>> IPA('2.33.245.5')
    True
    >>> IPA('12.345.67.89')
    False
    """
    if not isinstance(address, str):
        return False

    fields = address.split('.')
    if len(fields) != 4:
        return False

    for field in fields:
        # int() alone is too permissive for this rule: it accepts a sign
        # ('+5'), surrounding whitespace (' 5 '), underscores ('1_0') and
        # non-ASCII digits. Require plain ASCII digits before converting.
        if not field or any(ch not in '0123456789' for ch in field):
            return False
        try:
            value = int(field)
        except ValueError:
            # Extremely long digit strings can be rejected by int(); such a
            # field could not be within range anyway.
            return False
        if value > 255:
            return False

    return True

In [125]:
# Sample input 1 from the prompt: four fields, all within 0-255.
IPA('2.33.245.5')

True

In [126]:
# Sample input 2 from the prompt: the second field (345) is above 255.
IPA('12.345.67.89')

False

In [124]:
# Negative first field.
IPA('-32.53.208.33')

False

In [106]:
# Last field (330) is above 255.
IPA('32.53.208.330')

False

In [107]:
# First field contains a non-digit character.
IPA('32a.53.208.33')

False