<a href="https://colab.research.google.com/github/agg-shambhavi/Boston-Housing/blob/master/Validation_of_features.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import os
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.svm import SVC

In [None]:
# Directory holding the feature/label CSVs. The default is the original Google
# Drive location; set the VQC_DATA_DIR environment variable to point the
# notebook at a different copy of the data without editing this cell.
DATA_DIR = os.environ.get(
    "VQC_DATA_DIR",
    "/content/drive/MyDrive/Purdue/VQC Project/Code/final_data_cnn_ae",
)

# Features stay as DataFrames; labels are flattened to 1-D arrays, which is the
# shape the scikit-learn estimators and metrics below are called with.
x_train = pd.read_csv(os.path.join(DATA_DIR, "x_train.csv"))
y_train = pd.read_csv(os.path.join(DATA_DIR, "y_train.csv")).to_numpy().ravel()
x_test = pd.read_csv(os.path.join(DATA_DIR, "x_test.csv"))
y_test = pd.read_csv(os.path.join(DATA_DIR, "y_test.csv")).to_numpy().ravel()

# Fail early if a feature file and its label file are out of sync.
assert len(x_train) == len(y_train), "x_train / y_train row counts differ"
assert len(x_test) == len(y_test), "x_test / y_test row counts differ"

## Logistic Regression

In [None]:
# Baseline 1: logistic regression with default hyperparameters and a fixed seed.
# `fit` returns the estimator itself, so construction and fitting are chained.
lreg = LogisticRegression(random_state=42).fit(x_train, y_train)

# Predictions for both splits; the metric cells that follow read these names.
y_train_pred, y_test_pred = lreg.predict(x_train), lreg.predict(x_test)

In [None]:
# Fraction of correctly classified samples on each split.
train_acc = accuracy_score(y_true=y_train, y_pred=y_train_pred)
test_acc = accuracy_score(y_true=y_test, y_pred=y_test_pred)

# One print call, one line per split.
print(f"Train accuracy: {train_acc}", f"Test accuracy: {test_acc}", sep="\n")

Train accuracy: 0.5262083049693669
Test accuracy: 0.5370370370370371


In [None]:
# Per-class precision / recall / F1 on the test split for the predictions
# currently held in `y_test_pred`.
print(classification_report(y_true=y_test, y_pred=y_test_pred))

              precision    recall  f1-score   support

           0       0.54      0.63      0.58       956
           1       0.54      0.44      0.49       934

    accuracy                           0.54      1890
   macro avg       0.54      0.54      0.53      1890
weighted avg       0.54      0.54      0.53      1890



## SVM

In [None]:
# Baseline 2: support vector classifier with an RBF kernel and a fixed seed.
svm = SVC(kernel="rbf", random_state=42).fit(x_train, y_train)

# NOTE: these assignments reuse (and therefore overwrite) the prediction and
# accuracy variable names used for the logistic-regression baseline.
y_train_pred, y_test_pred = svm.predict(x_train), svm.predict(x_test)

train_acc = accuracy_score(y_true=y_train, y_pred=y_train_pred)
test_acc = accuracy_score(y_true=y_test, y_pred=y_test_pred)

print(f"Train accuracy: {train_acc}", f"Test accuracy: {test_acc}", sep="\n")

Train accuracy: 0.6317222600408441
Test accuracy: 0.5841269841269842


In [None]:
# imports
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
from torch.utils.data import Dataset

In [None]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Hyper parameters
input_size, num_classes = 8, 2  # presumably feature-column count and label count -- confirm against the CSVs
learning_rate = 1e-3            # step size handed to the optimizer
batch_size = 64                 # samples per DataLoader batch
num_epochs = 50                 # full passes over the training loader

In [None]:
# Create fully connected network
class NN(nn.Module):
    """Two-layer fully connected classifier that outputs raw class logits.

    Architecture: Linear(input_size -> hidden_size) -> ReLU ->
    Linear(hidden_size -> num_classes).

    Args:
        input_size: Number of input features per sample (default 8).
        hidden_size: Width of the hidden layer (default 2).
        num_classes: Number of output classes (default 2).

    The defaults reproduce the layer shapes that were previously hard-coded,
    so ``NN()`` builds the same architecture as before.
    """

    def __init__(self, input_size=8, hidden_size=2, num_classes=2):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return unnormalized class scores of shape (batch, num_classes).

        No sigmoid/softmax is applied here on purpose: ``nn.CrossEntropyLoss``
        expects raw logits and applies log-softmax itself. Squashing the
        scores into (0, 1) first caps the gap between class scores at 1 and
        all but prevents the loss from decreasing. ``argmax`` over the logits
        still yields the predicted class.
        """
        x = F.relu(self.fc1(x))
        return self.fc2(x)


In [None]:
class PCA_Feat(Dataset):
    """Map-style dataset pairing a feature table with integer class labels.

    Args:
        x: Features, one row per sample. A pandas DataFrame or anything
            ``np.asarray`` can turn into a numeric 2-D array.
        y: Class labels, one per row of ``x``. Any array-like; it is flattened
            to 1-D and cast to int64, the dtype ``nn.CrossEntropyLoss``
            requires for class-index targets.

    Raises:
        ValueError: If ``x`` and ``y`` do not contain the same number of samples.
    """

    def __init__(self, x, y):
        # Converting through NumPy makes indexing purely positional, so a
        # pandas object with a non-default index cannot be looked up by label.
        self.x = np.asarray(x, dtype=np.float32)
        self.y = np.asarray(y).ravel().astype(np.int64)
        if len(self.x) != len(self.y):
            raise ValueError(
                f"x has {len(self.x)} samples but y has {len(self.y)} labels"
            )
        self.length = len(self.y)

    def __len__(self):
        """Return the number of samples."""
        return self.length

    def __getitem__(self, index):
        """Return ``(features, label)`` tensors for the sample at ``index``."""
        x_item = torch.tensor(self.x[index])
        y_item = torch.tensor(self.y[index])

        return (x_item, y_item)

In [None]:
# Seed PyTorch's global RNG so weight initialisation and the shuffling order of
# the training loader are reproducible across "Restart & Run All".
torch.manual_seed(42)

# Load data
train_dataset = PCA_Feat(x=x_train, y=y_train)
# Shuffle training batches every epoch so the optimizer does not see the
# samples in the same (file) order on each pass.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

test_dataset = PCA_Feat(x=x_test, y=y_test)
# Evaluation order does not matter, so the test loader is left unshuffled.
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size)

# Initialize the network
model = NN().to(device)


# Loss and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
# Train the network
model.train()  # make sure layers are in training mode regardless of earlier cells
for epoch in range(num_epochs):
    running_loss = 0.0   # sum of per-sample losses seen this epoch
    samples_seen = 0

    for data, targets in train_loader:
        # get data to the same device as the model
        data = data.to(device)
        targets = targets.to(device)

        # forward
        scores = model(data)
        loss = criterion(scores, targets)

        # backward
        optimizer.zero_grad()  # clear gradients left over from the previous batch
        loss.backward()

        # gradient descent / Adam step
        optimizer.step()

        # `loss` is the batch mean, so weight it by batch size before summing
        running_loss += loss.item() * data.size(0)
        samples_seen += data.size(0)

    # Report progress every 10 epochs (and on the last one) so convergence,
    # or the lack of it, is visible without flooding the cell output.
    if (epoch + 1) % 10 == 0 or epoch == num_epochs - 1:
        mean_loss = running_loss / max(samples_seen, 1)
        print(f"Epoch {epoch + 1}/{num_epochs} - mean train loss: {mean_loss:.4f}")

In [None]:
# check accuracy on training & test set
def check_accuracy(loader, model):
    """Print and return the classification accuracy of ``model`` over ``loader``.

    Args:
        loader: Iterable yielding ``(features, labels)`` tensor batches.
        model: ``nn.Module`` mapping a feature batch to per-class scores of
            shape (batch, num_classes); the prediction is the arg-max class.

    Returns:
        Accuracy as a fraction in [0, 1]; ``0.0`` when ``loader`` yields no
        samples (instead of raising ``ZeroDivisionError``).

    The model is switched to eval mode for the duration of the check and is
    put back into whatever mode it was in beforehand, even if evaluation raises.
    """
    # Evaluate on the device the model actually lives on rather than relying
    # on a module-level `device` variable being defined and in sync.
    first_param = next(model.parameters(), None)
    eval_device = first_param.device if first_param is not None else torch.device("cpu")

    num_correct = 0
    num_samples = 0
    was_training = model.training
    model.eval()

    try:
        with torch.no_grad():
            for x, y in loader:
                x = x.to(device=eval_device)
                y = y.to(device=eval_device)

                scores = model(x)
                _, predictions = scores.max(1)
                # .item() keeps the counter a plain Python int
                num_correct += int((predictions == y).sum().item())
                num_samples += predictions.size(0)
    finally:
        # restore the caller's train/eval mode instead of forcing train mode
        model.train(was_training)

    accuracy = num_correct / num_samples if num_samples else 0.0
    print(f"Got {num_correct}/{num_samples} with accuracy {accuracy*100:.2f}")
    return accuracy

In [None]:
# Accuracy of the trained network on the training split.
# The trailing ';' keeps the cell from echoing any return value.
check_accuracy(loader=train_loader, model=model);

Got 2261/4407 with accuracy 51.30


In [None]:
# Accuracy of the trained network on the held-out test split.
# The trailing ';' keeps the cell from echoing any return value.
check_accuracy(loader=test_loader, model=model);

Got 972/1890 with accuracy 51.43
