In [2]:
# Importing dependencies
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications import VGG19
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.metrics import accuracy_score, classification_report

In [3]:
# Reading the EDA CSV file
eda_data = pd.read_csv("Dataset\eda.csv")
eda_data.head()

Unnamed: 0,Uninfected_Edge_Density,Infected_Edge_Density,Parasitized_Avg_Colors,Uninfected_Avg_Colors,Infected_MeanBlobSize,Infected_MaxBlobSize,Uninfected_MeanBlobSize,Uninfected_MaxBlobSize
0,0.1408,0.1744,133.491832,118.653807,5.027778,13.888889,2.074074,7.444444
1,0.1328,0.1696,132.62104,126.433029,5.833333,13.888889,1.585859,7.444444
2,0.1344,0.168,113.769025,128.820641,4.866667,13.888889,3.148148,13.888889
3,0.128,0.184,121.016622,129.87628,3.148148,13.888889,6.37037,17.111111
4,0.1376,0.1696,126.731932,128.073351,2.611111,7.444444,4.222222,13.888889


In [4]:
# Segregating infected and uninfected df
infected_df = eda_data[["Infected_Edge_Density","Parasitized_Avg_Colors",'Infected_MeanBlobSize','Infected_MaxBlobSize']]
uninfected_df = eda_data[['Uninfected_Edge_Density','Uninfected_Avg_Colors','Uninfected_MeanBlobSize', 'Uninfected_MaxBlobSize']]


In [5]:
# Renaming Columns
infected_df = infected_df.copy()
infected_df.rename(columns={
    "Infected_Edge_Density": "Edge_Density",
    "Parasitized_Avg_Colors": "Avg_Colors",
    "Infected_MeanBlobSize": "MeanBlobSize",
    "Infected_MaxBlobSize": "MaxBlobSize"
}, inplace=True)

uninfected_df = uninfected_df.copy()
uninfected_df.rename(columns = {"Uninfected_Edge_Density": "Edge_Density",
    "Uninfected_Avg_Colors": "Avg_Colors",
    "Uninfected_MeanBlobSize": "MeanBlobSize",
    "Uninfected_MaxBlobSize": "MaxBlobSize"}, inplace=True)

In [6]:
# Display and add new columns to infected_df
infected_df['Target'], infected_df['Category'] = 0, 'Infected'
infected_df.head()

Unnamed: 0,Edge_Density,Avg_Colors,MeanBlobSize,MaxBlobSize,Target,Category
0,0.1744,133.491832,5.027778,13.888889,0,Infected
1,0.1696,132.62104,5.833333,13.888889,0,Infected
2,0.168,113.769025,4.866667,13.888889,0,Infected
3,0.184,121.016622,3.148148,13.888889,0,Infected
4,0.1696,126.731932,2.611111,7.444444,0,Infected


In [7]:
# Display and add new columns to uninfected_df
uninfected_df['Target'], uninfected_df['Category'] = 1, 'Uninfected'
uninfected_df.head()

Unnamed: 0,Edge_Density,Avg_Colors,MeanBlobSize,MaxBlobSize,Target,Category
0,0.1408,118.653807,2.074074,7.444444,1,Uninfected
1,0.1328,126.433029,1.585859,7.444444,1,Uninfected
2,0.1344,128.820641,3.148148,13.888889,1,Uninfected
3,0.128,129.87628,6.37037,17.111111,1,Uninfected
4,0.1376,128.073351,4.222222,13.888889,1,Uninfected


In [8]:
# Combining both dataframes for EDA in Tableau
new_eda_df = pd.concat([infected_df,uninfected_df])
new_eda_df

Unnamed: 0,Edge_Density,Avg_Colors,MeanBlobSize,MaxBlobSize,Target,Category
0,0.1744,133.491832,5.027778,13.888889,0,Infected
1,0.1696,132.621040,5.833333,13.888889,0,Infected
2,0.1680,113.769025,4.866667,13.888889,0,Infected
3,0.1840,121.016622,3.148148,13.888889,0,Infected
4,0.1696,126.731932,2.611111,7.444444,0,Infected
...,...,...,...,...,...,...
495,0.1360,134.838038,1.716049,7.444444,1,Uninfected
496,0.1392,127.553836,3.148148,13.888889,1,Uninfected
497,0.1296,138.398527,3.577778,13.888889,1,Uninfected
498,0.1248,133.434947,4.222222,13.888889,1,Uninfected


In [9]:
new_eda_df.shape

(1000, 6)

In [45]:
# Saving the df above as CSV for Visual Analysis
new_eda_df.to_csv('EDA-New.csv', index=False)

### SPLITTING DATA: TEST AND TRAINING

In [46]:
# Split the data into features and target
X = new_eda_df[['Edge_Density', 'Avg_Colors', 'MeanBlobSize', 'MaxBlobSize']].values
y = new_eda_df['Target'].values