In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import tensorflow as tf
import tensorflow_hub as hub
from imblearn.over_sampling import RandomOverSampler

# Use matplotlib's built-in 'dark_background' style sheet for the figures drawn below.
plt.style.use('dark_background') 

def highlight_odd_rows(s):
    """Return one CSS declaration string per position of ``s`` (zebra striping).

    Every position receives a 1px white border; positions whose index is odd
    (1, 3, 5, ...) additionally receive an indigo background. Only ``len(s)``
    is consulted -- the values held in ``s`` are never read.

    In this notebook the function is handed to ``Styler.apply``.

    Returns:
        list[str]: ``len(s)`` CSS strings, in positional order.
    """
    border = 'border: 1px solid white;'
    shaded = 'background-color: indigo; ' + border
    return [shaded if idx % 2 else border for idx in range(len(s))]

In [None]:
# Load the wine-review CSV, keeping only the six columns named in `usecols`.
df = pd.read_csv("wine-reviews.csv", usecols = ['country', 'description', 'points', 'price', 'variety', 'winery'])

In [None]:
# Preview the first rows of the wine frame, styled with highlight_odd_rows.
df.head().style.apply(highlight_odd_rows)

In [None]:
# Discard rows that have a missing value in either "description" or "points";
# missing values in the other columns are left in place.
df = df.dropna(subset = ["description", "points"])

In [None]:
# Histogram of the "points" column of the wine frame (20 bins).
# Title and axis labels are set so the figure can be read on its own, in line
# with the per-feature histograms drawn for the diabetes data further down.
plt.hist(df["points"], bins=20)
plt.title("points")
plt.xlabel("points")
plt.ylabel("Count")
plt.show()

In [None]:
# Load the diabetes CSV with all of its columns; later cells treat the last
# column ("Outcome") as the label and the preceding columns as features.
df2 = pd.read_csv("diabetes.csv")

In [None]:
# Preview the first rows of the diabetes frame, styled with highlight_odd_rows.
df2.head().style.apply(highlight_odd_rows)

In [None]:
# Build the feature matrix and label vector from the raw diabetes frame.
# Without this the cell raised NameError on a fresh kernel, because `X` and `y`
# were never defined anywhere in the notebook. The layout (all columns but the
# last are features, the last column is the label) is the one the later cells
# rely on: they rebuild a frame from [X | y] using `df2.columns` and plot
# `df2.columns[:-1]` against 'Outcome'.
# Deriving X from `df2` each time also makes the cell safe to re-run: the
# features are never standardised twice.
X = df2[df2.columns[:-1]].values
y = df2[df2.columns[-1]].values

# Standardise each feature column (StandardScaler: zero mean, unit variance).
# NOTE(review): the scaler is fitted on the full dataset before the
# train/valid/test split below, so held-out rows influence the scaling
# statistics -- consider fitting on the training split only.
scaler = StandardScaler()
X = scaler.fit_transform(X)

In [None]:
# Balance the two classes by randomly duplicating minority-class rows.
# `random_state` is fixed (same value as the train/test splits below) so that
# the resampled data -- and everything trained on it -- is identical on every
# Restart & Run All; the sampler was previously unseeded.
# NOTE(review): oversampling happens before the train/valid/test split, so
# duplicated rows can land in both the training and the held-out sets --
# consider resampling the training split only.
over = RandomOverSampler(random_state=0)
X, y = over.fit_resample(X, y)

# Re-attach the labels as a final column and wrap the result in a DataFrame
# that reuses the original column names, for inspection of the class balance.
data = np.hstack((X, np.reshape(y, (-1, 1))))
transformed_df = pd.DataFrame(data, columns=df2.columns)

In [None]:
# Row counts after oversampling, shown as (Outcome == 1, Outcome == 0).
tuple(
    len(transformed_df[transformed_df["Outcome"] == outcome_value])
    for outcome_value in (1, 0)
)

In [None]:
# One figure per feature column (every column except the last, 'Outcome'):
# overlaid histograms of that feature for the two outcome classes.
for label in df2.columns[:-1]:
    plt.hist(df2[df2['Outcome']==1][label], color = 'lightblue', label = 'Diabetes', alpha = 0.7, density= True, bins =  15)
    plt.hist(df2[df2['Outcome']==0][label], color = 'pink', label = "No diabetes", alpha = 0.7, density= True, bins =  15)
    plt.title(label)
    # density=True normalises each histogram to unit area, so the y-axis is a
    # probability density rather than a count ("N" was misleading).
    plt.ylabel("Probability density")
    plt.xlabel(label)
    plt.legend()
    plt.show()

In [None]:
# Two-stage split with a fixed seed: first hold out 40% of the rows, then cut
# that hold-out in half, giving 60% train / 20% validation / 20% test.
X_train, X_temp, y_train, y_temp = train_test_split(
    X,
    y,
    test_size=0.4,
    random_state=0,
)
X_valid, X_test, y_valid, y_test = train_test_split(
    X_temp,
    y_temp,
    test_size=0.5,
    random_state=0,
)

In [None]:
# Seed the Python, NumPy and TensorFlow random generators before any layer is
# created, so weight initialisation is repeatable on Restart & Run All (the
# data splits already use a fixed random_state, but the network was previously
# unseeded).
tf.keras.utils.set_random_seed(0)

# Small fully connected binary classifier: two hidden ReLU layers of 16 units
# and a single sigmoid output unit.
model = tf.keras.Sequential([
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dense(16, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

In [None]:
# Configure training: Adam at learning rate 0.001, binary cross-entropy loss,
# and accuracy reported as the tracked metric.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss=tf.keras.losses.BinaryCrossentropy(),
    metrics=['accuracy'],
)

In [None]:
# Baseline: this cell sits before the `fit` cell, so in a top-to-bottom run it
# reports loss and accuracy of the still-untrained network on the training split.
model.evaluate(X_train, y_train)

In [None]:
# Same pre-training baseline, measured on the validation split.
model.evaluate(X_valid, y_valid)

In [None]:
# Train for 20 epochs in mini-batches of 16 on the training split; the
# validation split is passed as `validation_data` so it is evaluated alongside training.
model.fit(X_train, y_train, batch_size = 16, epochs = 20, validation_data = (X_valid, y_valid))

In [None]:
# Final check: loss and accuracy of the trained model on the test split,
# which was not passed to `fit`.
model.evaluate(X_test, y_test)