<div style="background-color: lightgreen; color: black; padding: 4px;">
    <h3>1. Neural Networks
</h3> </div>

<div style="background-color: lightblue; color: black; padding: 4px;">
    <h4> Building Our First Neural Network with Keras
</h4> </div>

In [None]:
pip install numpy pandas matplotlib seaborn scikit-learn jupyter ipykernel


In [None]:
import tensorflow as tf

In [None]:
import tensorflow as tf

# Load MNIST, which ships as a (train, test) pair of (inputs, labels) tuples.
mnist = tf.keras.datasets.mnist
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Rescale the inputs by 1/255.
x_train = x_train / 255.0
x_test = x_test / 255.0

# Flatten(28x28) -> Dense(128, relu) -> Dropout(0.2) -> Dense(10, softmax)
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Flatten(input_shape=(28, 28)))
model.add(tf.keras.layers.Dense(128, activation='relu'))
model.add(tf.keras.layers.Dropout(0.2))
model.add(tf.keras.layers.Dense(10, activation='softmax'))

# Integer class labels -> sparse categorical cross-entropy; accuracy is reported as a metric.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Train for five epochs, then score on the test split (last expression, so it is displayed).
model.fit(x_train, y_train, epochs=5)
model.evaluate(x_test, y_test)

## building and training the model

In [None]:
!pip install torch

In [None]:
from keras.models import Sequential
from keras.layers import Dense
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_classification

# Synthetic two-class problem: 1000 samples, 20 features, fixed seed.
X, y = make_classification(n_samples=1000, n_features=20, n_classes=2, random_state=42)

# Hold out 20 % of the rows for testing (fixed seed).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fully connected network: 16 (relu) -> 8 (relu) -> 1 (sigmoid).
# The input width is read from the training data rather than typed in by hand.
model = Sequential()
model.add(Dense(16, activation='relu', input_shape=(X_train.shape[1],)))
model.add(Dense(8, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

# Binary target -> binary cross-entropy loss; accuracy is tracked as a metric.
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Keep the returned History object: the plotting cells read `history.history`.
history = model.fit(X_train, y_train, validation_split=0.2, batch_size=32, epochs=10)


In [None]:
# Score the trained model on the held-out split.
# The model was compiled with one metric, so evaluate() yields the loss followed by the accuracy.
scores = model.evaluate(X_test, y_test)
test_loss, test_accuracy = scores
print(f"Test Loss: {test_loss}")
print(f"Test Accuracy: {test_accuracy}")

<div style="background-color: lightblue; color: black; padding: 4px;">
    <h4> Visualizing Model Performance
</h4> </div>

To understand how the model is performing over time, let's plot the **training** and **validation** loss and accuracy across epochs.

---

In [None]:
import matplotlib.pyplot as plt

# Training vs. validation accuracy per epoch.
# The curves are drawn in the same order as the legend entries below.
for curve_key in ('accuracy', 'val_accuracy'):
    plt.plot(history.history[curve_key])
plt.legend(['Train', 'Validation'], loc='upper left')
plt.title('Model Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.show()

In [None]:
# Training vs. validation loss per epoch, read from the History object returned by fit().
epoch_metrics = history.history
plt.plot(epoch_metrics['loss'])
plt.plot(epoch_metrics['val_loss'])
plt.legend(['Train', 'Validation'], loc='upper left')
plt.title('Model Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.show()

##  Making Predictions with the Trained Model

In [None]:
# Score the model on the *entire* synthetic dataset (training and test rows together).
# Most of these rows were used for training, so this is not a test score. It is stored under
# its own names and labelled accordingly so it cannot be mistaken for the held-out metrics.
full_loss, full_accuracy = model.evaluate(X, y)
print(f"Full-dataset Loss: {full_loss}")
print(f"Full-dataset Accuracy: {full_accuracy}")

## Save the trained model

In [None]:
# Save the trained model to a file
# NOTE(review): the ".h5" suffix presumably selects Keras' legacy HDF5 format; recent Keras
# releases recommend the native ".keras" format. Confirm before changing, because the load
# cell reads this exact filename.
model.save("tf_intro_trained_model.h5")

In [None]:
# Reuse the same model to make predictions

from keras.models import load_model

# Load the saved model
# The result goes into a separate variable, so the in-memory `model` is left untouched.
loaded_model = load_model("tf_intro_trained_model.h5")

In [None]:
# Score the reloaded model on the held-out split; the numbers can be compared with
# those of the in-memory model to check the save/load round trip.
test_loss, test_accuracy = loaded_model.evaluate(X_test, y_test)
print(
    f"Loaded Model Test Loss: {test_loss}",
    f"Loaded Model Test Accuracy: {test_accuracy}",
    sep="\n",
)

In [None]:
# Make predictions with the loaded model
# Only the first five test rows are scored; the printed array holds the raw model outputs.
predictions = loaded_model.predict(X_test[:5])
print(predictions)

<div style="background-color: lightgreen; color: black; padding: 4px;">
    <h3>2. Create your own Neural Networks
</h3> </div>

**1. Start with importing libraries and dataset**

In [None]:
#pip install pandas numpy tensorflow scikit-learn

In [None]:
# pip install kagglehub # if kagglehub is not installed yet
#import kagglehub

# Download latest version
#path = kagglehub.dataset_download("blastchar/telco-customer-churn")

#print("Path to dataset files:", path)

In [None]:
import sys
# Show which Python interpreter this kernel is running on.
print(sys.executable)


In [None]:
import kagglehub
import tensorflow as tf
# Print the installed TensorFlow version.
print(tf.__version__)


In [None]:
from pathlib import Path

# Fetch the Telco churn dataset and wrap the returned location in a Path.
path = Path(kagglehub.dataset_download("blastchar/telco-customer-churn"))

# Last expression: display the entries of the dataset folder.
list(path.iterdir())


In [None]:
import pandas as pd
from pathlib import Path
import kagglehub

# Locate the dataset folder.
path = Path(kagglehub.dataset_download("blastchar/telco-customer-churn"))

# Take the first CSV found in that folder and read it into a DataFrame.
csv_candidates = list(path.glob("*.csv"))
csv_file = csv_candidates[0]
data = pd.read_csv(csv_file)

# Last expression: preview the first rows.
data.head()


In [None]:
import pandas as pd
from pathlib import Path
import kagglehub

# Download dataset
path = kagglehub.dataset_download("blastchar/telco-customer-churn")
path = Path(path)

# Auto-detect CSV file.
# Sorting makes the choice deterministic (glob order is filesystem-dependent), and the
# explicit check replaces a bare IndexError with a message that names the folder searched.
csv_files = sorted(path.glob("*.csv"))
if not csv_files:
    raise FileNotFoundError(f"No CSV file found in {path}")
csv_file = csv_files[0]

# Load dataset
data = pd.read_csv(csv_file)

# Last expression: preview the first rows.
data.head()


2. **Data Cleaning**
- We can check for missing values and handle them if needed
- We need to convert categorical values into numeric ones

In [None]:
# Clean the dataset in one chained expression:
#   1. drop the identifier column (customerID),
#   2. convert 'TotalCharges' to numeric, with unparseable entries becoming NaN,
#   3. drop every row that contains a NaN.
data = (
    data
    .drop(columns=['customerID'])
    .assign(TotalCharges=lambda frame: pd.to_numeric(frame['TotalCharges'], errors='coerce'))
    .dropna()
)


In [None]:
# Summarise the cleaned frame before encoding.
data.info() # check what kind of data there is

- **Encode Categorical Features**
- Categorical features need to be encoded into numerical values. The cells below use `LabelEncoder`; `OneHotEncoder` (or `pd.get_dummies`) is an alternative for features with more than two categories:

In [None]:
# Columns holding string categories that must be converted to integers before training.
# 'OnlineBackup' was missing from the original list, which left that column as strings.
# 'Churn' (the target) is included so it is encoded alongside the features.
categorical_columns = ['gender', 'Partner', 'Dependents', 'PhoneService',
                       'MultipleLines', 'InternetService', 'OnlineSecurity',
                       'OnlineBackup', 'DeviceProtection', 'TechSupport',
                       'StreamingTV', 'StreamingMovies', 'Contract',
                       'PaperlessBilling', 'PaymentMethod', 'Churn']

In [None]:
# LabelEncoder was used without ever being imported; import it here so the cell runs on a fresh kernel.
from sklearn.preprocessing import LabelEncoder

# Apply label encoding to categorical columns.
# fit_transform re-fits the encoder for every column, so a single instance can be reused;
# after the loop it only remembers the classes of the last column processed.
encoder = LabelEncoder()
for col in categorical_columns:
    data[col] = encoder.fit_transform(data[col])

**Select the target feature**
- standardize the numeric columns

In [None]:
# The target is the 'Churn' column; the features are every other column.
y = data['Churn']
X = data.drop(columns=['Churn'])

In [None]:
# StandardScaler is only imported much further down the notebook; import it here so this
# cell does not raise NameError when the notebook is run top to bottom.
from sklearn.preprocessing import StandardScaler

# Standardize numeric columns
# NOTE(review): the scaler is fitted on all rows before the train/test split, so test-set
# statistics leak into the scaling — consider fitting on the training split only.
scaler = StandardScaler()
numeric_columns = ['tenure', 'MonthlyCharges', 'TotalCharges']
X[numeric_columns] = scaler.fit_transform(X[numeric_columns])

**3. Train test split**

In [None]:
# Split the dataset into training and test sets
# test_size=0.2 holds out 20 % of the rows; random_state pins the shuffle so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

**4. Create a Neural Network:**

A simple 3-layer neural network with two hidden layers:
- 16 neurons in the first hidden layer.
- 8 neurons in the second hidden layer.

Output layer uses the sigmoid activation function for binary classification.

In [None]:
# Build a simple neural network: 16 (relu) -> 8 (relu) -> 1 (sigmoid).
# The input width is taken from the training data instead of a hard-coded 19, so the model
# stays valid if the number of feature columns changes.
model = Sequential([
    Dense(16, activation='relu', input_shape=(X_train.shape[1],)),  # First hidden layer (declares the input width)
    Dense(8, activation='relu'),                                    # Second hidden layer
    Dense(1, activation='sigmoid')                                  # Output layer (binary classification)
])

**5. Training the model:**
- Train the model for 10 epochs with a batch size of 32.

In [None]:
import pandas as pd
from pathlib import Path
import kagglehub

# Start again from the raw CSV: locate the dataset folder and read its first CSV file.
path = Path(kagglehub.dataset_download("blastchar/telco-customer-churn"))
csv_candidates = list(path.glob("*.csv"))
csv_file = csv_candidates[0]
data = pd.read_csv(csv_file)

print(data.shape)  # should be ~ (7043, 21)


In [None]:
# Encode the target as integers: 'Yes' -> 1, 'No' -> 0.
# NOTE(review): values absent from the dict presumably map to NaN, so re-running this cell on
# an already-encoded column would blank it out — reload `data` before running it again.
data['Churn'] = data['Churn'].map({'Yes': 1, 'No': 0})

In [None]:
# Parse TotalCharges as numbers (unparseable entries become NaN), then drop the rows
# whose TotalCharges is NaN.
total_charges = pd.to_numeric(data['TotalCharges'], errors='coerce')
data = data.assign(TotalCharges=total_charges).dropna(subset=['TotalCharges'])

print(data.shape)  # should be > 7000 rows


In [None]:
# Separate the target from the features.
# 'customerID' is dropped together with 'Churn': it is a per-row identifier, and leaving it in
# would make get_dummies create one indicator column per customer.
# errors='ignore' keeps the cell runnable if the ID column has already been removed.
X = data.drop(columns=['Churn', 'customerID'], errors='ignore')
y = data['Churn']

# One-hot encode the remaining string columns; drop_first removes one redundant level per feature.
X = pd.get_dummies(X, drop_first=True)

In [None]:
# Print the shapes of the frame, the feature matrix and the target to compare their row counts.
print(data.shape)     # > 7000 rows
print(X.shape)        # > 7000 rows
print(y.shape)


In [None]:
# Inspect the target column: its distinct values and its dtype.
print(data['Churn'].unique())
print(data['Churn'].dtype)


In [None]:
# Find columns that are still string / object typed
obj_cols = X.select_dtypes(include='object').columns
print(obj_cols)


In [None]:
import pandas as pd
from pathlib import Path
import kagglehub

# --- Load: locate the dataset folder and read its first CSV file ---
path = Path(kagglehub.dataset_download("blastchar/telco-customer-churn"))
csv_file = list(path.glob("*.csv"))[0]
data = pd.read_csv(csv_file)

# --- Clean, as one chain ---
#   * target encoded as 1 ('Yes') / 0 ('No')
#   * TotalCharges parsed as numbers (unparseable entries become NaN)
#   * rows with NaN TotalCharges removed
#   * identifier column removed
data = (
    data
    .assign(
        Churn=lambda frame: frame['Churn'].map({'Yes': 1, 'No': 0}),
        TotalCharges=lambda frame: pd.to_numeric(frame['TotalCharges'], errors='coerce'),
    )
    .dropna(subset=['TotalCharges'])
    .drop('customerID', axis=1)
)

# --- Split target / features and one-hot encode the features ---
y = data['Churn']
X = pd.get_dummies(data.drop('Churn', axis=1), drop_first=True)

print(X.select_dtypes(include='object'))  # should be empty
print(y.unique(), y.dtype)


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Hold out 20 % of the rows for testing (fixed seed).
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)

# Fit the scaler on the training rows only, then apply the same transform to both splits.
scaler = StandardScaler()
X_train = scaler.fit(X_train).transform(X_train)
X_test = scaler.transform(X_test)

# Hand the labels over as plain NumPy arrays rather than pandas Series.
y_train, y_test = y_train.values, y_test.values


In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Fully connected network: 32 (relu) -> 16 (relu) -> 1 (sigmoid).
# The input width follows the number of columns in the scaled training matrix.
model = Sequential()
model.add(Dense(32, activation='relu', input_shape=(X_train.shape[1],)))
model.add(Dense(16, activation='relu'))
model.add(Dense(1, activation='sigmoid'))

# Binary target -> binary cross-entropy loss; accuracy is tracked as a metric.
model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])


In [None]:
# Training settings gathered in one place: 10 epochs, batches of 32,
# and 20 % of the training data held out for validation.
fit_options = dict(epochs=10, batch_size=32, validation_split=0.2)
history = model.fit(X_train, y_train, **fit_options)


### **We got an error**
Let's debug this issue:

In [None]:
# Debug step: element dtype of the training feature array.
print(X_train.dtype)

In [None]:
# Debug step: the first dimension of the features and of the labels must match.
print(X_train.shape)
print(y_train.shape)

In [None]:
import numpy as np
# Debug step: report whether any training feature value is NaN.
print(np.isnan(X_train).any())  # expected: False

In [None]:
# Debug step: display any feature columns that are still object (string) typed.
X.select_dtypes(include='object')
# Expected: an empty selection (no object columns left after one-hot encoding)

In [None]:
# Debug step: container type of the training features.
type(X_train)
# Expected: numpy.ndarray

In [None]:
# Debug step: container type and element dtype of the training features.
print(type(X_train))     # expected: numpy.ndarray
print(X_train.dtype)    # expected: float64 / float32


In [None]:
# Debug step: container type and element dtype of both the features and the labels.
print(type(X_train))     # expected: numpy.ndarray
print(X_train.dtype)    # expected: float32 / float64

print(type(y_train))    # expected: numpy.ndarray
print(y_train.dtype)    # expected: int64 / float


In [None]:
import numpy as np
# Debug step (repeated): report whether any training feature value is NaN.
print(np.isnan(X_train).any())  # expected: False

In [None]:
# Re-apply the preprocessing steps, guarded so the cell is safe to run after the pipeline above.

# 1. Encode target — only while it still holds the raw 'Yes'/'No' strings. Mapping an
#    already-encoded 0/1 column through this dict would turn every value into NaN.
if not pd.api.types.is_numeric_dtype(data['Churn']):
    data['Churn'] = data['Churn'].map({'Yes': 1, 'No': 0})

# 2. One-hot encode features
X = pd.get_dummies(data.drop('Churn', axis=1), drop_first=True)

# 3. Scale -> NumPy — only if X_train is still a DataFrame. Re-fitting on the already
#    standardized array would overwrite the statistics the scaler learned from the raw
#    training data.
if isinstance(X_train, pd.DataFrame):
    X_train = scaler.fit_transform(X_train)

In [None]:
# NOTE(review): only X is one-hot encoded in the cells above; `data` itself keeps its
# string columns, so object dtypes are expected in this listing.
print(data.dtypes)


In [None]:
# Per-column dtypes of the one-hot encoded feature frame.
print(X.dtypes)

In [None]:
# Element dtype of the scaled training array.
print(X_train.dtype)  # expected: float32 / float64

**Proceed with training**

In [None]:
# Train for 10 epochs in batches of 32, holding out 20 % of the training data for validation.
history = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2)

In [None]:
# Count how many feature columns have each dtype.
print(X.dtypes.value_counts())

In [None]:
import numpy as np

# List the feature columns that are neither numeric nor boolean.
print("Non-numeric columns in X (pandas):")
print(X.select_dtypes(exclude=['number', 'bool']).columns)

In [None]:
# Debug step: container type and element dtype of the training features.
print(type(X_train))      # expected: numpy.ndarray
print(X_train.dtype)     # expected: float64 / float32

In [None]:
# Debug step: report whether any training feature value is NaN.
print(np.isnan(X_train).any())   # expected: False

In [None]:
# Remove the two raw string columns from the training features, if they are still there.
# After the scaling cell X_train is a NumPy array, which has no .drop(), so the drop only
# applies while X_train is a DataFrame. errors='ignore' covers the case where one-hot
# encoding has already replaced these columns with indicator columns.
if isinstance(X_train, pd.DataFrame):
    X_train = X_train.drop(columns=['OnlineBackup', 'InternetService'], errors='ignore')

In [None]:
# Show the dtype(s) of the training features.
# A DataFrame exposes per-column `.dtypes`; a NumPy array only has a single `.dtype`,
# and asking it for `.dtypes` raises AttributeError.
print(X_train.dtypes if isinstance(X_train, pd.DataFrame) else X_train.dtype)

In [None]:
# Train again and keep the History object under the name the plotting cells read.
# The original assigned to `istory` (typo), which left `history` pointing at an older run.
history = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2)

In [None]:
# Check the number of feature columns: the model's input size must equal the second value printed.
print(X_train.shape)

In [None]:
# Rebuild the network: one hidden layer of 64 units (relu) and a sigmoid output for the
# binary target. The input width is read from the training data instead of a hard-coded 17,
# so it always matches the feature matrix that is passed to fit().
model = tf.keras.Sequential([
    tf.keras.Input(shape=(X_train.shape[1],)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1, activation='sigmoid')
])

In [None]:
# Check the data types of the training features.
# A DataFrame exposes per-column `.dtypes`; a NumPy array only has a single `.dtype`,
# and asking it for `.dtypes` raises AttributeError.
print(X_train.dtypes if isinstance(X_train, pd.DataFrame) else X_train.dtype)

In [None]:
# Compile the model
# Binary target -> binary cross-entropy loss; accuracy is reported as a metric.
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])


In [None]:
# Train for 10 epochs in batches of 32, holding out 20 % of the training data for validation.
history = model.fit(X_train, y_train, epochs=10, batch_size=32, validation_split=0.2)


In [None]:
# Score the model on the held-out split in batches of 32.
# The model was compiled with one metric, so evaluate() yields the loss followed by the accuracy.
test_loss, test_accuracy = model.evaluate(X_test, y_test, batch_size=32)
print(f"Test Loss: {test_loss}", f"Test Accuracy: {test_accuracy}", sep="\n")

In [None]:
# Display the dtype(s) of the test features.
# A DataFrame exposes per-column `.dtypes`; a NumPy array only has a single `.dtype`,
# and asking it for `.dtypes` raises AttributeError.
X_test.dtypes if isinstance(X_test, pd.DataFrame) else X_test.dtype

In [None]:
# Drop multiple columns from the test features, if they are still there.
# After the scaling cell X_test is a NumPy array, which has no .drop(), so the drop only
# applies while X_test is a DataFrame. errors='ignore' covers the case where one-hot
# encoding has already replaced these columns with indicator columns.
if isinstance(X_test, pd.DataFrame):
    X_test = X_test.drop(columns=['InternetService', 'OnlineBackup'], errors='ignore')

In [None]:
# Check the accuracy on the held-out split, scored in batches of 32.
scores = model.evaluate(X_test, y_test, batch_size=32)
test_loss, test_accuracy = scores
print("\n".join([f"Test Loss: {test_loss}", f"Test Accuracy: {test_accuracy}"]))

In [None]:
# Step 1 — raw model outputs: the sigmoid layer gives one probability for class 1 per row.
probabilities = model.predict(X_test)

# Step 2 — threshold at 0.5: the comparison yields True/False, and .astype(int) turns
# that into 1/0 class labels.
predictions = (probabilities > 0.5).astype(int)

# Step 3 — show the first ten predicted labels.
print(predictions[:10])