In [None]:
# Colab-only step: open the upload dialog and report every file that was received.
from google.colab import files

uploaded = files.upload()

# `uploaded` is keyed by file name; the length of each value is reported as bytes.
for fn, payload in uploaded.items():
  report = 'User uploaded file "{name}" with length {length} bytes'.format(
      name=fn, length=len(payload))
  print(report)


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns



In [None]:
# Read the facies table from the working directory and preview its first rows.
csv_path = "facies_data2.csv"
data = pd.read_csv(csv_path)
data.head()


In [None]:
# Show summary statistics of `data` as the cell output.
data.describe()

In [None]:
# Histogram of every selected column, 30 bins each, on one tall figure.
hist_columns = ['DeltaPHI', 'GR', 'ILD_log10', 'PHIND', 'Depth', 'PE']
stat = data[hist_columns]
stat.hist(bins=30, figsize=(12, 20))
plt.tight_layout()
plt.show()

In [None]:
# Pearson correlation between the numeric columns, drawn as an annotated heatmap.
data2 = data.corr(numeric_only=True, method='pearson')
plt.figure(figsize=(10, 10))
sns.heatmap(data=data2, annot=True, cmap='coolwarm')


In [None]:
# Order the rows by depth and renumber the index from 0.
data = data.sort_values(by="Depth", ignore_index=True)

# Pull each curve out as its own Series for the plotting cells below.
curve_columns = ("Depth", "GR", "ILD_log10", "DeltaPHI", "PHIND", "PE")
depth, GR, ILD_log10, DeltaPHI, PHIND, PE = (data[column] for column in curve_columns)


In [None]:
# Side-by-side log tracks (curve value on x, depth on y, depth increasing downwards).
# The original created four axes but only drew three, leaving an empty panel;
# the fourth track now shows PHIND, which was extracted alongside the other curves.
fig, (ax1, ax2, ax3, ax4) = plt.subplots(1, 4, figsize=(10,25))

# (curve, title, x-label, line colour) for each track, left to right.
tracks = [
    (GR, 'Gamma Ray', 'Gamma ray', 'green'),
    (ILD_log10, 'Resistivity', 'Resistivity', 'red'),
    (DeltaPHI, 'DeltaPHI', 'DeltaPHI', 'yellow'),
    (PHIND, 'PHIND', 'PHIND', 'blue'),
]

for ax, (curve, title, xlabel, color) in zip((ax1, ax2, ax3, ax4), tracks):
    ax.plot(curve, depth, color=color)
    ax.invert_yaxis()  # put the smallest depth at the top of the track
    ax.set_title(title)
    ax.set_xlabel(xlabel)
    ax.set_ylabel('Depth')

# Keep the per-track labels from overlapping their neighbours.
plt.tight_layout()
plt.show()

**Crossplot**

In [None]:
# GR vs DeltaPHI crossplot coloured by the `Facies` column.
# Axis labels and a title are added so the figure can be read on its own.
sc = plt.scatter(GR, DeltaPHI, c=data['Facies'], cmap='tab10')
plt.colorbar(sc, label="Facies")
plt.xlabel('GR')
plt.ylabel('DeltaPHI')
plt.title('GR VS DeltaPHI')
plt.show()


In [None]:
# Same crossplot with seaborn: one categorical colour per value of `Facies`.
sns.scatterplot(
    data=data,
    x=GR,
    y=DeltaPHI,
    hue="Facies",
    palette="tab20",
)
plt.title('GR VS DeltaPHI')
plt.show()


#

In [None]:
# Preview the first rows of `data`.
data.head()

**ML MODEL**

In [None]:
# Feature columns used as model inputs.
Feature = ['GR','ILD_log10','DeltaPHI','PHIND','PE']

# Drop rows with a missing value in any model column or in Depth.
# `dropna(subset=...)` removes the same rows as selecting the columns first,
# but keeps the remaining columns (e.g. `Facies`) instead of silently
# discarding them when `data` is overwritten.
data = data.dropna(subset=Feature + ['Depth'])



In [None]:
# Feature list used for clustering.
feature = ["GR", "ILD_log10", "DeltaPHI", "PHIND", "PE"]

# Keep only rows that are complete in the model columns and Depth.
# `dropna(subset=...)` removes the same rows as selecting the columns first,
# but leaves the other columns of `data` in place rather than discarding them.
data = data.dropna(subset=feature + ['Depth'])

# Model input: the feature columns only.
X = data[feature]

In [None]:
# Preprocessing: standardise every feature column with scikit-learn's StandardScaler.
from sklearn.preprocessing import StandardScaler

# Fit the scaler on X, then apply it to the same rows.
scaler = StandardScaler().fit(X)
X_standard = scaler.transform(X)

In [None]:
# Model training: cluster the standardised features with two unsupervised models.
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture

n_rocktype = 4  # number of classes (rock types) we want

# KMeans with 20 restarts and a fixed random_state.
K_means = KMeans(n_clusters=n_rocktype, n_init=20, random_state=42)
K_means.fit(X_standard)
label_Kmeans = K_means.labels_

# Gaussian mixture with a full covariance matrix per component.
Gaussian_Mixture = GaussianMixture(
    n_components=n_rocktype,
    covariance_type='full',
    random_state=42,
)
label_GM = Gaussian_Mixture.fit_predict(X_standard)

# Store both label vectors next to the rows they describe.
data['Kmeans'] = label_Kmeans
data['GM'] = label_GM

data.head()



In [None]:
# Model evaluation: silhouette score of each labelling on the standardised features.
from sklearn.metrics import silhouette_score

SL_Kmeans = silhouette_score(X_standard, label_Kmeans)
SL_GM = silhouette_score(X_standard, label_GM)

# One row per model.
score_rows = [
    {"Model": "KMeans", "Silhouette": SL_Kmeans},
    {"Model": "GM", "Silhouette": SL_GM},
]
Results = pd.DataFrame(score_rows)
Results.head()


In [None]:
# GR vs DeltaPHI coloured by the KMeans cluster id.
# x/y are given as column names so the points come from the same filtered rows
# as the `Kmeans` labels, instead of from the `GR`/`DeltaPHI` Series that were
# extracted before `data` was filtered with dropna().
sns.scatterplot(data=data, x="GR", y="DeltaPHI", palette="tab10", hue="Kmeans")
plt.title('GR VS DeltaPHI after ML KMeans')
plt.show()


#

In [None]:
# GR vs DeltaPHI coloured by the Gaussian-mixture component id.
# x/y are given as column names so the points come from the same filtered rows
# as the `GM` labels, instead of from the `GR`/`DeltaPHI` Series that were
# extracted before `data` was filtered with dropna().
sns.scatterplot(data=data, x="GR", y="DeltaPHI", palette="tab10", hue="GM")
plt.title('GR VS DeltaPHI AFTER ML-Gausian')
plt.show()




**TensorFlow**

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Seed the Python, NumPy and TensorFlow random generators so that weight
# initialisation and batch shuffling give the same result on every run.
keras.utils.set_random_seed(42)

input_dim = X_standard.shape[1]   # number of features
latent_dim = 2                    # 2-D latent space so it is easy to visualise

# Encoder: input_dim -> 16 -> 8 -> latent_dim
inputs = keras.Input(shape=(input_dim,))
x = layers.Dense(16, activation="relu")(inputs)
x = layers.Dense(8, activation="relu")(x)
latent = layers.Dense(latent_dim, activation="linear", name="latent_layer")(x)

# Decoder: latent_dim -> 8 -> 16 -> input_dim (mirror of the encoder)
x = layers.Dense(8, activation="relu")(latent)
x = layers.Dense(16, activation="relu")(x)
outputs = layers.Dense(input_dim, activation="linear")(x)

# `autoencoder` reconstructs its input; `encoder` shares the same layers and
# stops at the latent layer.
autoencoder = keras.Model(inputs, outputs, name="autoencoder")
encoder = keras.Model(inputs, latent, name="encoder")

autoencoder.compile(optimizer="adam", loss="mse")
autoencoder.summary()


In [None]:
# Train the autoencoder to reproduce its own input; the last 20% of the rows
# are held out by Keras for validation.
history = autoencoder.fit(
    x=X_standard,
    y=X_standard,
    batch_size=32,
    epochs=100,
    validation_split=0.2,
    verbose=1,
)


In [None]:
# Training and validation loss per epoch for the autoencoder.
plt.figure(figsize=(6, 4))
for history_key, curve_label in (("loss", "Train loss"), ("val_loss", "Val loss")):
    plt.plot(history.history[history_key], label=curve_label)
plt.xlabel("Epoch")
plt.ylabel("MSE loss")
plt.legend()
plt.title("Training Autoencoder Loss")
plt.show()


In [None]:
# Get the latent (2-D) coordinates of every sample.
Z = encoder.predict(X_standard)  # shape (n_samples, latent_dim)
print("Shape latent:", Z.shape)

# KMeans in the latent space. n_init=20 matches the KMeans baseline fitted on
# the standardised features, so both models get the same number of restarts
# and their silhouette scores are compared on equal terms.
kmeans_latent = KMeans(n_clusters=n_rocktype, random_state=42, n_init=20)
labels_latent = kmeans_latent.fit_predict(Z)

sil_latent = silhouette_score(Z, labels_latent)
print("Silhouette KMeans (latent autoencoder):", sil_latent)


In [None]:
# Latent-space scatter coloured by the clusters found in that space.
plt.figure(figsize=(6, 5))
latent_x, latent_y = Z[:, 0], Z[:, 1]
sns.scatterplot(x=latent_x, y=latent_y, hue=labels_latent, palette="tab10", s=20)
plt.title("Cluster di Latent Space (Autoencoder + KMeans)")
plt.xlabel("Latent dim 1")
plt.ylabel("Latent dim 2")
# Place the legend outside the axes, to the right.
plt.legend(title="Cluster", loc="upper left", bbox_to_anchor=(1.05, 1))
plt.tight_layout()
plt.show()


In [None]:
# Same latent coordinates drawn twice, coloured by two different labelings.
# Left: KMeans fitted on the original features (same labels, Z used for position).
# Right: KMeans fitted on the latent space.
labels_base_on_latent = label_Kmeans

panels = [
    (labels_base_on_latent, "KMeans (fitur asli), diproyeksikan ke latent"),
    (labels_latent, "KMeans (latent autoencoder)"),
]

fig, axes = plt.subplots(1, 2, figsize=(12, 5))

for ax, (cluster_ids, panel_title) in zip(axes, panels):
    sns.scatterplot(
        x=Z[:, 0],
        y=Z[:, 1],
        hue=cluster_ids,
        palette="tab10",
        s=15,
        ax=ax,
    )
    ax.set_title(panel_title)
    ax.set_xlabel("Latent dim 1")
    ax.set_ylabel("Latent dim 2")
    # Legend goes outside each panel, to the right.
    ax.legend(title="Cluster", bbox_to_anchor=(1.05, 1), loc="upper left")

plt.tight_layout()
plt.show()


In [None]:
# Silhouette score of the two KMeans variants, side by side.
method_names = ["KMeans (fitur asli)", "Autoencoder + KMeans (latent)"]
method_scores = [SL_Kmeans, sil_latent]
data_cmp = {"Metode": method_names, "Silhouette Score": method_scores}
df_cmp = pd.DataFrame(data_cmp)
df_cmp


In [None]:
# GR vs DeltaPHI drawn twice: classic KMeans labels (left) and
# autoencoder + KMeans labels (right).
plt.figure(figsize=(12, 5))

panel_specs = (
    (1, label_Kmeans, "GR vs DeltaPHI - KMeans (ML klasik)"),
    (2, labels_latent, "GR vs DeltaPHI - Autoencoder + KMeans"),
)

for position, cluster_ids, panel_title in panel_specs:
    plt.subplot(1, 2, position)
    sns.scatterplot(
        x=data["GR"],
        y=data["DeltaPHI"],
        hue=cluster_ids,
        palette="tab10",
        s=15,
        edgecolor=None,
    )
    plt.title(panel_title)
    plt.xlabel("GR")
    plt.ylabel("DeltaPHI")
    # Legend goes outside the panel, to the right.
    plt.legend(title="Cluster", bbox_to_anchor=(1.05, 1), loc="upper left")

plt.tight_layout()
plt.show()


In [None]:
import numpy as np
from scipy.optimize import linear_sum_assignment

# 1) Mark the samples that the ML and DL clusterings group differently.
#
# Cluster ids produced by two independent KMeans runs are arbitrary: the same
# group of points can be called 0 by one run and 3 by the other. Comparing the
# raw ids therefore flags points as "different" even when both models agree.
# The latent ids are first renamed to the KMeans ids they overlap with most
# (one-to-one assignment that maximises the number of samples in agreement).
contingency = np.zeros((n_rocktype, n_rocktype), dtype=int)
np.add.at(contingency, (labels_latent, label_Kmeans), 1)  # rows: latent id, cols: KMeans id
latent_ids, kmeans_ids = linear_sum_assignment(contingency, maximize=True)
relabel = np.empty(n_rocktype, dtype=int)
relabel[latent_ids] = kmeans_ids
labels_latent_aligned = relabel[labels_latent]

different = label_Kmeans != labels_latent_aligned    # boolean array

plt.figure(figsize=(12,5))

# ------------------------#
# Classic ML plot
# ------------------------#
plt.subplot(1, 2, 1)
sns.scatterplot(
    x=data["GR"],
    y=data["DeltaPHI"],
    hue=label_Kmeans,
    palette="tab10",
    s=15,
    edgecolor=None
)
plt.title("GR vs DeltaPHI - KMeans (ML klasik)")
plt.xlabel("GR")
plt.ylabel("DeltaPHI")
plt.legend(title="Cluster", bbox_to_anchor=(1.05, 1), loc="upper left")

# ------------------------#
# Deep-learning plot + mark the differences
# ------------------------#
plt.subplot(1, 2, 2)
# Base points: coloured by the DL cluster, using the aligned ids so a colour
# means the same cluster in both panels.
sns.scatterplot(
    x=data["GR"],
    y=data["DeltaPHI"],
    hue=labels_latent_aligned,
    palette="tab10",
    s=15,
    edgecolor=None
)

# Overlay: samples assigned to a different cluster get a black 'x'.
# 'x' is an unfilled marker, so matplotlib ignores `edgecolors` and takes the
# colour from `c`/`color`; the previous c="none" made the markers invisible.
plt.scatter(
    data["GR"][different],
    data["DeltaPHI"][different],
    color="black",
    marker="x",
    s=30,
    label="Berbeda cluster ML vs DL"
)

plt.title("GR vs DeltaPHI - Autoencoder + KMeans")
plt.xlabel("GR")
plt.ylabel("DeltaPHI")
plt.legend(title="Cluster / Perbedaan", bbox_to_anchor=(1.05, 1), loc="upper left")

plt.tight_layout()
plt.show()


**Supervised learning**

In [None]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the drilling table from the working directory.
df = pd.read_csv("DrillingData.csv")

# Target: difference between the FIn and FOut columns.
df['FlowImbalance'] = df['FIn'] - df['FOut']
target_col = "FlowImbalance"          # change this to use another target

# The target is computed directly from FIn and FOut, so leaving them in the
# feature set lets the model reproduce the answer by subtraction (target
# leakage) and makes the test metrics meaningless. They are excluded here;
# revisit this set if `target_col` is changed to something else.
leak_cols = {"FIn", "FOut"}

# Only numeric columns can be standardised and fed to the network.
numeric_cols = df.select_dtypes(include="number").columns
feature_cols = [c for c in numeric_cols if c != target_col and c not in leak_cols]
if not feature_cols:
    raise ValueError("No numeric feature columns left after removing the target and its source columns.")

X = df[feature_cols].values
y = df[target_col].values

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training split only, then reuse it for the test split.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled  = scaler.transform(X_test)

print(X_train_scaled.shape, y_train.shape)


In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# Seed the Python, NumPy and TensorFlow random generators so that weight
# initialisation and batch shuffling give the same result on every run.
keras.utils.set_random_seed(42)

input_dim = X_train_scaled.shape[1]

# Fully connected regressor: input_dim -> 64 -> 32 -> 1.
model = keras.Sequential([
    layers.Input(shape=(input_dim,)),
    layers.Dense(64, activation="relu"),
    layers.Dense(32, activation="relu"),
    layers.Dense(1, activation="linear")   # regression output
])

model.compile(optimizer="adam", loss="mse", metrics=["mae"])
model.summary()

# The last 20% of the training rows are held out by Keras for validation.
history = model.fit(
    X_train_scaled, y_train,
    validation_split=0.2,
    epochs=100,
    batch_size=32,
    verbose=1
)


In [None]:
import matplotlib.pyplot as plt

# Train vs validation curves: loss on the left panel, MAE on the right.
plt.figure(figsize=(10, 4))

curve_panels = (
    (1, "loss", "MSE", "Loss"),
    (2, "mae", "MAE", "MAE"),
)

for position, metric_key, y_label, panel_title in curve_panels:
    plt.subplot(1, 2, position)
    plt.plot(history.history[metric_key], label="Train")
    plt.plot(history.history["val_" + metric_key], label="Val")
    plt.xlabel("Epoch")
    plt.ylabel(y_label)
    plt.title(panel_title)
    plt.legend()

plt.tight_layout()
plt.show()


In [None]:
from sklearn.metrics import mean_squared_error, r2_score

# Predict on the held-out test split and flatten the output to one dimension.
y_pred = model.predict(X_test_scaled).ravel()

# Test-set error and goodness of fit.
mse_test = mean_squared_error(y_test, y_pred)
r2_test = r2_score(y_test, y_pred)
print("Test MSE:", mse_test)
print("Test R2 :", r2_test)

# Predicted vs actual, with the y = x line spanning the range of the actual values.
plt.figure(figsize=(5, 5))
plt.scatter(y_test, y_pred, s=10, alpha=0.5)
plt.xlabel("FlowImbalance aktual")
plt.ylabel("FlowImbalance prediksi")
plt.title("Prediksi DL vs aktual")
diagonal = [y_test.min(), y_test.max()]
plt.plot(diagonal, diagonal, "r--", label="y = x")
plt.legend()
plt.tight_layout()
plt.show()
