<a href="https://colab.research.google.com/github/elangbijak4/Riset-Sistem-Cerdas-LLM-2025/blob/main/AI_Life_Cycle_dalam_Satu_Naskah.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install dependencies
!pip install pandas scikit-learn huggingface_hub joblib

In [18]:
# STEP 1: DATA COLLECTION
from sklearn.datasets import fetch_california_housing
import pandas as pd

In [19]:
# Load the California housing dataset with pandas containers and keep the
# combined frame, which (per the sample below) holds the feature columns
# together with the MedHouseVal column.
data = fetch_california_housing(as_frame=True)
df = data.frame
# Header and the first five rows, each on its own line.
print("Dataset Sample:", df.head(), sep="\n")

Dataset Sample:
   MedInc  HouseAge  AveRooms  AveBedrms  Population  AveOccup  Latitude  Longitude  MedHouseVal
0  8.3252      41.0  6.984127   1.023810       322.0  2.555556     37.88    -122.23        4.526
1  8.3014      21.0  6.238137   0.971880      2401.0  2.109842     37.86    -122.22        3.585
2  7.2574      52.0  8.288136   1.073446       496.0  2.802260     37.85    -122.24        3.521
3  5.6431      52.0  5.817352   1.073059       558.0  2.547945     37.85    -122.25        3.413
4  3.8462      52.0  6.281853   1.081081       565.0  2.181467     37.85    -122.25        3.422


In [20]:
# Optional: snapshot the untouched frame to CSV so the raw data can be versioned.
# index=False leaves the row index out of the file.
df.to_csv(path_or_buf="housing_raw.csv", index=False)

In [21]:
# STEP 2: DATA PREPARATION
# Separate the predictors from the value being predicted.
X = df.drop(columns="MedHouseVal")  # Features: every column except the target
y = df["MedHouseVal"]               # Target: the MedHouseVal column
print("\nData prepared (features and target split).")


Data prepared (features and target split).


In [22]:
# STEP 3: MODEL DEVELOPMENT
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error

In [23]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [24]:
# Build a 100-tree random forest regressor (seeded for reproducibility) and
# fit it on the training portion of the split.
model = RandomForestRegressor(
    n_estimators=100,
    random_state=42,
)
model.fit(X=X_train, y=y_train)

In [25]:
# Score the fitted model on the held-out rows. `mse` is kept as a notebook
# variable because the monitoring step writes it to the metrics log.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
print("\nModel trained. Mean Squared Error (MSE):", mse)


Model trained. Mean Squared Error (MSE): 0.255553781221915


In [26]:
# Serialize the fitted estimator to disk; the deployment step uploads this file.
import joblib

joblib.dump(value=model, filename="model.joblib")
print("\nTrained model saved as 'model.joblib'.")


Trained model saved as 'model.joblib'.


In [27]:
# STEP 4: MODEL DEPLOYMENT
from huggingface_hub import HfApi, login

In [28]:
# Log in to the Hugging Face Hub.
# The access token is a secret, so it is never written into the notebook source:
# it is taken from the HF_TOKEN environment variable when that is set, and
# otherwise requested interactively via getpass, which does not echo the input.
import os
from getpass import getpass

HUGGINGFACE_TOKEN = os.environ.get("HF_TOKEN") or getpass("Hugging Face token: ")
login(token=HUGGINGFACE_TOKEN)

In [29]:
# Upload the serialized model to the Hugging Face Hub.
# The repo id is assembled once so that the repo that gets created, the upload
# target and the printed URL can never point at different places.
HF_USERNAME = "elangbijak4"       # Hub namespace that owns the repo; change to your own account
repo_name = "housing-model-demo"  # Customize this name
repo_id = f"{HF_USERNAME}/{repo_name}"

api = HfApi()
# exist_ok=True is passed so that re-running the cell against an existing repo
# is not treated as an error.
api.create_repo(repo_id=repo_id, repo_type="model", exist_ok=True)
api.upload_file(
    path_or_fileobj="model.joblib",  # local file written by the save step
    path_in_repo="model.joblib",     # same file name inside the repo
    repo_id=repo_id,
    repo_type="model",
)
print(f"\nModel deployed to Hugging Face Hub: https://huggingface.co/{repo_id}")

No files have been modified since last commit. Skipping to prevent empty commit.



Model deployed to Hugging Face Hub: https://huggingface.co/elangbijak4/housing-model-demo


In [30]:
# STEP 5: MONITORING (basic logging in Colab)
# Record the validation MSE in a plain-text log file.
# Mode "w" replaces any previous contents, so the file holds only this run's value.
with open("model_metrics.log", "w") as log_file:
    print(f"MSE: {mse}", file=log_file)
print("\nModel performance logged.")


Model performance logged.


In [31]:
# STEP 6: RETRAINING (if necessary)
# Demonstration of a retraining pass (this could be automated once new data
# arrives). No new data exists here, so a 70/30 split with a different seed
# stands in for it.
# NOTE(review): this rebinds X_train/X_test/y_train/y_test and refits `model`
# in place, so re-running earlier cells afterwards would use this state. The
# following save cell reads `model`, which is why the name is reused.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=21,
)
# fit() returns the estimator itself, so the prediction call is chained onto it.
y_pred_new = model.fit(X_train, y_train).predict(X_test)
new_mse = mean_squared_error(y_true=y_test, y_pred=y_pred_new)
print("\nRetrained model MSE:", new_mse)


Retrained model MSE: 0.25537696071850113


In [None]:
# Write the refit estimator to its own file so the originally saved
# 'model.joblib' is left untouched. Relies on `joblib` imported in an earlier cell.
joblib.dump(value=model, filename="model_retrained.joblib")
print("\nRetrained model saved as 'model_retrained.joblib'.")