<a href="https://colab.research.google.com/github/capahdan/My-Me-Health/blob/main/Model_ML.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Preprocessing Data

In [31]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report

In [32]:
df = pd.read_csv('/content/PHQ9_Results.csv')
missing_values = df.isnull().sum()

print(df)

        SEQN  RIDAGEYR  DPQ010  DPQ020  DPQ030  DPQ040  DPQ050  DPQ060  \
0     109266        29       0       0       0       0       0       0   
1     109271        49       2       1       0       0       0       0   
2     109273        36       2       2       2       2       2       2   
3     109274        68       0       0       0       0       0       0   
4     109282        76       0       1       0       1       0       0   
...      ...       ...     ...     ...     ...     ...     ...     ...   
7877  124815        52       0       0       0       0       0       0   
7878  124817        67       1       1       1       1       0       0   
7879  124818        40       0       0       0       1       0       0   
7880  124821        63       3       0       0       2       0       0   
7881  124822        74       0       0       0       2       2       0   

      DPQ070  DPQ080  DPQ090  Total       Action to take       Expectation  
0          0       0       0      

In [33]:
if missing_values.any():
  print("Telah ditemukan data yang hilang")
else:
  print("Tidak ada data yang hilang")

Tidak ada data yang hilang


# Split Data

In [34]:
X = df[['DPQ010', 'DPQ020', 'DPQ030', 'DPQ040', 'DPQ050', 'DPQ060', 'DPQ070', 'DPQ080', 'DPQ090']]
y = pd.cut(df['Total'], bins=[-float('inf'), 5, 9, 14, 19, float('inf')], labels=['Normal', 'Minimal Symptoms', 'Minor Depression', 'Major Depression', 'Severe'])
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

print("Training data shape:", X_train.shape, y_train.shape)
print("Test data shape:", X_test.shape, y_test.shape)

Training data shape: (6305, 9) (6305,)
Test data shape: (1577, 9) (1577,)


# Normalize

In [35]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Apply PCA

In [36]:
pca = PCA(n_components=None)  # Set the desired number of components
X_train_pca = pca.fit_transform(X_train_scaled)
X_test_pca = pca.transform(X_test_scaled)

explained_variance_ratio = pca.explained_variance_ratio_
cumulative_variance = np.cumsum(explained_variance_ratio)
n_components = np.argmax(cumulative_variance >= 0.95) + 1

pca = PCA(n_components=n_components)
X_train_pca = pca.fit_transform(X_train_scaled)
X_test_pca = pca.transform(X_test_scaled)  # Add this line to transform the testing data

# Decision Tree

In [37]:
decision_tree = DecisionTreeClassifier()
decision_tree.fit(X_train_pca, y_train)
y_pred = decision_tree.predict(X_test_pca)

accuracy = accuracy_score(y_test, y_pred)
classification_report = classification_report(y_test, y_pred)

print("Accuracy:", accuracy)
print("Classification Report:")
print(classification_report)

Accuracy: 0.9917564996829423
Classification Report:
                  precision    recall  f1-score   support

Major Depression       0.90      0.93      0.92        41
Minimal Symptoms       0.98      0.98      0.98       190
Minor Depression       0.97      0.98      0.98       100
          Normal       1.00      1.00      1.00      1229
          Severe       0.88      0.82      0.85        17

        accuracy                           0.99      1577
       macro avg       0.95      0.94      0.94      1577
    weighted avg       0.99      0.99      0.99      1577



# Checking

In [42]:
input_values = []
for feature in X.columns:
    value = float(input(f"Enter value for {feature}: "))
    input_values.append(value)

input_test = pd.DataFrame([input_values], columns=X.columns)
input_test_scaled = scaler.transform(input_test)
input_test_pca = pca.transform(input_test_scaled)
predicted_label = decision_tree.predict(input_test_pca)

print("Predicted Label:", predicted_label)

Enter value for DPQ010: 2
Enter value for DPQ020: 1
Enter value for DPQ030: 3
Enter value for DPQ040: 2
Enter value for DPQ050: 2
Enter value for DPQ060: 2
Enter value for DPQ070: 2
Enter value for DPQ080: 1
Enter value for DPQ090: 1
Predicted Label: ['Major Depression']


# H5

In [48]:
import joblib
from sklearn.tree import DecisionTreeClassifier

# Train your Decision Tree model
decision_tree = DecisionTreeClassifier()
decision_tree.fit(X_train_pca, y_train)

# Save the Decision Tree model as a pickle file
joblib.dump(decision_tree, 'my_decision_tree.pkl')


['my_decision_tree.pkl']