In [5]:
import pandas as pd
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.metrics import silhouette_score
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import MinMaxScaler
from sklearn.cluster import AgglomerativeClustering
from sklearn.tree import DecisionTreeClassifier
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import LatentDirichletAllocation
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

In [6]:
# Load the dataset.
# A raw string keeps the Windows backslashes literal: '\A', '\M' and '\m' are
# not valid escape sequences, and recent Python versions warn about them in
# ordinary string literals. The resulting path text is unchanged.
# NOTE(review): this is an absolute, machine-specific path — consider a
# configurable data directory so the notebook can run elsewhere.
df = pd.read_csv(r'D:\Acciojob\ML Project\medical_records.csv')

In [None]:
# Clustering features: one-hot encode the condition and medication columns,
# then standardise every resulting indicator column.
X_cluster = pd.get_dummies(
    df[['medical_conditions', 'medications']],
    columns=['medical_conditions', 'medications'],
)
scaler = StandardScaler()
X_cluster_scaled = scaler.fit_transform(X_cluster)

In [None]:
# K-Means: fit three clusters on the scaled features (fixed seed) and keep
# the per-row cluster assignment found during fitting.
kmeans = KMeans(n_clusters=3, random_state=42).fit(X_cluster_scaled)
cluster_labels = kmeans.labels_

In [None]:
# Preprocessing for classification (target: 'no_show').
# The target column is removed from the features along with the other
# excluded columns; leaving it in would leak the label into the model inputs
# and make every downstream accuracy figure meaningless.
y_classification = df['no_show']
X_classification = df.drop(
    ['patient_id', 'name', 'date_of_birth', 'gender', 'allergies',
     'last_appointment_date', 'no_show'],
    axis=1,
)
# One-hot encode whatever categorical columns remain.
X_classification = pd.get_dummies(X_classification)
# 80/20 train/test split with a fixed seed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    X_classification, y_classification, test_size=0.2, random_state=42
)

In [None]:
# Support-vector classifier with default settings: fit on the training
# split, predict the held-out split, then score those predictions.
svm_model = SVC().fit(X_train, y_train)
svm_predictions = svm_model.predict(X_test)
svm_confusion_matrix = confusion_matrix(y_test, svm_predictions)
svm_classification_report = classification_report(y_test, svm_predictions)
svm_accuracy = accuracy_score(y_test, svm_predictions)

In [None]:
# Multi-layer perceptron (fixed seed): fit on the training split, predict the
# held-out split, then score those predictions.
nn_model = MLPClassifier(random_state=42).fit(X_train, y_train)
nn_predictions = nn_model.predict(X_test)
nn_confusion_matrix = confusion_matrix(y_test, nn_predictions)
nn_classification_report = classification_report(y_test, nn_predictions)
nn_accuracy = accuracy_score(y_test, nn_predictions)

In [None]:
# Forecasting input: quarterly medication demand.
# Demand is counted from the patient-level rows so that each record falls in
# the quarter of its own appointment date. (Assigning df's dates onto a table
# that was already aggregated per medication aligns on the row index and
# pairs each medication with an unrelated patient's date.)
# Result: a DataFrame indexed by quarter ('date') with a single 'demand'
# column holding the number of medication records in that quarter.
medication_demand = (
    df[['medications', 'last_appointment_date']]
    .dropna()  # rows without a medication or a date cannot be bucketed
    .assign(date=lambda records: pd.to_datetime(records['last_appointment_date']))
    .set_index('date')
    .resample('Q')
    .size()
    .to_frame(name='demand')
)

In [None]:
# Allergy patterns by demographic: collect the allergy entries of every
# (gender, age_group) combination into one list per group.
# NOTE(review): assumes the dataset has an 'age_group' column — confirm.
allergies_demographics = (
    df.groupby(['gender', 'age_group'])['allergies']
    .apply(list)
    .reset_index()
)

In [None]:
# Decision-tree inputs: 'age' and 'gender' as features (non-numeric columns
# are one-hot encoded), 'medical_conditions' as the label; 80/20 split with a
# fixed seed.
y_tree = df['medical_conditions']
X_tree = pd.get_dummies(df[['age', 'gender']])
(X_train_tree, X_test_tree,
 y_train_tree, y_test_tree) = train_test_split(
    X_tree, y_tree, test_size=0.2, random_state=42
)

In [None]:
# Decision tree (fixed seed) fitted on the training split.
tree_model = DecisionTreeClassifier(random_state=42).fit(X_train_tree, y_train_tree)

In [None]:
# TF-IDF vectorisation of the medical-condition text.
# (No K-Means step runs in this cell; it only builds the TF-IDF matrix.)
tfidf_vectorizer = TfidfVectorizer()
X_tfidf = tfidf_vectorizer.fit_transform(
    raw_documents=df['medical_conditions'],
)

In [None]:
# Topic modelling: fit a three-topic LDA (fixed seed) on the TF-IDF matrix
# and keep the per-document topic weights.
lda_model = LatentDirichletAllocation(
    n_components=3,
    random_state=42,
)
lda_topics = lda_model.fit_transform(X=X_tfidf)

In [None]:
# Preprocessing for adherence classification (target: 'adherence').
# The target column is removed from the features along with the other
# excluded columns; leaving it in would leak the label into the model inputs
# and make every downstream accuracy figure meaningless.
# NOTE(review): 'no_show' (if present) stays in as a feature — confirm that is
# intended.
y_adherence = df['adherence']
X_adherence = df.drop(
    ['patient_id', 'name', 'date_of_birth', 'gender', 'allergies',
     'last_appointment_date', 'adherence'],
    axis=1,
)
# One-hot encode whatever categorical columns remain.
X_adherence = pd.get_dummies(X_adherence)
# 80/20 train/test split with a fixed seed for repeatability.
X_train_adherence, X_test_adherence, y_train_adherence, y_test_adherence = train_test_split(
    X_adherence, y_adherence, test_size=0.2, random_state=42
)

In [None]:
# Random forest (fixed seed): fit on the adherence training split, predict
# the held-out split, then score those predictions.
rf_model = RandomForestClassifier(random_state=42).fit(
    X_train_adherence, y_train_adherence
)
rf_predictions = rf_model.predict(X_test_adherence)
rf_confusion_matrix = confusion_matrix(y_test_adherence, rf_predictions)
rf_classification_report = classification_report(y_test_adherence, rf_predictions)
rf_accuracy = accuracy_score(y_test_adherence, rf_predictions)

## THANK YOU TO THE WHOLE ACCIOJOB TEAM FOR MY AWESOME JOURNEY