In [20]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier # Import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix

### Load the dataset

In [7]:
# Read the blood dataset from the working directory into a DataFrame.
df = pd.read_csv(filepath_or_buffer='blood.csv')

In [8]:
# Preview the first five rows of the dataset.
df.head(n=5)

Unnamed: 0,Recency,Frequency,Monetary,Time,Class
0,2,50,12500,99,1
1,0,13,3250,28,1
2,1,17,4000,36,1
3,2,20,5000,45,1
4,1,24,6000,77,0


In [9]:
# Print the column names, non-null counts, dtypes and memory usage of the DataFrame.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 748 entries, 0 to 747
Data columns (total 5 columns):
 #   Column     Non-Null Count  Dtype
---  ------     --------------  -----
 0   Recency    748 non-null    int64
 1   Frequency  748 non-null    int64
 2   Monetary   748 non-null    int64
 3   Time       748 non-null    int64
 4   Class      748 non-null    int64
dtypes: int64(5)
memory usage: 29.3 KB


In [10]:
# Summary statistics (count, mean, std, min, quartiles, max) for each column.
df.describe()

Unnamed: 0,Recency,Frequency,Monetary,Time,Class
count,748.0,748.0,748.0,748.0,748.0
mean,9.506684,5.516043,1378.676471,34.284759,0.237968
std,8.095396,5.841825,1459.826781,24.380307,0.426124
min,0.0,1.0,250.0,2.0,0.0
25%,2.75,2.0,500.0,16.0,0.0
50%,7.0,4.0,1000.0,28.0,0.0
75%,14.0,7.0,1750.0,50.0,0.0
max,74.0,50.0,12500.0,99.0,1.0


In [11]:
# Count the missing values in each column.
df.isna().sum()

Unnamed: 0,0
Recency,0
Frequency,0
Monetary,0
Time,0
Class,0


# Separate features and labels

In [14]:
# The 'Class' column is the label; every remaining column is a feature.
y = df['Class']
X = df.drop(columns='Class')

In [28]:
 # Identify numerical and categorical features
# Columns that are scaled as numerical inputs.
numerical_features = [
    'Recency',
    'Frequency',
    'Monetary',
    'Time',
]
# Columns that are one-hot encoded; none are listed here.
categorical_features = list()

In [29]:
# Build one preprocessing pipeline per feature type, then combine them.

# Numerical columns are standardised.
numerical_pipeline = Pipeline(steps=[('scaler', StandardScaler())])

# Categorical columns are one-hot encoded, dropping the first level of each.
categorical_pipeline = Pipeline(steps=[('encoder', OneHotEncoder(drop='first'))])

# Route each group of columns to its own pipeline.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numerical_pipeline, numerical_features),
        ('cat', categorical_pipeline, categorical_features),
    ],
)

# Split the data into training and test sets

In [33]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [34]:
 # Create a pipeline that includes preprocessing and model training
# Chain the preprocessing step and a seeded random-forest classifier so that
# both are fitted and applied together.
model_pipeline = Pipeline(
    steps=[
        ('preprocessor', preprocessor),
        ('classifier', RandomForestClassifier(random_state=42)),
    ]
)

In [35]:
# Fit the preprocessing steps and the classifier on the training split
model_pipeline.fit(X=X_train, y=y_train)

In [36]:
# Make predictions for the held-out test split
y_pred = model_pipeline.predict(X=X_test)

In [37]:
# Evaluate the model: compare the test labels with the predictions
print(confusion_matrix(y_true=y_test, y_pred=y_pred))
print(classification_report(y_true=y_test, y_pred=y_pred))

[[100  13]
 [ 28   9]]
              precision    recall  f1-score   support

           0       0.78      0.88      0.83       113
           1       0.41      0.24      0.31        37

    accuracy                           0.73       150
   macro avg       0.60      0.56      0.57       150
weighted avg       0.69      0.73      0.70       150



In [41]:
# Generate personalized recommendations (mock example)
def generate_recommendations(patient_data):
    """Return the recommendation text for the first row of ``patient_data``.

    ``patient_data`` is passed to the global ``model_pipeline`` for prediction.
    Only the first predicted label is used; it is looked up in a fixed
    label-to-recommendation table (a mock example).

    Raises:
        KeyError: if the first predicted label is not a key of the table.
    """
    # Map predictions to actual recommendations (this will depend on your dataset and model)
    label_to_advice = {
        0: 'No action needed',
        1: 'Regular check-up',
        2: 'Lifestyle changes',
        3: 'Medication',
    }
    predicted_labels = model_pipeline.predict(patient_data)
    first_label = predicted_labels[0]
    return label_to_advice[first_label]

 # Example patient data for recommendation generation


In [42]:
# Single-row example input for the recommendation function.
# It holds feature columns only: the target column 'Class' is what the model
# predicts, so it must not be supplied as part of the input.
example_patient_data = pd.DataFrame({
    'Recency': [10],
    'Frequency': [3],
    'Monetary': [500],
    'Time': [30],
})

In [46]:
print(generate_recommendations(example_patient_data))

No action needed


## Project Report

This project aimed to build a machine learning model to predict a target variable ('Class') based on features including 'Recency', 'Frequency', 'Monetary', and 'Time'.

**Steps taken:**

1.  **Data Loading and Exploration:** The `blood.csv` dataset was loaded into a pandas DataFrame. Initial exploration was performed using `df.head()`, `df.info()`, `df.describe()`, and checking for null values.
2.  **Data Preparation:** Features (`X`) and the target variable (`y`) were separated. Numerical features were identified. A preprocessing pipeline was created using `StandardScaler` for numerical features.
3.  **Data Splitting:** The data was split into training and testing sets using `train_test_split` with a test size of 0.2.
4.  **Model Training:** A machine learning pipeline was created including the preprocessor and a `RandomForestClassifier` model. The model was trained on the training data.
5.  **Model Evaluation:** The model's performance was evaluated on the test set using `confusion_matrix` and `classification_report`.
