In [5]:
pip install kneed

Note: you may need to restart the kernel to use updated packages.


In [None]:
pip install xgboost

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score
import joblib

In [2]:
data = pd.read_csv('data_clusters.csv')

In [3]:
list_of_clusters = data['Clusters'].unique()
list_of_clusters

array([0, 2, 1], dtype=int64)

In [4]:
def get_best_param_for_random_forest(x_train, y_train):
    
    #initialise the model
    clf = RandomForestClassifier()
    
    param_grid = {
            'n_estimators': [10,50,100,150],
            "criterion" : ['gini' , 'entropy'],
            "max_depth" : range(2,4),
            "max_features" : ['auto' , 'log2']
    }
        
    grid = GridSearchCV(estimator=clf, param_grid = param_grid, cv = 5 , verbose = 0)
    print('[Info] Model Grid search training started')
    grid.fit(x_train , y_train)
    
    
    criterion = grid.best_params_['criterion']
    n_estimators = grid.best_params_['n_estimators']
    max_depth = grid.best_params_['max_depth']
    max_features = grid.best_params_['max_features']
    
    # train the random forest
    clf = RandomForestClassifier(n_estimators = n_estimators , 
                                 criterion = criterion , 
                                 max_depth=max_depth , 
                                 max_features=max_features)
    print('[Info] Model training started')
    
    clf.fit(x_train , y_train)
    
    return clf

def get_best_param_for_logistic(x_train, y_train):
    pass

def get_best_param_for_xgboost(x_train, y_train):
    
    xgb = XGBClassifier(objective='binary:logistic')
    
    param_grid_xgboost = {
        'learning_rate' :[0.5,0.1,0.01,0.001],
        'max_depth':[3,5,10,20],
        'n_estimators':[10,50,100,200]
    }
    
    grid = GridSearchCV(estimator=xgb, param_grid = param_grid_xgboost, cv = 5 , verbose = 0)
    grid.fit(x_train , y_train)
    
    learning_rate = grid.best_params_['learning_rate']
    n_estimators = grid.best_params_['n_estimators']
    max_depth = grid.best_params_['max_depth']
    
    xgb = XGBClassifier(learning_rate = learning_rate , n_estimators=n_estimators , max_depth= max_depth)
    xgb.fit(x_train, y_train)
    
    return xgb
    
    
def get_best_model(x_train , x_test, y_train , y_test):
    
    # random forest
    random_forest = get_best_param_for_random_forest(x_train,  y_train)
    random_forest_prediction = random_forest.predict(x_test)
    random_forest_score = accuracy_score(y_test ,random_forest_prediction )
    
    #xgboost
    xg_boost = get_best_param_for_xgboost(x_train,  y_train)
    xg_boost_prediction = xg_boost.predict(x_test)
    xg_boost_score = accuracy_score(y_test ,xg_boost_prediction )
    
    if xg_boost_score > random_forest_score:
        return 'xgboost' , xg_boost
    else:
        return 'random_forest' , random_forest

In [5]:
mkdir models

A subdirectory or file models already exists.


In [6]:
### parsing all the clusters and lokking for the best ml algorithm to fit on individual clusters

for i in list_of_clusters:
    cluster_data = data[data['Clusters']==i] #filter the data for one cluster
    
    # prepare a data
    
    cluster_features = cluster_data.drop(['Labels' , 'Clusters'], axis = 1)
    cluster_label = cluster_data['Labels']
    
    # split the data
    x_train , x_test, y_train , y_test = train_test_split(cluster_features, cluster_label , test_size = 1/3, random_state=101)
    model_name , model = get_best_model(x_train , x_test, y_train , y_test)
    print('[INFO] model Trained')
    
    joblib.dump(model , f'models/{model_name}_{i}.pkl')

[Info] Model Grid search training started
[Info] Model training started








































































































































































































































































































































































































































































































































































































































































































































































































































































































































[INFO] model Trained
[Info] Model Grid search training started
[Info] Model training started








































































































































































































































































































































































































































































































































































































































[INFO] model Trained
[Info] Model Grid search training started
[Info] Model training started
































































































































































































































































































































































































































































































































































































































































































































































[INFO] model Trained




In [7]:
# run all the ipynb file
# Research work = Xgboost
# fill up the fucntions

In [None]:
Predction

In [None]:
y = mx+c